From ed6354398ae309d029a9e5b0aedd9a0e1fdc9586 Mon Sep 17 00:00:00 2001 From: panxuefeng Date: Wed, 1 Mar 2023 13:54:10 +0800 Subject: [PATCH] Update to Loongson jdk-11.0.18+10-ls-1 --- LoongArch64-support-11.0.15.patch | 56542 ++++++++++++++++++++++++++-- 1 file changed, 53252 insertions(+), 3290 deletions(-) diff --git a/LoongArch64-support-11.0.15.patch b/LoongArch64-support-11.0.15.patch index 88bd20f..0cc36eb 100644 --- a/LoongArch64-support-11.0.15.patch +++ b/LoongArch64-support-11.0.15.patch @@ -1,11 +1,33 @@ -commit 8ad6215509c85805e7ebc1edfefde04b0fb087d1 -Author: aoqi -Date: Mon Aug 8 21:21:49 2022 +0800 - - init la - +diff --git a/make/CompileJavaModules.gmk b/make/CompileJavaModules.gmk +index 40c7e06f54..142485c05a 100644 +--- a/make/CompileJavaModules.gmk ++++ b/make/CompileJavaModules.gmk +@@ -434,6 +434,7 @@ jdk.internal.vm.ci_ADD_JAVAC_FLAGS += -parameters -Xlint:-exports -XDstringConca + + jdk.internal.vm.compiler_ADD_JAVAC_FLAGS += -parameters -XDstringConcat=inline \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.aarch64=jdk.internal.vm.compiler \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.loongarch64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.amd64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code.site=jdk.internal.vm.compiler \ +@@ -441,6 +442,7 @@ jdk.internal.vm.compiler_ADD_JAVAC_FLAGS += -parameters -XDstringConcat=inline \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.common=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.aarch64=jdk.internal.vm.compiler \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.loongarch64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.amd64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.sparc=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.meta=jdk.internal.vm.compiler \ +@@ -460,6 +462,7 @@ jdk.internal.vm.compiler_EXCLUDES += \ + org.graalvm.compiler.api.directives.test \ + org.graalvm.compiler.api.test \ + org.graalvm.compiler.asm.aarch64.test \ ++ org.graalvm.compiler.asm.loongarch64.test \ + org.graalvm.compiler.asm.amd64.test \ + org.graalvm.compiler.asm.sparc.test \ + org.graalvm.compiler.asm.test \ diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 -index 5a3adf6b04..94f1eba2cc 100644 +index a3e1e00b2c..22f479120b 100644 --- a/make/autoconf/hotspot.m4 +++ b/make/autoconf/hotspot.m4 @@ -34,6 +34,12 @@ DEPRECATED_JVM_FEATURES="trace" @@ -21,7 +43,7 @@ index 5a3adf6b04..94f1eba2cc 100644 ############################################################################### # Check if the specified JVM variant should be built. To be used in shell if # constructs, like this: -@@ -334,6 +340,26 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], +@@ -337,6 +343,26 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], HOTSPOT_TARGET_CPU_ARCH=arm fi @@ -48,8 +70,22 @@ index 5a3adf6b04..94f1eba2cc 100644 # Verify that dependencies are met for explicitly set features. if HOTSPOT_CHECK_JVM_FEATURE(jvmti) && ! 
HOTSPOT_CHECK_JVM_FEATURE(services); then AC_MSG_ERROR([Specified JVM feature 'jvmti' requires feature 'services']) +@@ -421,10 +447,11 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], + JVM_FEATURES_jvmci="" + INCLUDE_JVMCI="false" + else +- # Only enable jvmci on x86_64, sparcv9 and aarch64 ++ # Only enable jvmci on x86_64, sparcv9, aarch64 and loongarch64 + if test "x$OPENJDK_TARGET_CPU" = "xx86_64" || \ + test "x$OPENJDK_TARGET_CPU" = "xsparcv9" || \ +- test "x$OPENJDK_TARGET_CPU" = "xaarch64" ; then ++ test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ ++ test "x$OPENJDK_TARGET_CPU" = "xloongarch64" ; then + AC_MSG_RESULT([yes]) + JVM_FEATURES_jvmci="jvmci" + INCLUDE_JVMCI="true" diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 -index 95bdbb2831..9184315c7e 100644 +index f89b22f5fc..a7aa4f4aaa 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 @@ -23,6 +23,12 @@ @@ -65,7 +101,7 @@ index 95bdbb2831..9184315c7e 100644 # Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD. # Converts autoconf style CPU name to OpenJDK style, into # VAR_CPU, VAR_CPU_ARCH, VAR_CPU_BITS and VAR_CPU_ENDIAN. -@@ -512,6 +518,12 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], +@@ -554,6 +560,12 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], HOTSPOT_$1_CPU_DEFINE=PPC64 elif test "x$OPENJDK_$1_CPU" = xppc64le; then HOTSPOT_$1_CPU_DEFINE=PPC64 @@ -105,10 +141,10 @@ index fdd2c0ca3d..318191233a 100644 assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp -index 8c23214a85..ce0905740f 100644 +index cebc1e410d..816226c068 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp -@@ -277,18 +277,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { +@@ -260,18 +260,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ store(reg, addr); } @@ -1258,10 +1294,10 @@ index 0000000000..67c9a859aa +} diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp new file mode 100644 -index 0000000000..b78f2ab280 +index 0000000000..9f50e46138 --- /dev/null +++ b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp -@@ -0,0 +1,2799 @@ +@@ -0,0 +1,2804 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. @@ -1287,8 +1323,8 @@ index 0000000000..b78f2ab280 + * + */ + -+#ifndef CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP + +#include "asm/register.hpp" +#include "runtime/vm_version.hpp" @@ -2719,7 +2755,6 @@ index 0000000000..b78f2ab280 + // else return -1. 
+ static int is_jlong_mask(jlong x); + -+ // LOONGARCH lui is sign extended, so if you wan't to use imm, you have to use the follow + static int split_low16(int x) { + return (x & 0xffff); + } @@ -2748,6 +2783,12 @@ index 0000000000..b78f2ab280 + return (x & 0xfff); + } + ++ static inline void split_simm32(jlong si32, jint& si12, jint& si20) { ++ si12 = ((jint)(si32 & 0xfff) << 20) >> 20; ++ si32 += (si32 & 0x800) << 1; ++ si20 = si32 >> 12; ++ } ++ + static inline void split_simm38(jlong si38, jint& si18, jint& si20) { + si18 = ((jint)(si38 & 0x3ffff) << 14) >> 14; + si38 += (si38 & 0x20000) << 1; @@ -3200,7 +3241,7 @@ index 0000000000..b78f2ab280 + assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); + + if (os::is_ActiveCoresMP()) -+ andi(R0, R0, 1); ++ andi(R0, R0, 0); + else + emit_int32(insn_I15(dbar_op, hint)); + } @@ -4060,16 +4101,16 @@ index 0000000000..b78f2ab280 + +#include "assembler_loongarch.inline.hpp" + -+#endif // CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp new file mode 100644 -index 0000000000..0f4f452bf5 +index 0000000000..616cd9efe2 --- /dev/null +++ b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -4092,8 +4133,8 @@ index 0000000000..0f4f452bf5 + * + */ + -+#ifndef CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP -+#define CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP ++#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" @@ -4113,10 +4154,10 @@ index 0000000000..0f4f452bf5 + emit_int32(x); +} + -+#endif // CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP ++#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/cpu/loongarch/bytes_loongarch.hpp b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp new file mode 100644 -index 0000000000..558cc1dc6e +index 0000000000..c15344eb39 --- /dev/null +++ b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp @@ -0,0 +1,73 @@ @@ -4145,8 +4186,8 @@ index 0000000000..558cc1dc6e + * + */ + -+#ifndef CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_BYTES_LOONGARCH_HPP ++#define CPU_LOONGARCH_BYTES_LOONGARCH_HPP + +#include "memory/allocation.hpp" + @@ -4155,12 +4196,12 @@ index 0000000000..558cc1dc6e + // Returns true if the byte ordering used by Java is different from the native byte ordering + // of the underlying machine. For example, this is true for Intel x86, but false for Solaris + // on Sparc. 
-+ // we use loongarch, so return true ++ // we use LoongArch, so return true + static inline bool is_Java_byte_ordering_different(){ return true; } + + + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering -+ // (no special code is needed since loongarch CPUs can access unaligned data) ++ // (no special code is needed since LoongArch CPUs can access unaligned data) + static inline u2 get_native_u2(address p) { return *(u2*)p; } + static inline u4 get_native_u4(address p) { return *(u4*)p; } + static inline u8 get_native_u8(address p) { return *(u8*)p; } @@ -4172,7 +4213,7 @@ index 0000000000..558cc1dc6e + + // Efficient reading and writing of unaligned unsigned data in Java + // byte ordering (i.e. big-endian ordering). Byte-order reversal is -+ // needed since LOONGARCH64EL CPUs use little-endian format. ++ // needed since LoongArch64 CPUs use little-endian format. + static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } + static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } + static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } @@ -4192,7 +4233,7 @@ index 0000000000..558cc1dc6e +// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] +#include OS_CPU_HEADER_INLINE(bytes) + -+#endif // CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_BYTES_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp new file mode 100644 index 0000000000..c0eeb63962 @@ -4630,13 +4671,13 @@ index 0000000000..1140e44431 +#endif // CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp new file mode 100644 -index 0000000000..b613abfa04 +index 0000000000..bd8578c72a --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -4662,7 +4703,7 @@ index 0000000000..b613abfa04 +#ifndef CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP + -+// No FPU stack on LOONGARCH ++// No FPU stack on LoongArch +class FpuStackSim; + +#endif // CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP @@ -4854,7 +4895,7 @@ index 0000000000..4f0cf05361 +#endif // CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp new file mode 100644 -index 0000000000..eb333c61c1 +index 0000000000..3b60899071 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp @@ -0,0 +1,354 @@ @@ -5206,7 +5247,7 @@ index 0000000000..eb333c61c1 + +// JSR 292 +LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { -+ return LIR_OprFact::illegalOpr; // Not needed on loongarch64 ++ return LIR_OprFact::illegalOpr; // Not needed on LoongArch64 +} + +bool FrameMap::validate_frame() { @@ -8696,13 +8737,13 @@ index 0000000000..c989e25c3a +#undef __ diff --git a/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp new file mode 100644 -index 0000000000..723010867e +index 0000000000..72a80f37c4 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp @@ -0,0 +1,1396 @@ +/* + * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -9423,7 +9464,7 @@ index 0000000000..723010867e + bool is_oop = is_reference_type(type); + LIR_Opr result = new_register(type); + value.load_item(); -+ assert(type == T_INT || is_oop LP64_ONLY( || type == T_LONG ), "unexpected type"); ++ assert(type == T_INT || is_oop || type == T_LONG , "unexpected type"); + LIR_Opr tmp = new_register(T_INT); + __ xchg(addr, value.result(), result, tmp); + return result; @@ -9432,7 +9473,7 @@ index 0000000000..723010867e +LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) { + LIR_Opr result = new_register(type); + value.load_item(); -+ assert(type == T_INT LP64_ONLY( || type == T_LONG ), "unexpected type"); ++ assert(type == T_INT || type == T_LONG , "unexpected type"); + LIR_Opr tmp = new_register(T_INT); + __ xadd(addr, value.result(), result, tmp); + return result; @@ -10412,13 +10453,13 @@ index 0000000000..38ff4c5836 +#endif // CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp new file mode 100644 -index 0000000000..28100b0116 +index 0000000000..17ff93a595 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp -@@ -0,0 +1,343 @@ +@@ -0,0 +1,344 @@ +/* + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -10450,6 +10491,7 @@ index 0000000000..28100b0116 +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" @@ -11905,13 +11947,13 @@ index 0000000000..aaa708f71e +} diff --git a/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp new file mode 100644 -index 0000000000..f4df7b8958 +index 0000000000..164016e123 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp -@@ -0,0 +1,67 @@ +@@ -0,0 +1,71 @@ +/* + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -11945,6 +11987,8 @@ index 0000000000..f4df7b8958 + +#ifndef COMPILER2 +define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, UseTLAB, true ); ++define_pd_global(bool, ResizeTLAB, true ); +define_pd_global(bool, InlineIntrinsics, true ); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); @@ -11953,6 +11997,7 @@ index 0000000000..f4df7b8958 +define_pd_global(intx, CompileThreshold, 1500 ); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, FreqInlineSize, 325 ); +define_pd_global(intx, NewSizeThreadIncrease, 4*K ); +define_pd_global(intx, InitialCodeCacheSize, 160*K); +define_pd_global(intx, ReservedCodeCacheSize, 32*M ); @@ -11963,6 +12008,7 @@ index 0000000000..f4df7b8958 +define_pd_global(intx, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(uintx, MetaspaceSize, 12*M ); +define_pd_global(bool, NeverActAsServerClassMachine, true ); +define_pd_global(uint64_t,MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); @@ -11978,13 +12024,13 @@ index 0000000000..f4df7b8958 +#endif // CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp new file mode 100644 -index 0000000000..6d54dd6c06 +index 0000000000..27a4ec5229 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -12007,8 +12053,8 @@ index 0000000000..6d54dd6c06 + * + */ + -+#ifndef CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" @@ -12075,16 +12121,16 @@ index 0000000000..6d54dd6c06 +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + -+#endif // CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp new file mode 100644 -index 0000000000..cd27f36bfc +index 0000000000..ec78b942d4 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp -@@ -0,0 +1,34 @@ +@@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -12111,14 +12157,17 @@ index 0000000000..cd27f36bfc +#include "opto/compile.hpp" +#include "opto/node.hpp" + -+// processor dependent initialization for loongarch ++// processor dependent initialization for LoongArch ++ ++extern void reg_mask_init(); + +void Compile::pd_compiler2_init() { + guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++ reg_mask_init(); +} diff --git a/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp new file mode 100644 -index 0000000000..33ff315261 +index 0000000000..653d95806b --- /dev/null +++ b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp @@ -0,0 +1,35 @@ @@ -12147,8 +12196,8 @@ index 0000000000..33ff315261 + * + */ + -+#ifndef CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP ++#define CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP + +private: + void pd_initialize() {} @@ -12156,16 +12205,16 @@ index 0000000000..33ff315261 +public: + void flush_bundle(bool start_new_bundle) {} + -+#endif // CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp new file mode 100644 -index 0000000000..ec86955854 +index 0000000000..d063d5d93e --- /dev/null +++ b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -12212,7 +12261,7 @@ index 0000000000..ec86955854 + // That's why we must use the macroassembler to generate a stub. 
+ MacroAssembler _masm(&cbuf); + -+ address base = __ start_a_stub(Compile::MAX_stubs_size); ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); + if (base == NULL) return NULL; // CodeBuffer::expand failed + // static stub relocation stores the instruction address of the call + @@ -12313,7 +12362,7 @@ index 0000000000..ec86955854 +#endif // !PRODUCT diff --git a/src/hotspot/cpu/loongarch/copy_loongarch.hpp b/src/hotspot/cpu/loongarch/copy_loongarch.hpp new file mode 100644 -index 0000000000..c97d7ba90a +index 0000000000..54b847a736 --- /dev/null +++ b/src/hotspot/cpu/loongarch/copy_loongarch.hpp @@ -0,0 +1,77 @@ @@ -12342,8 +12391,8 @@ index 0000000000..c97d7ba90a + * + */ + -+#ifndef CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_COPY_LOONGARCH_HPP ++#define CPU_LOONGARCH_COPY_LOONGARCH_HPP + +// Inline functions for memory copy and fill. + @@ -12393,10 +12442,10 @@ index 0000000000..c97d7ba90a + (void)memset(to, 0, count); +} + -+#endif //CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP ++#endif //CPU_LOONGARCH_COPY_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp b/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp new file mode 100644 -index 0000000000..343a1b6c12 +index 0000000000..e4a92d1035 --- /dev/null +++ b/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp @@ -0,0 +1,30 @@ @@ -12429,16 +12478,16 @@ index 0000000000..343a1b6c12 +#include "compiler/disassembler.hpp" +#include "depChecker_loongarch.hpp" + -+// Nothing to do on loongarch ++// Nothing to do on LoongArch diff --git a/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp b/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp new file mode 100644 -index 0000000000..26398f7c66 +index 0000000000..29c292a74a --- /dev/null +++ b/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -12461,15 +12510,15 @@ index 0000000000..26398f7c66 + * + */ + -+#ifndef CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP ++#define CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP + -+// Nothing to do on LOONGARCH ++// Nothing to do on LoongArch + -+#endif // CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp new file mode 100644 -index 0000000000..ccd89e8d6d +index 0000000000..04359bc172 --- /dev/null +++ b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp @@ -0,0 +1,37 @@ @@ -12498,8 +12547,8 @@ index 0000000000..ccd89e8d6d + * + */ + -+#ifndef CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP + + static int pd_instruction_alignment() { + return sizeof(int); @@ -12509,10 +12558,10 @@ index 0000000000..ccd89e8d6d + return "gpr-names=64"; + } + -+#endif // CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.cpp b/src/hotspot/cpu/loongarch/frame_loongarch.cpp new file mode 100644 -index 0000000000..bfeccffd35 +index 0000000000..9b4f3b88d4 --- /dev/null +++ b/src/hotspot/cpu/loongarch/frame_loongarch.cpp @@ -0,0 +1,690 @@ @@ -12911,11 +12960,11 @@ index 0000000000..bfeccffd35 + // Since the interpreter always saves FP if we record where it is then + // we don't have to always save FP on entry and exit to c2 compiled + // code, on entry will be enough. -+#ifdef COMPILER2 ++#ifdef COMPILER2_OR_JVMCI + if (map->update_map()) { + update_map_with_saved_link(map, (intptr_t**) addr_at(java_frame_link_offset)); + } -+#endif /* COMPILER2 */ ++#endif // COMPILER2_OR_JVMCI + return frame(sender_sp, unextended_sp, link(), sender_pc()); +} + @@ -12945,7 +12994,7 @@ index 0000000000..bfeccffd35 +//------------------------------------------------------------------------------ +// frame::adjust_unextended_sp +void frame::adjust_unextended_sp() { -+ // On LOONGARCH, sites calling method handle intrinsics and lambda forms are treated ++ // On LoongArch, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. 
+ @@ -13208,7 +13257,7 @@ index 0000000000..bfeccffd35 +#endif diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.hpp new file mode 100644 -index 0000000000..fb3040c458 +index 0000000000..b16389b3a3 --- /dev/null +++ b/src/hotspot/cpu/loongarch/frame_loongarch.hpp @@ -0,0 +1,171 @@ @@ -13237,8 +13286,8 @@ index 0000000000..fb3040c458 + * + */ + -+#ifndef CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_HPP ++#define CPU_LOONGARCH_FRAME_LOONGARCH_HPP + +#include "runtime/synchronizer.hpp" + @@ -13382,16 +13431,16 @@ index 0000000000..fb3040c458 + + static jint interpreter_frame_expression_stack_direction() { return -1; } + -+#endif // CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_FRAME_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp new file mode 100644 -index 0000000000..8039789726 +index 0000000000..1ddc038eea --- /dev/null +++ b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp -@@ -0,0 +1,246 @@ +@@ -0,0 +1,252 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13414,8 +13463,8 @@ index 0000000000..8039789726 + * + */ + -+#ifndef CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP -+#define CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP ++#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP + +#include "code/codeCache.hpp" +#include "code/vmreg.inline.hpp" @@ -13534,6 +13583,12 @@ index 0000000000..8039789726 + return (intptr_t*) *(intptr_t **)addr_at(native_frame_link_offset); +} + ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = is_java_frame() ? (intptr_t **)addr_at(java_frame_link_offset) ++ : (intptr_t **)addr_at(native_frame_link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ +inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + +// Return address: @@ -13634,7 +13689,7 @@ index 0000000000..8039789726 + *((oop*) map->location(V0->as_VMReg())) = obj; +} + -+#endif // CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP ++#endif // CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp new file mode 100644 index 0000000000..2cdf3dddb7 @@ -14907,10 +14962,10 @@ index 0000000000..18a6f92bc2 +#endif // CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp new file mode 100644 -index 0000000000..1276316760 +index 0000000000..dc21d001cc --- /dev/null +++ b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp -@@ -0,0 +1,61 @@ +@@ -0,0 +1,53 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
@@ -14936,9 +14991,9 @@ index 0000000000..1276316760 + * + */ + -+#ifndef CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP -+// Size of LOONGARCH Instructions ++#ifndef CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP ++// Size of LoongArch Instructions +const int BytesPerInstWord = 4; + +const int StackAlignmentInBytes = (2*wordSize); @@ -14952,14 +15007,6 @@ index 0000000000..1276316760 +#define SUPPORTS_NATIVE_CX8 + +// FIXME: LA -+// According to the ARMv8 ARM, "Concurrent modification and execution -+// of instructions can lead to the resulting instruction performing -+// any behavior that can be achieved by executing any sequence of -+// instructions that can be executed from the same Exception level, -+// except where the instruction before modification and the -+// instruction after modification is a B, BL, NOP, BKPT, SVC, HVC, or -+// SMC instruction." -+// +// This makes the games we play when patching difficult, so when we +// come across an access that needs patching we deoptimize. There are +// ways we can avoid this, but these would slow down C1-compiled code @@ -14971,13 +15018,13 @@ index 0000000000..1276316760 + +#define THREAD_LOCAL_POLL + -+#endif // CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/globals_loongarch.hpp b/src/hotspot/cpu/loongarch/globals_loongarch.hpp new file mode 100644 -index 0000000000..d4c18dfd87 +index 0000000000..e6b758b554 --- /dev/null +++ b/src/hotspot/cpu/loongarch/globals_loongarch.hpp -@@ -0,0 +1,132 @@ +@@ -0,0 +1,109 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. @@ -15003,8 +15050,8 @@ index 0000000000..d4c18dfd87 + * + */ + -+#ifndef CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" @@ -15020,14 +15067,7 @@ index 0000000000..d4c18dfd87 +define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast + +define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. -+// See 4827828 for this change. There is no globals_core_i486.hpp. I can't -+// assign a different value for C2 without touching a number of files. Use -+// #ifdef to minimize the change as it's late in Mantis. -- FIXME. -+// c1 doesn't have this problem because the fix to 4858033 assures us -+// the the vep is aligned at CodeEntryAlignment whereas c2 only aligns -+// the uep and the vep doesn't get real alignment but just slops on by -+// only assured that the entry instruction meets the 5 byte size requirement. 
-+define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, CodeEntryAlignment, 16); +define_pd_global(intx, OptoLoopAlignment, 16); +define_pd_global(intx, InlineFrequencyCount, 100); +define_pd_global(intx, InlineSmallCode, 2000); @@ -15084,32 +15124,16 @@ index 0000000000..d4c18dfd87 + product(bool, UseLASX, false, \ + "Use LASX 256-bit vector instructions") \ + \ -+ product(intx, UseSyncLevel, 10000, \ -+ "The sync level on Loongson CPUs" \ -+ "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ -+ "UseSyncLevel == 4000, 101, maybe for GS464V" \ -+ "UseSyncLevel == 3000, 001, maybe for GS464V" \ -+ "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ -+ "UseSyncLevel == 1000, 110, maybe for GS464") \ -+ \ + product(bool, UseBarriersForVolatile, false, \ + "Use memory barriers to implement volatile accesses") \ + \ -+ develop(bool, UseBoundCheckInstruction, false, \ -+ "Use bound check instruction") \ -+ \ -+ product(intx, SetFSFOFN, 999, \ -+ "Set the FS/FO/FN bits in FCSR" \ -+ "999 means FS/FO/FN will not be changed" \ -+ "=XYZ, with X:FS, Y:FO, Z:FN, X, Y and Z in 0=off, 1=on") \ -+ \ + product(bool, UseCRC32, false, \ + "Use CRC32 instructions for CRC32 computation") \ + \ + product(bool, UseActiveCoresMP, false, \ + "Eliminate barriers for single active cpu") + -+#endif // CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp b/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp new file mode 100644 index 0000000000..7b97694827 @@ -15258,13 +15282,13 @@ index 0000000000..1ae7e5376c +} diff --git a/src/hotspot/cpu/loongarch/icache_loongarch.hpp b/src/hotspot/cpu/loongarch/icache_loongarch.hpp new file mode 100644 -index 0000000000..15e45cb350 +index 0000000000..3a180549fc --- /dev/null +++ b/src/hotspot/cpu/loongarch/icache_loongarch.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -15287,8 +15311,8 @@ index 0000000000..15e45cb350 + * + */ + -+#ifndef CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_ICACHE_LOONGARCH_HPP ++#define CPU_LOONGARCH_ICACHE_LOONGARCH_HPP + +// Interface for updating the instruction cache. Whenever the VM modifies +// code, part of the processor instruction cache potentially has to be flushed. @@ -15302,13 +15326,13 @@ index 0000000000..15e45cb350 + }; +}; + -+#endif // CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_ICACHE_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp new file mode 100644 -index 0000000000..23abf7b891 +index 0000000000..53a06ba7fd --- /dev/null +++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp -@@ -0,0 +1,276 @@ +@@ -0,0 +1,281 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
@@ -15334,8 +15358,8 @@ index 0000000000..23abf7b891 + * + */ + -+#ifndef CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP -+#define CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP ++#ifndef CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP + +#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" @@ -15345,6 +15369,7 @@ index 0000000000..23abf7b891 + +// This file specializes the assember with interpreter-specific macros + ++typedef ByteSize (*OffsetFunction)(uint); + +class InterpreterMacroAssembler: public MacroAssembler { +#ifndef CC_INTERP @@ -15544,6 +15569,10 @@ index 0000000000..23abf7b891 + Register reg2, int start_row, + Label& done, bool is_virtual_call); + ++ void record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset); + void update_mdp_by_offset(Register mdp_in, int offset_of_offset); + void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); + void update_mdp_by_constant(Register mdp_in, int constant); @@ -15584,16 +15613,16 @@ index 0000000000..23abf7b891 + void notify_method_exit(TosState state, NotifyMethodExitMode mode); +}; + -+#endif // CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP ++#endif // CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP diff --git a/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp new file mode 100644 -index 0000000000..85ef3d5535 +index 0000000000..c533a57652 --- /dev/null +++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp -@@ -0,0 +1,2019 @@ +@@ -0,0 +1,2043 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -15630,6 +15659,7 @@ index 0000000000..85ef3d5535 +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" @@ -15682,7 +15712,7 @@ index 0000000000..85ef3d5535 + int number_of_arguments) { + // interpreter specific + // -+ // Note: No need to save/restore bcp & locals (r13 & r14) pointer ++ // Note: No need to save/restore bcp & locals pointer + // since these are callee saved registers and no blocking/ + // GC can happen in leaf calls. + // Further Note: DO NOT save/restore bcp/locals. If a caller has @@ -16119,9 +16149,9 @@ index 0000000000..85ef3d5535 + + +// The following two routines provide a hook so that an implementation -+// can schedule the dispatch in two parts. loongarch64 does not do this. ++// can schedule the dispatch in two parts. LoongArch64 does not do this. 
+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { -+ // Nothing loongarch64 specific to be done here ++ // Nothing LoongArch64 specific to be done here +} + +void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { @@ -16215,7 +16245,7 @@ index 0000000000..85ef3d5535 + + +void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { -+ // load next bytecode (load before advancing r13 to prevent AGI) ++ // load next bytecode + ld_bu(Rnext, BCP, step); + increment(BCP, step); + dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); @@ -16510,10 +16540,10 @@ index 0000000000..85ef3d5535 + save_bcp(); // Save in case of exception + + // Convert from BasicObjectLock structure to object and BasicLock structure -+ // Store the BasicLock address into %T2 ++ // Store the BasicLock address into tmp_reg + addi_d(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); + -+ // Load oop into scr_reg(%T1) ++ // Load oop into scr_reg + ld_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); + // free entry + st_d(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); @@ -16915,14 +16945,36 @@ index 0000000000..85ef3d5535 + if (is_virtual_call) { + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + } -+ return; ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset())); ++ } ++#endif // INCLUDE_JVMCI ++ } else { ++ int non_profiled_offset = -1; ++ if (is_virtual_call) { ++ non_profiled_offset = in_bytes(CounterData::count_offset()); ++ } ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()); ++ } ++#endif // INCLUDE_JVMCI ++ ++ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, ++ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); + } ++} + -+ int last_row = VirtualCallData::row_limit() - 1; ++void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset) { ++ int last_row = total_rows - 1; + assert(start_row <= last_row, "must be work left to do"); -+ // Test this row for both the receiver and for null. ++ // Test this row for both the item and for null. + // Take any of three different outcomes: -+ // 1. found receiver => increment count and goto done ++ // 1. found item => increment count and goto done + // 2. found null => keep looking for case 1, maybe allocate this cell + // 3. found something else => keep looking for cases 1 and 2 + // Case 3 is handled by a recursive call. @@ -16930,59 +16982,60 @@ index 0000000000..85ef3d5535 + Label next_test; + bool test_for_null_also = (row == start_row); + -+ // See if the receiver is receiver[n]. -+ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); -+ test_mdp_data_at(mdp, recvr_offset, receiver, ++ // See if the receiver is item[n]. ++ int item_offset = in_bytes(item_offset_fn(row)); ++ test_mdp_data_at(mdp, item_offset, item, + (test_for_null_also ? reg2 : noreg), + next_test); -+ // (Reg2 now contains the receiver from the CallData.) ++ // (Reg2 now contains the item from the CallData.) + -+ // The receiver is receiver[n]. Increment count[n]. 
-+ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); ++ // The receiver is item[n]. Increment count[n]. ++ int count_offset = in_bytes(item_count_offset_fn(row)); + increment_mdp_data_at(mdp, count_offset); -+ beq(R0, R0, done); ++ b(done); + bind(next_test); + + if (test_for_null_also) { + Label found_null; -+ // Failed the equality check on receiver[n]... Test for null. ++ // Failed the equality check on item[n]... Test for null. + if (start_row == last_row) { + // The only thing left to do is handle the null case. -+ if (is_virtual_call) { -+ beq(reg2, R0, found_null); -+ // Receiver did not match any saved receiver and there is no empty row for it. ++ if (non_profiled_offset >= 0) { ++ beqz(reg2, found_null); ++ // Item did not match any saved item and there is no empty row for it. + // Increment total counter to indicate polymorphic case. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ beq(R0, R0, done); ++ increment_mdp_data_at(mdp, non_profiled_offset); ++ b(done); + bind(found_null); + } else { -+ bne(reg2, R0, done); ++ bnez(reg2, done); + } + break; + } + // Since null is rare, make it be the branch-taken case. -+ beq(reg2, R0, found_null); ++ beqz(reg2, found_null); + + // Put all the "Case 3" tests here. -+ record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); ++ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, ++ item_offset_fn, item_count_offset_fn, non_profiled_offset); + -+ // Found a null. Keep searching for a matching receiver, ++ // Found a null. Keep searching for a matching item, + // but remember that this is an empty (unused) slot. + bind(found_null); + } + } + -+ // In the fall-through case, we found no matching receiver, but we -+ // observed the receiver[start_row] is NULL. ++ // In the fall-through case, we found no matching item, but we ++ // observed the item[start_row] is NULL. + -+ // Fill in the receiver field and increment the count. -+ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); -+ set_mdp_data_at(mdp, recvr_offset, receiver); -+ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); ++ // Fill in the item field and increment the count. ++ int item_offset = in_bytes(item_offset_fn(start_row)); ++ set_mdp_data_at(mdp, item_offset, item); ++ int count_offset = in_bytes(item_count_offset_fn(start_row)); + li(reg2, DataLayout::counter_increment); + set_mdp_data_at(mdp, count_offset, reg2); + if (start_row > 0) { -+ beq(R0, R0, done); ++ b(done); + } +} + @@ -17455,7 +17508,7 @@ index 0000000000..85ef3d5535 +} + +void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { -+ guarantee(T4 == tmp1, "You are reqired to use T4 as the index register for LOONGARCH !"); ++ guarantee(T4 == tmp1, "You are reqired to use T4 as the index register for LoongArch !"); + + if (ProfileInterpreter && MethodData::profile_parameters()) { + Label profile_continue, done; @@ -17612,13 +17665,13 @@ index 0000000000..85ef3d5535 +} diff --git a/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp new file mode 100644 -index 0000000000..052eb997e4 +index 0000000000..d53d951a16 --- /dev/null +++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp -@@ -0,0 +1,66 @@ +@@ -0,0 +1,62 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. 
All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -17641,10 +17694,11 @@ index 0000000000..052eb997e4 + * + */ + -+#ifndef CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP ++#define CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP + -+#include "memory/allocation.hpp" ++// This is included in the middle of class Interpreter. ++// Do not include files here. + +// native method calls + @@ -17665,12 +17719,7 @@ index 0000000000..052eb997e4 + + public: + // Creation -+ SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) { -+ _masm = new MacroAssembler(buffer); -+ _num_int_args = (method->is_static() ? 1 : 0); -+ _num_fp_args = 0; -+ _stack_offset = 0; -+ } ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); + + // Code generation + void generate(uint64_t fingerprint); @@ -17681,13 +17730,13 @@ index 0000000000..052eb997e4 + static Register temp(); +}; + -+#endif // CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp new file mode 100644 -index 0000000000..3b75424f05 +index 0000000000..e2f31997b7 --- /dev/null +++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp -@@ -0,0 +1,266 @@ +@@ -0,0 +1,273 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. @@ -17738,6 +17787,13 @@ index 0000000000..3b75424f05 +#define T8 RT8 + +// Implementation of SignatureHandlerGenerator ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++ _num_int_args = (method->is_static() ? 1 : 0); ++ _num_fp_args = 0; ++ _stack_offset = 0; ++} + +void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { + __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); @@ -17956,13 +18012,13 @@ index 0000000000..3b75424f05 +IRT_END diff --git a/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp new file mode 100644 -index 0000000000..a2a86c5ead +index 0000000000..6814fa44a0 --- /dev/null +++ b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -17985,8 +18041,8 @@ index 0000000000..a2a86c5ead + * + */ + -+#ifndef CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP ++#define CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP + +private: + @@ -18046,7 +18102,7 @@ index 0000000000..a2a86c5ead + // Assert (last_Java_sp == NULL || fp == NULL) + void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } + -+#endif // CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp b/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp new file mode 100644 index 0000000000..114e47b053 @@ -18220,7 +18276,7 @@ index 0000000000..114e47b053 +} diff --git a/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp new file mode 100644 -index 0000000000..dc838b3531 +index 0000000000..b281f86372 --- /dev/null +++ b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp @@ -0,0 +1,144 @@ @@ -18249,8 +18305,8 @@ index 0000000000..dc838b3531 + * + */ + -+#ifndef CPU_LOOGNARCH_VM_JNITYPES_LOOGNARCH_HPP -+#define CPU_LOOGNARCH_VM_JNITYPES_LOOGNARCH_HPP ++#ifndef CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP ++#define CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP + +#include "jni.h" +#include "memory/allocation.hpp" @@ -18367,7 +18423,212 @@ index 0000000000..dc838b3531 +#undef _JNI_SLOT_OFFSET +}; + -+#endif // CPU_LOOGNARCH_VM_JNITYPES_LOOGNARCH_HPP ++#endif // CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp +new file mode 100644 +index 0000000000..ea481c7fa6 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp +@@ -0,0 +1,199 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "jvmci/jvmciCodeInstaller.hpp" ++#include "jvmci/jvmciRuntime.hpp" ++#include "jvmci/jvmciCompilerToVM.hpp" ++#include "jvmci/jvmciJavaClasses.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) { ++ if (inst->is_int_branch() || inst->is_float_branch()) { ++ return pc_offset + NativeInstruction::nop_instruction_size; ++ } else if (inst->is_call()) { ++ return pc_offset + NativeCall::instruction_size; ++ } else if (inst->is_far_call()) { ++ return pc_offset + NativeFarCall::instruction_size; ++ } else if (inst->is_jump()) { ++ return pc_offset + NativeGeneralJump::instruction_size; ++ } else if (inst->is_lu12iw_lu32id()) { ++ // match LoongArch64TestAssembler.java emitCall ++ // lu12i_w; lu32i_d; jirl ++ return pc_offset + 3 * NativeInstruction::nop_instruction_size; ++ } else { ++ JVMCI_ERROR_0("unsupported type of instruction for call site"); ++ } ++ return 0; ++} ++ ++void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ Handle obj(THREAD, HotSpotObjectConstantImpl::object(constant)); ++ jobject value = JNIHandles::make_local(obj()); ++ if (HotSpotObjectConstantImpl::compressed(constant)) { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ move->set_data((intptr_t)(CompressedOops::encode(cast_to_oop(cast_from_oop
(obj()))))); ++ int oop_index = _oop_recorder->find_index(value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ _instructions->relocate(pc, rspec, Assembler::narrow_oop_operand); ++ } else { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ move->set_data((intptr_t)(cast_from_oop
(obj()))); ++ int oop_index = _oop_recorder->find_index(value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ _instructions->relocate(pc, rspec); ++ } ++} ++ ++void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ if (HotSpotMetaspaceConstantImpl::compressed(constant)) { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ narrowKlass narrowOop = record_narrow_metadata_reference(_instructions, pc, constant, CHECK); ++ move->set_data((intptr_t) narrowOop); ++ TRACE_jvmci_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/0x%x", p2i(pc), narrowOop); ++ } else { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ void* reference = record_metadata_reference(_instructions, pc, constant, CHECK); ++ move->set_data((intptr_t) reference); ++ TRACE_jvmci_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(reference)); ++ } ++} ++ ++void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ NativeInstruction* inst = nativeInstruction_at(pc); ++ if (inst->is_pcaddu12i_add()) { ++ address dest = _constants->start() + data_offset; ++ _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS)); ++ TRACE_jvmci_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset); ++ } else { ++ JVMCI_ERROR("unknown load or move instruction at " PTR_FORMAT, p2i(pc)); ++ } ++} ++ ++void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) { ++ address pc = (address) inst; ++ if (inst->is_call()) { ++ NativeCall* call = nativeCall_at(pc); ++ call->set_destination((address) foreign_call_destination); ++ _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_far_call()) { ++ NativeFarCall* call = nativeFarCall_at(pc); ++ call->set_destination((address) foreign_call_destination); ++ _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_jump()) { ++ NativeGeneralJump* jump = nativeGeneralJump_at(pc); ++ jump->set_jump_destination((address) foreign_call_destination); ++ _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_lu12iw_lu32id()) { ++ // match emitCall of LoongArch64TestAssembler.java ++ // lu12i_w; lu32i_d; jirl ++ MacroAssembler::pd_patch_instruction((address)inst, (address)foreign_call_destination); ++ } else { ++ JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc)); ++ } ++ TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst)); ++} ++ ++void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &cbuf, Handle hotspot_method, jint pc_offset, TRAPS) { ++#ifdef ASSERT ++ Method* method = NULL; ++ // we need to check, this might also be an unresolved method ++ if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) { ++ method = getMethodFromHotSpotMethod(hotspot_method()); ++ } ++#endif ++ switch (_next_call_type) { ++ case INLINE_INVOKE: ++ break; ++ case INVOKEVIRTUAL: ++ case INVOKEINTERFACE: { ++ assert(!method->is_static(), "cannot call static method with invokeinterface"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), 
virtual_call_Relocation::spec(_invoke_mark_pc)); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_virtual_call_stub()); ++ break; ++ } ++ case INVOKESTATIC: { ++ assert(method->is_static(), "cannot call non-static method with invokestatic"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), relocInfo::static_call_type); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_static_call_stub()); ++ break; ++ } ++ case INVOKESPECIAL: { ++ assert(!method->is_static(), "cannot call static method with invokespecial"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), relocInfo::opt_virtual_call_type); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_opt_virtual_call_stub()); ++ break; ++ } ++ default: ++ JVMCI_ERROR("invalid _next_call_type value"); ++ break; ++ } ++} ++ ++void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) { ++ switch (mark) { ++ case POLL_NEAR: ++ JVMCI_ERROR("unimplemented"); ++ break; ++ case POLL_FAR: ++ _instructions->relocate(pc, relocInfo::poll_type); ++ break; ++ case POLL_RETURN_NEAR: ++ JVMCI_ERROR("unimplemented"); ++ break; ++ case POLL_RETURN_FAR: ++ _instructions->relocate(pc, relocInfo::poll_return_type); ++ break; ++ default: ++ JVMCI_ERROR("invalid mark value"); ++ break; ++ } ++} ++ ++// convert JVMCI register indices (as used in oop maps) to HotSpot registers ++VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) { ++ if (jvmci_reg < RegisterImpl::number_of_registers) { ++ return as_Register(jvmci_reg)->as_VMReg(); ++ } else { ++ jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers; ++ if (floatRegisterNumber >= 0 && floatRegisterNumber < FloatRegisterImpl::number_of_registers) { ++ return as_FloatRegister(floatRegisterNumber)->as_VMReg(); ++ } ++ JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg); ++ } ++} ++ ++bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) { ++ return !hotspotRegister->is_FloatRegister(); ++} diff --git a/src/hotspot/cpu/loongarch/loongarch.ad b/src/hotspot/cpu/loongarch/loongarch.ad new file mode 100644 index 0000000000..80dff0c762 @@ -18401,10 +18662,10 @@ index 0000000000..80dff0c762 + diff --git a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad new file mode 100644 -index 0000000000..3c65f7518d +index 0000000000..a5fb5f7b85 --- /dev/null +++ b/src/hotspot/cpu/loongarch/loongarch_64.ad -@@ -0,0 +1,14043 @@ +@@ -0,0 +1,13906 @@ +// +// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. @@ -18452,10 +18713,10 @@ index 0000000000..3c65f7518d + +// General Registers +// Integer Registers -+ reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); ++ reg_def R0 ( NS, NS, Op_RegI, 0, R0->as_VMReg()); ++ reg_def R0_H ( NS, NS, Op_RegI, 0, R0->as_VMReg()->next()); + reg_def RA ( NS, NS, Op_RegI, 1, RA->as_VMReg()); + reg_def RA_H ( NS, NS, Op_RegI, 1, RA->as_VMReg()->next()); -+ // TODO: LA + reg_def TP ( NS, NS, Op_RegI, 2, TP->as_VMReg()); + reg_def TP_H ( NS, NS, Op_RegI, 2, TP->as_VMReg()->next()); + reg_def SP ( NS, NS, Op_RegI, 3, SP->as_VMReg()); @@ -18519,293 +18780,293 @@ index 0000000000..3c65f7518d + + +// Floating/Vector registers. 
-+reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg() ); -+reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next() ); -+reg_def F0_J ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(2) ); -+reg_def F0_K ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(3) ); -+reg_def F0_L ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(4) ); -+reg_def F0_M ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(5) ); -+reg_def F0_N ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(6) ); -+reg_def F0_O ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(7) ); -+ -+reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg() ); -+reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next() ); -+reg_def F1_J ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(2) ); -+reg_def F1_K ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(3) ); -+reg_def F1_L ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(4) ); -+reg_def F1_M ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(5) ); -+reg_def F1_N ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(6) ); -+reg_def F1_O ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(7) ); -+ -+reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg() ); -+reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next() ); -+reg_def F2_J ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(2) ); -+reg_def F2_K ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(3) ); -+reg_def F2_L ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(4) ); -+reg_def F2_M ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(5) ); -+reg_def F2_N ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(6) ); -+reg_def F2_O ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(7) ); -+ -+reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg() ); -+reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next() ); -+reg_def F3_J ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(2) ); -+reg_def F3_K ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(3) ); -+reg_def F3_L ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(4) ); -+reg_def F3_M ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(5) ); -+reg_def F3_N ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(6) ); -+reg_def F3_O ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(7) ); -+ -+reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg() ); -+reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next() ); -+reg_def F4_J ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(2) ); -+reg_def F4_K ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(3) ); -+reg_def F4_L ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(4) ); -+reg_def F4_M ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(5) ); -+reg_def F4_N ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(6) ); -+reg_def F4_O ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(7) ); -+ -+reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg() ); -+reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next() ); -+reg_def F5_J ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(2) ); -+reg_def F5_K ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(3) ); -+reg_def F5_L ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(4) ); -+reg_def F5_M ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(5) ); -+reg_def F5_N ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(6) ); -+reg_def F5_O ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(7) ); -+ -+reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg() ); -+reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next() ); -+reg_def F6_J ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(2) ); -+reg_def F6_K ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(3) ); -+reg_def F6_L ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(4) ); -+reg_def F6_M ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(5) ); -+reg_def F6_N ( SOC, SOC, Op_RegF, 6, 
F6->as_VMReg()->next(6) ); -+reg_def F6_O ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(7) ); -+ -+reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg() ); -+reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next() ); -+reg_def F7_J ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(2) ); -+reg_def F7_K ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(3) ); -+reg_def F7_L ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(4) ); -+reg_def F7_M ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(5) ); -+reg_def F7_N ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(6) ); -+reg_def F7_O ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(7) ); -+ -+reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg() ); -+reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next() ); -+reg_def F8_J ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(2) ); -+reg_def F8_K ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(3) ); -+reg_def F8_L ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(4) ); -+reg_def F8_M ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(5) ); -+reg_def F8_N ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(6) ); -+reg_def F8_O ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(7) ); -+ -+reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg() ); -+reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next() ); -+reg_def F9_J ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(2) ); -+reg_def F9_K ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(3) ); -+reg_def F9_L ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(4) ); -+reg_def F9_M ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(5) ); -+reg_def F9_N ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(6) ); -+reg_def F9_O ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(7) ); -+ -+reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg() ); -+reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next() ); -+reg_def F10_J ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(2) ); -+reg_def F10_K ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(3) ); -+reg_def F10_L ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(4) ); -+reg_def F10_M ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(5) ); -+reg_def F10_N ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(6) ); -+reg_def F10_O ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(7) ); -+ -+reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg() ); -+reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next() ); -+reg_def F11_J ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(2) ); -+reg_def F11_K ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(3) ); -+reg_def F11_L ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(4) ); -+reg_def F11_M ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(5) ); -+reg_def F11_N ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(6) ); -+reg_def F11_O ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(7) ); -+ -+reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg() ); -+reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next() ); -+reg_def F12_J ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(2) ); -+reg_def F12_K ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(3) ); -+reg_def F12_L ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(4) ); -+reg_def F12_M ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(5) ); -+reg_def F12_N ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(6) ); -+reg_def F12_O ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(7) ); -+ -+reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg() ); -+reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next() ); -+reg_def F13_J ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(2) ); -+reg_def F13_K ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(3) ); 
-+reg_def F13_L ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(4) ); -+reg_def F13_M ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(5) ); -+reg_def F13_N ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(6) ); -+reg_def F13_O ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(7) ); -+ -+reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg() ); -+reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next() ); -+reg_def F14_J ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(2) ); -+reg_def F14_K ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(3) ); -+reg_def F14_L ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(4) ); -+reg_def F14_M ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(5) ); -+reg_def F14_N ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(6) ); -+reg_def F14_O ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(7) ); -+ -+reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg() ); -+reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next() ); -+reg_def F15_J ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(2) ); -+reg_def F15_K ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(3) ); -+reg_def F15_L ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(4) ); -+reg_def F15_M ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(5) ); -+reg_def F15_N ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(6) ); -+reg_def F15_O ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(7) ); -+ -+reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg() ); -+reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next() ); -+reg_def F16_J ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(2) ); -+reg_def F16_K ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(3) ); -+reg_def F16_L ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(4) ); -+reg_def F16_M ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(5) ); -+reg_def F16_N ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(6) ); -+reg_def F16_O ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(7) ); -+ -+reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg() ); -+reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next() ); -+reg_def F17_J ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(2) ); -+reg_def F17_K ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(3) ); -+reg_def F17_L ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(4) ); -+reg_def F17_M ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(5) ); -+reg_def F17_N ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(6) ); -+reg_def F17_O ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(7) ); -+ -+reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg() ); -+reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next() ); -+reg_def F18_J ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(2) ); -+reg_def F18_K ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(3) ); -+reg_def F18_L ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(4) ); -+reg_def F18_M ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(5) ); -+reg_def F18_N ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(6) ); -+reg_def F18_O ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(7) ); -+ -+reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg() ); -+reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next() ); -+reg_def F19_J ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(2) ); -+reg_def F19_K ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(3) ); -+reg_def F19_L ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(4) ); -+reg_def F19_M ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(5) ); -+reg_def F19_N ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(6) ); -+reg_def F19_O ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(7) ); -+ -+reg_def F20 ( 
SOC, SOC, Op_RegF, 20, F20->as_VMReg() ); -+reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next() ); -+reg_def F20_J ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(2) ); -+reg_def F20_K ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(3) ); -+reg_def F20_L ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(4) ); -+reg_def F20_M ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(5) ); -+reg_def F20_N ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(6) ); -+reg_def F20_O ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(7) ); -+ -+reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg() ); -+reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next() ); -+reg_def F21_J ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(2) ); -+reg_def F21_K ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(3) ); -+reg_def F21_L ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(4) ); -+reg_def F21_M ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(5) ); -+reg_def F21_N ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(6) ); -+reg_def F21_O ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(7) ); -+ -+reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg() ); -+reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next() ); -+reg_def F22_J ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(2) ); -+reg_def F22_K ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(3) ); -+reg_def F22_L ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(4) ); -+reg_def F22_M ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(5) ); -+reg_def F22_N ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(6) ); -+reg_def F22_O ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(7) ); -+ -+reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg() ); -+reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next() ); -+reg_def F23_J ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(2) ); -+reg_def F23_K ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(3) ); -+reg_def F23_L ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(4) ); -+reg_def F23_M ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(5) ); -+reg_def F23_N ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(6) ); -+reg_def F23_O ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(7) ); -+ -+reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg() ); -+reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next() ); -+reg_def F24_J ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(2) ); -+reg_def F24_K ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(3) ); -+reg_def F24_L ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(4) ); -+reg_def F24_M ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(5) ); -+reg_def F24_N ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(6) ); -+reg_def F24_O ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(7) ); -+ -+reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg() ); -+reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next() ); -+reg_def F25_J ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(2) ); -+reg_def F25_K ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(3) ); -+reg_def F25_L ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(4) ); -+reg_def F25_M ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(5) ); -+reg_def F25_N ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(6) ); -+reg_def F25_O ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(7) ); -+ -+reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg() ); -+reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next() ); -+reg_def F26_J ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(2) ); -+reg_def F26_K ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(3) ); -+reg_def F26_L ( SOC, SOC, Op_RegF, 26, 
F26->as_VMReg()->next(4) ); -+reg_def F26_M ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(5) ); -+reg_def F26_N ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(6) ); -+reg_def F26_O ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(7) ); -+ -+reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg() ); -+reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next() ); -+reg_def F27_J ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(2) ); -+reg_def F27_K ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(3) ); -+reg_def F27_L ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(4) ); -+reg_def F27_M ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(5) ); -+reg_def F27_N ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(6) ); -+reg_def F27_O ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(7) ); -+ -+reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg() ); -+reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next() ); -+reg_def F28_J ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(2) ); -+reg_def F28_K ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(3) ); -+reg_def F28_L ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(4) ); -+reg_def F28_M ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(5) ); -+reg_def F28_N ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(6) ); -+reg_def F28_O ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(7) ); -+ -+reg_def F29 ( SOC, SOC, Op_RegF, 29, F29->as_VMReg() ); -+reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next() ); -+reg_def F29_J ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(2) ); -+reg_def F29_K ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(3) ); -+reg_def F29_L ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(4) ); -+reg_def F29_M ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(5) ); -+reg_def F29_N ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(6) ); -+reg_def F29_O ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(7) ); -+ -+reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg() ); -+reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next() ); -+reg_def F30_J ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(2) ); -+reg_def F30_K ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(3) ); -+reg_def F30_L ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(4) ); -+reg_def F30_M ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(5) ); -+reg_def F30_N ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(6) ); -+reg_def F30_O ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(7) ); -+ -+reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg() ); -+reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next() ); -+reg_def F31_J ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(2) ); -+reg_def F31_K ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(3) ); -+reg_def F31_L ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(4) ); -+reg_def F31_M ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(5) ); -+reg_def F31_N ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(6) ); -+reg_def F31_O ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(7) ); ++ reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg() ); ++ reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next() ); ++ reg_def F0_J ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(2) ); ++ reg_def F0_K ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(3) ); ++ reg_def F0_L ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(4) ); ++ reg_def F0_M ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(5) ); ++ reg_def F0_N ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(6) ); ++ reg_def F0_O ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(7) ); ++ ++ reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg() ); ++ reg_def F1_H ( 
SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next() ); ++ reg_def F1_J ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(2) ); ++ reg_def F1_K ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(3) ); ++ reg_def F1_L ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(4) ); ++ reg_def F1_M ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(5) ); ++ reg_def F1_N ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(6) ); ++ reg_def F1_O ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(7) ); ++ ++ reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg() ); ++ reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next() ); ++ reg_def F2_J ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(2) ); ++ reg_def F2_K ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(3) ); ++ reg_def F2_L ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(4) ); ++ reg_def F2_M ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(5) ); ++ reg_def F2_N ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(6) ); ++ reg_def F2_O ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(7) ); ++ ++ reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg() ); ++ reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next() ); ++ reg_def F3_J ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(2) ); ++ reg_def F3_K ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(3) ); ++ reg_def F3_L ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(4) ); ++ reg_def F3_M ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(5) ); ++ reg_def F3_N ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(6) ); ++ reg_def F3_O ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(7) ); ++ ++ reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg() ); ++ reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next() ); ++ reg_def F4_J ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(2) ); ++ reg_def F4_K ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(3) ); ++ reg_def F4_L ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(4) ); ++ reg_def F4_M ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(5) ); ++ reg_def F4_N ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(6) ); ++ reg_def F4_O ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(7) ); ++ ++ reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg() ); ++ reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next() ); ++ reg_def F5_J ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(2) ); ++ reg_def F5_K ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(3) ); ++ reg_def F5_L ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(4) ); ++ reg_def F5_M ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(5) ); ++ reg_def F5_N ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(6) ); ++ reg_def F5_O ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(7) ); ++ ++ reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg() ); ++ reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next() ); ++ reg_def F6_J ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(2) ); ++ reg_def F6_K ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(3) ); ++ reg_def F6_L ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(4) ); ++ reg_def F6_M ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(5) ); ++ reg_def F6_N ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(6) ); ++ reg_def F6_O ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(7) ); ++ ++ reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg() ); ++ reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next() ); ++ reg_def F7_J ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(2) ); ++ reg_def F7_K ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(3) ); ++ reg_def F7_L ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(4) ); ++ reg_def F7_M ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(5) ); ++ reg_def F7_N ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(6) ); ++ 
reg_def F7_O ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(7) ); ++ ++ reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg() ); ++ reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next() ); ++ reg_def F8_J ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(2) ); ++ reg_def F8_K ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(3) ); ++ reg_def F8_L ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(4) ); ++ reg_def F8_M ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(5) ); ++ reg_def F8_N ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(6) ); ++ reg_def F8_O ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(7) ); ++ ++ reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg() ); ++ reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next() ); ++ reg_def F9_J ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(2) ); ++ reg_def F9_K ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(3) ); ++ reg_def F9_L ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(4) ); ++ reg_def F9_M ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(5) ); ++ reg_def F9_N ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(6) ); ++ reg_def F9_O ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(7) ); ++ ++ reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg() ); ++ reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next() ); ++ reg_def F10_J ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(2) ); ++ reg_def F10_K ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(3) ); ++ reg_def F10_L ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(4) ); ++ reg_def F10_M ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(5) ); ++ reg_def F10_N ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(6) ); ++ reg_def F10_O ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(7) ); ++ ++ reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg() ); ++ reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next() ); ++ reg_def F11_J ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(2) ); ++ reg_def F11_K ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(3) ); ++ reg_def F11_L ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(4) ); ++ reg_def F11_M ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(5) ); ++ reg_def F11_N ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(6) ); ++ reg_def F11_O ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(7) ); ++ ++ reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg() ); ++ reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next() ); ++ reg_def F12_J ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(2) ); ++ reg_def F12_K ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(3) ); ++ reg_def F12_L ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(4) ); ++ reg_def F12_M ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(5) ); ++ reg_def F12_N ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(6) ); ++ reg_def F12_O ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(7) ); ++ ++ reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg() ); ++ reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next() ); ++ reg_def F13_J ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(2) ); ++ reg_def F13_K ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(3) ); ++ reg_def F13_L ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(4) ); ++ reg_def F13_M ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(5) ); ++ reg_def F13_N ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(6) ); ++ reg_def F13_O ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(7) ); ++ ++ reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg() ); ++ reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next() ); ++ reg_def F14_J ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(2) ); ++ reg_def F14_K ( SOC, SOC, 
Op_RegF, 14, F14->as_VMReg()->next(3) ); ++ reg_def F14_L ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(4) ); ++ reg_def F14_M ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(5) ); ++ reg_def F14_N ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(6) ); ++ reg_def F14_O ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(7) ); ++ ++ reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg() ); ++ reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next() ); ++ reg_def F15_J ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(2) ); ++ reg_def F15_K ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(3) ); ++ reg_def F15_L ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(4) ); ++ reg_def F15_M ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(5) ); ++ reg_def F15_N ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(6) ); ++ reg_def F15_O ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(7) ); ++ ++ reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg() ); ++ reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next() ); ++ reg_def F16_J ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(2) ); ++ reg_def F16_K ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(3) ); ++ reg_def F16_L ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(4) ); ++ reg_def F16_M ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(5) ); ++ reg_def F16_N ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(6) ); ++ reg_def F16_O ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(7) ); ++ ++ reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg() ); ++ reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next() ); ++ reg_def F17_J ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(2) ); ++ reg_def F17_K ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(3) ); ++ reg_def F17_L ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(4) ); ++ reg_def F17_M ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(5) ); ++ reg_def F17_N ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(6) ); ++ reg_def F17_O ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(7) ); ++ ++ reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg() ); ++ reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next() ); ++ reg_def F18_J ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(2) ); ++ reg_def F18_K ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(3) ); ++ reg_def F18_L ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(4) ); ++ reg_def F18_M ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(5) ); ++ reg_def F18_N ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(6) ); ++ reg_def F18_O ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(7) ); ++ ++ reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg() ); ++ reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next() ); ++ reg_def F19_J ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(2) ); ++ reg_def F19_K ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(3) ); ++ reg_def F19_L ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(4) ); ++ reg_def F19_M ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(5) ); ++ reg_def F19_N ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(6) ); ++ reg_def F19_O ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(7) ); ++ ++ reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg() ); ++ reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next() ); ++ reg_def F20_J ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(2) ); ++ reg_def F20_K ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(3) ); ++ reg_def F20_L ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(4) ); ++ reg_def F20_M ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(5) ); ++ reg_def F20_N ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(6) 
); ++ reg_def F20_O ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(7) ); ++ ++ reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg() ); ++ reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next() ); ++ reg_def F21_J ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(2) ); ++ reg_def F21_K ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(3) ); ++ reg_def F21_L ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(4) ); ++ reg_def F21_M ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(5) ); ++ reg_def F21_N ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(6) ); ++ reg_def F21_O ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(7) ); ++ ++ reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg() ); ++ reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next() ); ++ reg_def F22_J ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(2) ); ++ reg_def F22_K ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(3) ); ++ reg_def F22_L ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(4) ); ++ reg_def F22_M ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(5) ); ++ reg_def F22_N ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(6) ); ++ reg_def F22_O ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(7) ); ++ ++ reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg() ); ++ reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next() ); ++ reg_def F23_J ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(2) ); ++ reg_def F23_K ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(3) ); ++ reg_def F23_L ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(4) ); ++ reg_def F23_M ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(5) ); ++ reg_def F23_N ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(6) ); ++ reg_def F23_O ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(7) ); ++ ++ reg_def F24 ( SOC, SOE, Op_RegF, 24, F24->as_VMReg() ); ++ reg_def F24_H ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()->next() ); ++ reg_def F24_J ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(2) ); ++ reg_def F24_K ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(3) ); ++ reg_def F24_L ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(4) ); ++ reg_def F24_M ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(5) ); ++ reg_def F24_N ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(6) ); ++ reg_def F24_O ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(7) ); ++ ++ reg_def F25 ( SOC, SOE, Op_RegF, 25, F25->as_VMReg() ); ++ reg_def F25_H ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()->next() ); ++ reg_def F25_J ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(2) ); ++ reg_def F25_K ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(3) ); ++ reg_def F25_L ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(4) ); ++ reg_def F25_M ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(5) ); ++ reg_def F25_N ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(6) ); ++ reg_def F25_O ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(7) ); ++ ++ reg_def F26 ( SOC, SOE, Op_RegF, 26, F26->as_VMReg() ); ++ reg_def F26_H ( SOC, SOE, Op_RegF, 26, F26->as_VMReg()->next() ); ++ reg_def F26_J ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(2) ); ++ reg_def F26_K ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(3) ); ++ reg_def F26_L ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(4) ); ++ reg_def F26_M ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(5) ); ++ reg_def F26_N ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(6) ); ++ reg_def F26_O ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(7) ); ++ ++ reg_def F27 ( SOC, SOE, Op_RegF, 27, F27->as_VMReg() ); ++ reg_def F27_H ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()->next() ); ++ reg_def F27_J ( SOC, SOC, Op_RegF, 27, 
F27->as_VMReg()->next(2) ); ++ reg_def F27_K ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(3) ); ++ reg_def F27_L ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(4) ); ++ reg_def F27_M ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(5) ); ++ reg_def F27_N ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(6) ); ++ reg_def F27_O ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(7) ); ++ ++ reg_def F28 ( SOC, SOE, Op_RegF, 28, F28->as_VMReg() ); ++ reg_def F28_H ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()->next() ); ++ reg_def F28_J ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(2) ); ++ reg_def F28_K ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(3) ); ++ reg_def F28_L ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(4) ); ++ reg_def F28_M ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(5) ); ++ reg_def F28_N ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(6) ); ++ reg_def F28_O ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(7) ); ++ ++ reg_def F29 ( SOC, SOE, Op_RegF, 29, F29->as_VMReg() ); ++ reg_def F29_H ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()->next() ); ++ reg_def F29_J ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(2) ); ++ reg_def F29_K ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(3) ); ++ reg_def F29_L ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(4) ); ++ reg_def F29_M ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(5) ); ++ reg_def F29_N ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(6) ); ++ reg_def F29_O ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(7) ); ++ ++ reg_def F30 ( SOC, SOE, Op_RegF, 30, F30->as_VMReg() ); ++ reg_def F30_H ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()->next() ); ++ reg_def F30_J ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(2) ); ++ reg_def F30_K ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(3) ); ++ reg_def F30_L ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(4) ); ++ reg_def F30_M ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(5) ); ++ reg_def F30_N ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(6) ); ++ reg_def F30_O ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(7) ); ++ ++ reg_def F31 ( SOC, SOE, Op_RegF, 31, F31->as_VMReg() ); ++ reg_def F31_H ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()->next() ); ++ reg_def F31_J ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(2) ); ++ reg_def F31_K ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(3) ); ++ reg_def F31_L ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(4) ); ++ reg_def F31_M ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(5) ); ++ reg_def F31_N ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(6) ); ++ reg_def F31_O ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(7) ); + + +// ---------------------------- @@ -18840,7 +19101,13 @@ index 0000000000..3c65f7518d + S8, S8_H + RA, RA_H, + SP, SP_H, // stack_pointer -+ FP, FP_H // frame_pointer ++ FP, FP_H, // frame_pointer ++ ++ // non-allocatable registers ++ T7, T7_H, ++ TP, TP_H, ++ RX, RX_H, ++ R0, R0_H, + ); + +// F23 is scratch reg @@ -18874,7 +19141,11 @@ index 0000000000..3c65f7518d + F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, + F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, + F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, -+ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O, ++ ++ // non-allocatable registers ++ F23, F23_H, F23_J, F23_K, F23_L, F23_M, F23_N, F23_O, ++ ); + +reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); +reg_class s0_reg( S0 ); @@ -18938,10 +19209,10 @@ index 0000000000..3c65f7518d + +//reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, A7, A6, A5, A4, 
T5, A3, A2, A1, A0, T0 ); + -+reg_class int_reg( ++reg_class all_reg32( + S8, + S7, -+ /* S5, S5_heapbase */ ++ S5, /* S5_heapbase */ + /* S6, S6 TREG */ + S4, + S3, @@ -18956,6 +19227,7 @@ index 0000000000..3c65f7518d + T3, + T2, + T1, ++ T0, + A7, + A6, + A5, @@ -18963,35 +19235,17 @@ index 0000000000..3c65f7518d + A3, + A2, + A1, -+ A0, -+ T0 ); ++ A0 ); ++ ++reg_class int_reg %{ ++ return _ANY_REG32_mask; ++%} + +reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, T5, T0 ); + -+reg_class p_reg( -+ S8, S8_H, -+ S7, S7_H, -+ S0, S0_H, -+ S1, S1_H, -+ S2, S2_H, -+ S4, S4_H, -+ S3, S3_H, -+ T8, T8_H, -+ T6, T6_H, -+ T5, T5_H, -+ T2, T2_H, -+ T3, T3_H, -+ T1, T1_H, -+ A7, A7_H, -+ A6, A6_H, -+ A5, A5_H, -+ A4, A4_H, -+ A3, A3_H, -+ A2, A2_H, -+ A1, A1_H, -+ A0, A0_H, -+ T0, T0_H -+ ); ++reg_class p_reg %{ ++ return _PTR_REG_mask; ++%} + +reg_class no_T8_p_reg( + S7, S7_H, @@ -19027,11 +19281,11 @@ index 0000000000..3c65f7518d + T0, T0_H + ); + -+reg_class long_reg( ++reg_class all_reg( + S8, S8_H, + S7, S7_H, + /* S6, S6_H, S6 TREG */ -+ /* S5, S5_H, S5_heapbase */ ++ S5, S5_H, /* S5_heapbase */ + S4, S4_H, + S3, S3_H, + S2, S2_H, @@ -19045,6 +19299,7 @@ index 0000000000..3c65f7518d + T3, T3_H, + T2, T2_H, + T1, T1_H, ++ T0, T0_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, @@ -19052,11 +19307,14 @@ index 0000000000..3c65f7518d + A3, A3_H, + A2, A2_H, + A1, A1_H, -+ A0, A0_H, -+ T0, T0_H ++ A0, A0_H + ); + + ++reg_class long_reg %{ ++ return _ANY_REG_mask; ++%} ++ +// Floating point registers. +// F31 are not used as temporary registers in D2I +reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F24, F25, F26, F27, F28, F29, F30, F31); @@ -19212,6 +19470,10 @@ index 0000000000..3c65f7518d +// To keep related declarations/definitions/uses close together, +// we switch between source %{ }% and source_hpp %{ }% freely as needed. + ++extern RegMask _ANY_REG32_mask; ++extern RegMask _ANY_REG_mask; ++extern RegMask _PTR_REG_mask; ++ +class CallStubImpl { + + //-------------------------------------------------------------- @@ -19288,6 +19550,22 @@ index 0000000000..3c65f7518d +#define T7 RT7 +#define T8 RT8 + ++RegMask _ANY_REG32_mask; ++RegMask _ANY_REG_mask; ++RegMask _PTR_REG_mask; ++ ++void reg_mask_init() { ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask = _ALL_REG_mask; ++ ++ if (UseCompressedOops && (Universe::narrow_ptrs_base() != NULL)) { ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r28->as_VMReg())); ++ _ANY_REG_mask.SUBTRACT(_S5_LONG_REG_mask); ++ _PTR_REG_mask.SUBTRACT(_S5_LONG_REG_mask); ++ } ++} ++ +// Emit exception handler code. +// Stuff framesize into a register and call a VM stub routine. +int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { @@ -19363,7 +19641,7 @@ index 0000000000..3c65f7518d +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + const int safety_zone = 3 * BytesPerInstWord; + int offs = offset - br_size + 4; -+ // To be conservative on LOONGARCH ++ // To be conservative on LoongArch + // branch node should be end with: + // branch inst + offs = (offs < 0 ? offs - safety_zone : offs + safety_zone) >> 2; @@ -19487,7 +19765,7 @@ index 0000000000..3c65f7518d + return MIN2(size,max_size); +} + -+// LOONGARCH supports misaligned vectors store/load? FIXME ++// LoongArch supports misaligned vectors store/load? 
FIXME +const bool Matcher::misaligned_vectors_ok() { + return false; + //return !AlignVector; // can be changed by flag @@ -19570,7 +19848,7 @@ index 0000000000..3c65f7518d + return FP_REG_mask(); +} + -+// LOONGARCH doesn't support AES intrinsics ++// LoongArch doesn't support AES intrinsics +const bool Matcher::pass_original_key_for_aes() { + return false; +} @@ -20268,7 +20546,7 @@ index 0000000000..3c65f7518d +} + +uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { -+ return MachNode::size(ra_); // too many variables; just compute it the hard way fujie debug ++ return MachNode::size(ra_); // too many variables; just compute it the hard way +} + +int MachEpilogNode::reloc() const { @@ -20315,9 +20593,6 @@ index 0000000000..3c65f7518d + } +} + -+ -+//static int sizeof_FFree_Float_Stack_All = -1; -+ +int MachCallRuntimeNode::ret_addr_offset() { + // pcaddu18i + // jirl @@ -21683,26 +21958,6 @@ index 0000000000..3c65f7518d + interface(REG_INTER); +%} + -+/* -+operand mV0RegI() %{ -+ constraint(ALLOC_IN_RC(v0_reg)); -+ match(RegI); -+ match(mRegI); -+ -+ format %{ "V0" %} -+ interface(REG_INTER); -+%} -+ -+operand mV1RegI() %{ -+ constraint(ALLOC_IN_RC(v1_reg)); -+ match(RegI); -+ match(mRegI); -+ -+ format %{ "V1" %} -+ interface(REG_INTER); -+%} -+*/ -+ +operand mRegN() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegN); @@ -21747,17 +22002,6 @@ index 0000000000..3c65f7518d + interface(REG_INTER); +%} + -+/* -+operand t9_RegN() %{ -+ constraint(ALLOC_IN_RC(t9_reg)); -+ match(RegN); -+ match(mRegN); -+ -+ format %{ %} -+ interface(REG_INTER); -+%} -+*/ -+ +operand a0_RegN() %{ + constraint(ALLOC_IN_RC(a0_reg)); + match(RegN); @@ -21902,26 +22146,6 @@ index 0000000000..3c65f7518d + interface(REG_INTER); +%} + -+/* -+operand v0_RegN() %{ -+ constraint(ALLOC_IN_RC(v0_reg)); -+ match(RegN); -+ match(mRegN); -+ -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand v1_RegN() %{ -+ constraint(ALLOC_IN_RC(v1_reg)); -+ match(RegN); -+ match(mRegN); -+ -+ format %{ %} -+ interface(REG_INTER); -+%} -+*/ -+ +// Pointer Register +operand mRegP() %{ + constraint(ALLOC_IN_RC(p_reg)); @@ -22070,19 +22294,6 @@ index 0000000000..3c65f7518d + interface(REG_INTER); +%} + -+/* -+operand t9_RegP() -+%{ -+ constraint(ALLOC_IN_RC(t9_long_reg)); -+ match(RegP); -+ match(mRegP); -+ match(no_T8_mRegP); -+ -+ format %{ %} -+ interface(REG_INTER); -+%} -+*/ -+ +operand a0_RegP() +%{ + constraint(ALLOC_IN_RC(a0_long_reg)); @@ -22194,24 +22405,6 @@ index 0000000000..3c65f7518d + interface(REG_INTER); +%} + -+/* -+operand mSPRegP(mRegP reg) %{ -+ constraint(ALLOC_IN_RC(sp_reg)); -+ match(reg); -+ -+ format %{ "SP" %} -+ interface(REG_INTER); -+%} -+ -+operand mFPRegP(mRegP reg) %{ -+ constraint(ALLOC_IN_RC(fp_reg)); -+ match(reg); -+ -+ format %{ "FP" %} -+ interface(REG_INTER); -+%} -+*/ -+ +operand mRegL() %{ + constraint(ALLOC_IN_RC(long_reg)); + match(RegL); @@ -23856,7 +24049,7 @@ index 0000000000..3c65f7518d + effect(USE labl); + + ins_cost(300); -+ format %{ "J$cop $labl #loongarch uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_long" %} + + ins_encode %{ + Label* L = $labl$$label; @@ -24316,7 +24509,7 @@ index 0000000000..3c65f7518d + effect(USE labl); + + ins_cost(300); -+ format %{ "J$cop $labl #loongarch uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_short" %} + + ins_encode %{ + Label &L = *($labl$$label); @@ -26159,7 
+26352,7 @@ index 0000000000..3c65f7518d +instruct addP_reg_reg(mRegP dst, mRegP src1, mRegLorI2L src2) %{ + match(Set dst (AddP src1 src2)); + -+ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg" %} ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg" %} + + ins_encode %{ + Register dst = $dst$$Register; @@ -26189,7 +26382,7 @@ index 0000000000..3c65f7518d +instruct addP_reg_imm12(mRegP dst, mRegP src1, immL12 src2) %{ + match(Set dst (AddP src1 src2)); + -+ format %{ "daddi $dst, $src1, $src2 #@addP_reg_imm12" %} ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_imm12" %} + ins_encode %{ + Register src1 = $src1$$Register; + long src2 = $src2$$constant; @@ -26416,27 +26609,6 @@ index 0000000000..3c65f7518d + ins_pipe( ialu_mult ); +%} + -+instruct maddI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2, mRegI src3) %{ -+ match(Set dst (AddI (MulI src1 src2) src3)); -+ -+ ins_cost(999); -+ format %{ "madd $dst, $src1 * $src2 + $src3 #@maddI_Reg_Reg" %} -+ ins_encode %{ -+ //TODO: LA -+ guarantee(0, "LA not implemented yet"); -+#if 0 -+ Register src1 = $src1$$Register; -+ Register src2 = $src2$$Register; -+ Register src3 = $src3$$Register; -+ Register dst = $dst$$Register; -+ -+ __ mul_w(dst, src1, src2); -+ __ add_w(dst, dst, src3); -+#endif -+ %} -+ ins_pipe( ialu_mult ); -+%} -+ +instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (DivI src1 src2)); + @@ -26463,7 +26635,6 @@ index 0000000000..3c65f7518d + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + -+ /* Here do we need to trap an exception manually ? */ + __ fdiv_s(dst, src1, src2); + %} + ins_pipe( pipe_slow ); @@ -26479,7 +26650,6 @@ index 0000000000..3c65f7518d + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + -+ /* Here do we need to trap an exception manually ? */ + __ fdiv_d(dst, src1, src2); + %} + ins_pipe( pipe_slow ); @@ -28174,7 +28344,7 @@ index 0000000000..3c65f7518d +instruct prefetchAlloc(memory mem) %{ + match(PrefetchAllocation mem); + ins_cost(125); -+ format %{ "pref $mem\t# Prefetch allocation @ prefetchAlloc" %} ++ format %{ "preld $mem\t# Prefetch allocation @ prefetchAlloc" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; @@ -28484,7 +28654,7 @@ index 0000000000..3c65f7518d + match(Set dst src); + + ins_cost(125); -+ format %{ "lwc1 $dst, $src\t# float stk @ loadSSF" %} ++ format %{ "fld_s $dst, $src\t# float stk @ loadSSF" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSF) !"); + __ fld_s($dst$$FloatRegister, SP, $src$$disp); @@ -28497,7 +28667,7 @@ index 0000000000..3c65f7518d + match(Set dst src); + + ins_cost(100); -+ format %{ "swc1 $dst, $src\t# float stk @ storeSSF" %} ++ format %{ "fst_s $dst, $src\t# float stk @ storeSSF" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSF) !"); + __ fst_s($src$$FloatRegister, SP, $dst$$disp); @@ -28511,7 +28681,7 @@ index 0000000000..3c65f7518d + match(Set dst src); + + ins_cost(125); -+ format %{ "ldc1 $dst, $src\t# double stk @ loadSSD" %} ++ format %{ "fld_d $dst, $src\t# double stk @ loadSSD" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSD) !"); + __ fld_d($dst$$FloatRegister, SP, $src$$disp); @@ -28581,8 +28751,10 @@ index 0000000000..3c65f7518d + // Use the following format syntax + format %{ "ILLTRAP ;#@ShouldNotReachHere" %} + ins_encode %{ -+ // Here we should emit illtrap! 
-+ __ brk(18); ++ if (is_reachable()) { ++ // Here we should emit illtrap! ++ __ stop("ShouldNotReachHere"); ++ } + %} + ins_pipe( pipe_jump ); +%} @@ -28964,30 +29136,6 @@ index 0000000000..3c65f7518d +%} + +//----------Max and Min-------------------------------------------------------- -+// Min Instructions -+//// -+// *** Min and Max using the conditional move are slower than the -+// *** branch version on a Pentium III. -+// // Conditional move for min -+//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ -+// effect( USE_DEF op2, USE op1, USE cr ); -+// format %{ "CMOVlt $op2,$op1\t! min" %} -+// opcode(0x4C,0x0F); -+// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); -+// ins_pipe( pipe_cmov_reg ); -+//%} -+// -+//// Min Register with Register (P6 version) -+//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ -+// predicate(VM_Version::supports_cmov() ); -+// match(Set op2 (MinI op1 op2)); -+// ins_cost(200); -+// expand %{ -+// eFlagsReg cr; -+// compI_eReg(cr,op1,op2); -+// cmovI_reg_lt(op2,op1,cr); -+// %} -+//%} + +// Min Register with Register (generic version) +instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ @@ -29009,30 +29157,6 @@ index 0000000000..3c65f7518d + ins_pipe( pipe_slow ); +%} + -+// Max Register with Register -+// *** Min and Max using the conditional move are slower than the -+// *** branch version on a Pentium III. -+// // Conditional move for max -+//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ -+// effect( USE_DEF op2, USE op1, USE cr ); -+// format %{ "CMOVgt $op2,$op1\t! max" %} -+// opcode(0x4F,0x0F); -+// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); -+// ins_pipe( pipe_cmov_reg ); -+//%} -+// -+// // Max Register with Register (P6 version) -+//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ -+// predicate(VM_Version::supports_cmov() ); -+// match(Set op2 (MaxI op1 op2)); -+// ins_cost(200); -+// expand %{ -+// eFlagsReg cr; -+// compI_eReg(cr,op1,op2); -+// cmovI_reg_gt(op2,op1,cr); -+// %} -+//%} -+ +// Max Register with Register (generic version) +instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ + match(Set dst (MaxI dst src)); @@ -32450,10 +32574,10 @@ index 0000000000..3c65f7518d + diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp new file mode 100644 -index 0000000000..f8670f5081 +index 0000000000..74cda82963 --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp -@@ -0,0 +1,4750 @@ +@@ -0,0 +1,4531 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. 
@@ -32491,6 +32615,7 @@ index 0000000000..f8670f5081 +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" ++#include "nativeInst_loongarch.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.inline.hpp" @@ -32504,6 +32629,7 @@ index 0000000000..f8670f5081 + +#ifdef COMPILER2 +#include "opto/compile.hpp" ++#include "opto/intrinsicnode.hpp" +#endif + +#define T0 RT0 @@ -32595,6 +32721,24 @@ index 0000000000..f8670f5081 + MacroAssembler masm(&cb); + masm.pcaddi(as_Register(low(stub_inst, 5)), offs); + return; ++ } else if (high(stub_inst, 7) == pcaddu12i_op) { ++ // pc-relative ++ jlong offs = target - branch; ++ guarantee(is_simm(offs, 32), "Not signed 32-bit offset"); ++ jint si12, si20; ++ jint& stub_instNext = *(jint*)(branch+4); ++ split_simm32(offs, si12, si20); ++ CodeBuffer cb(branch, 2 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.pcaddu12i(as_Register(low(stub_inst, 5)), si20); ++ masm.addi_d(as_Register(low((stub_instNext), 5)), as_Register(low((stub_instNext) >> 5, 5)), si12); ++ return; ++ } else if (high(stub_inst, 7) == lu12i_w_op) { ++ // long call (absolute) ++ CodeBuffer cb(branch, 3 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.call_long(target); ++ return; + } + + stub_inst = patched_branch(target - branch, stub_inst, 0); @@ -33013,7 +33157,7 @@ index 0000000000..f8670f5081 + + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. -+ // Note that because there is no current thread register on MIPS we ++ // Note that because there is no current thread register on LA we + // need to store off the mark word we read out of the object to + // avoid reloading it and needing to recheck invariants below. This + // store is unfortunate but it makes the overall code shorter and @@ -33415,55 +33559,6 @@ index 0000000000..f8670f5081 + popad(); +} + -+void MacroAssembler::print_reg(Register reg) { -+ void * cur_pc = pc(); -+ pushad(); -+ -+ li(A0, (long)reg->name()); -+ if (reg == SP) -+ addi_d(A1, SP, wordSize * 23); //23 registers saved in pushad() -+ else if (reg == A0) -+ ld_d(A1, SP, wordSize * 19); //A0 has been modified by li(A0, (long)reg->name()). Ugly Code! 
-+ else -+ move(A1, reg); -+ li(A2, (long)cur_pc); -+ push(S2); -+ li(AT, -(StackAlignmentInBytes)); -+ move(S2, SP); // use S2 as a sender SP holder -+ andr(SP, SP, AT); // align stack as required by ABI -+ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc),relocInfo::runtime_call_type); -+ move(SP, S2); // use S2 as a sender SP holder -+ pop(S2); -+ popad(); -+} -+ -+void MacroAssembler::print_reg(FloatRegister reg) { -+ //TODO: LA -+ guarantee(0, "LA not implemented yet"); -+#if 0 -+ void * cur_pc = pc(); -+ pushad(); -+ li(A0, (long)reg->name()); -+ push(S2); -+ move(AT, -(StackAlignmentInBytes)); -+ move(S2, SP); // use S2 as a sender SP holder -+ andr(SP, SP, AT); // align stack as required by ABI -+ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type); -+ move(SP, S2); // use S2 as a sender SP holder -+ pop(S2); -+ popad(); -+ -+ pushad(); -+ move(FP, SP); -+ move(AT, -(StackAlignmentInBytes)); -+ andr(SP , SP , AT); -+ mov_d(F12, reg); -+ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double),relocInfo::runtime_call_type); -+ move(SP, FP); -+ popad(); -+#endif -+} -+ +void MacroAssembler::increment(Register reg, int imm) { + if (!imm) return; + if (is_simm(imm, 12)) { @@ -34211,8 +34306,6 @@ index 0000000000..f8670f5081 + + // make sure klass is 'reasonable' + // add for compressedoops -+ reinit_heapbase(); -+ // add for compressedoops + load_klass(SCR2, A1); + beqz(SCR2, error); // if klass is NULL it is broken + // return if everything seems ok @@ -34405,6 +34498,7 @@ index 0000000000..f8670f5081 + guarantee(0, "LA not implemented yet"); +} + ++#ifdef COMPILER2 +// Fast_Lock and Fast_Unlock used by C2 + +// Because the transitions from emitted code to the runtime @@ -34785,6 +34879,7 @@ index 0000000000..f8670f5081 + if (EmitSync & 32768) { nop() ; } + } +} ++#endif // COMPILER2 + +void MacroAssembler::align(int modulus) { + while (offset() % modulus != 0) nop(); @@ -35161,7 +35256,6 @@ index 0000000000..f8670f5081 + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. -+ //lea(dst, Address(S5_heapbase, src, Address::times_8, 0)); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (Universe::narrow_oop_base() != NULL) { @@ -35438,7 +35532,6 @@ index 0000000000..f8670f5081 + // Skip to start of data. + addi_d(temp_reg, temp_reg, Array::base_offset_in_bytes()); + -+ // OpenJDK8 never compresses klass pointers in secondary-super array. 
+ Label Loop, subtype; + bind(Loop); + beq(temp2_reg, R0, *L_failure); @@ -35606,216 +35699,7 @@ index 0000000000..f8670f5081 + ld_d(method_result, AT, base + vtableEntry::method_offset_in_bytes()); +} + -+void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) { -+ switch (type) { -+ case T_LONG: -+ st_ptr(src_reg, tmp_reg, disp); -+ break; -+ case T_ARRAY: -+ case T_OBJECT: -+ if (UseCompressedOops && !wide) { -+ st_w(src_reg, tmp_reg, disp); -+ } else { -+ st_ptr(src_reg, tmp_reg, disp); -+ } -+ break; -+ case T_ADDRESS: -+ st_ptr(src_reg, tmp_reg, disp); -+ break; -+ case T_INT: -+ st_w(src_reg, tmp_reg, disp); -+ break; -+ case T_CHAR: -+ case T_SHORT: -+ st_h(src_reg, tmp_reg, disp); -+ break; -+ case T_BYTE: -+ case T_BOOLEAN: -+ st_b(src_reg, tmp_reg, disp); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) { -+ Register tmp_reg = T4; -+ Register index_reg = addr.index(); -+ if (index_reg == NOREG) { -+ tmp_reg = NOREG; -+ } -+ -+ int scale = addr.scale(); -+ if (tmp_reg != NOREG && scale >= 0) { -+ slli_d(tmp_reg, index_reg, scale); -+ } -+ -+ int disp = addr.disp(); -+ bool disp_is_simm16 = true; -+ if (!Assembler::is_simm16(disp)) { -+ disp_is_simm16 = false; -+ } -+ -+ Register base_reg = addr.base(); -+ if (tmp_reg != NOREG) { -+ assert_different_registers(tmp_reg, base_reg, index_reg); -+ } -+ -+ if (tmp_reg != NOREG) { -+ add_d(tmp_reg, base_reg, tmp_reg); -+ if (!disp_is_simm16) { -+ li(tmp_reg, disp); -+ add_d(tmp_reg, base_reg, tmp_reg); -+ } -+ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); -+ } else { -+ if (!disp_is_simm16) { -+ tmp_reg = T4; -+ assert_different_registers(tmp_reg, base_reg); -+ li(tmp_reg, disp); -+ add_d(tmp_reg, base_reg, tmp_reg); -+ } -+ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); -+ } -+} -+ -+void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) { -+ switch (type) { -+ case T_DOUBLE: -+ fst_d(src_reg, tmp_reg, disp); -+ break; -+ case T_FLOAT: -+ fst_s(src_reg, tmp_reg, disp); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) { -+ Register tmp_reg = T4; -+ Register index_reg = addr.index(); -+ if (index_reg == NOREG) { -+ tmp_reg = NOREG; -+ } -+ -+ int scale = addr.scale(); -+ if (tmp_reg != NOREG && scale >= 0) { -+ slli_d(tmp_reg, index_reg, scale); -+ } -+ -+ int disp = addr.disp(); -+ bool disp_is_simm16 = true; -+ if (!Assembler::is_simm16(disp)) { -+ disp_is_simm16 = false; -+ } -+ -+ Register base_reg = addr.base(); -+ if (tmp_reg != NOREG) { -+ assert_different_registers(tmp_reg, base_reg, index_reg); -+ } -+ -+ if (tmp_reg != NOREG) { -+ add_d(tmp_reg, base_reg, tmp_reg); -+ if (!disp_is_simm16) { -+ li(tmp_reg, disp); -+ add_d(tmp_reg, base_reg, tmp_reg); -+ } -+ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); -+ } else { -+ if (!disp_is_simm16) { -+ tmp_reg = T4; -+ assert_different_registers(tmp_reg, base_reg); -+ li(tmp_reg, disp); -+ add_d(tmp_reg, base_reg, tmp_reg); -+ } -+ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type); -+ } -+} -+ -+void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) { -+ switch (type) { -+ case T_LONG: -+ ld_ptr(dst_reg, tmp_reg, disp); -+ break; -+ case T_ARRAY: -+ case T_OBJECT: -+ if (UseCompressedOops && !wide) { -+ ld_wu(dst_reg, tmp_reg, disp); -+ } else { -+ ld_ptr(dst_reg, tmp_reg, disp); -+ } -+ break; -+ case T_ADDRESS: -+ if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) { -+ ld_wu(dst_reg, tmp_reg, disp); -+ } else { -+ ld_ptr(dst_reg, tmp_reg, disp); -+ } -+ break; -+ case T_INT: -+ ld_w(dst_reg, tmp_reg, disp); -+ break; -+ case T_CHAR: -+ ld_hu(dst_reg, tmp_reg, disp); -+ break; -+ case T_SHORT: -+ ld_h(dst_reg, tmp_reg, disp); -+ break; -+ case T_BYTE: -+ case T_BOOLEAN: -+ ld_b(dst_reg, tmp_reg, disp); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) { -+ int code_offset = 0; -+ Register tmp_reg = T4; -+ Register index_reg = addr.index(); -+ if (index_reg == NOREG) { -+ tmp_reg = NOREG; -+ } -+ int scale = addr.scale(); -+ if (tmp_reg != NOREG && scale >= 0) { -+ slli_d(tmp_reg, index_reg, scale); -+ } -+ -+ int disp = addr.disp(); -+ bool disp_is_simm16 = true; -+ if (!Assembler::is_simm16(disp)) { -+ disp_is_simm16 = false; -+ } -+ -+ Register base_reg = addr.base(); -+ if (tmp_reg != NOREG) { -+ assert_different_registers(tmp_reg, base_reg, index_reg); -+ } -+ -+ if (tmp_reg != NOREG) { -+ add_d(tmp_reg, base_reg, tmp_reg); -+ if (!disp_is_simm16) { -+ li(tmp_reg, disp); -+ add_d(tmp_reg, base_reg, tmp_reg); -+ } -+ code_offset = offset(); -+ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); -+ } else { -+ if (!disp_is_simm16) { -+ tmp_reg = T4; -+ assert_different_registers(tmp_reg, base_reg); -+ li(tmp_reg, disp); -+ add_d(tmp_reg, base_reg, tmp_reg); -+ } -+ code_offset = offset(); -+ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); -+ } -+ -+ return code_offset; -+} -+ ++#ifdef COMPILER2 +// Compare strings, used for char[] and byte[]. 
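++// The result follows String.compareTo conventions: negative, zero or positive
++// according to the lexicographic order of the two arrays, with the shorter
++// array ordered first when one is a prefix of the other. Illustrative scalar
++// equivalent (assuming cnt1/cnt2 carry the element counts):
++//   for (int i = 0; i < min(cnt1, cnt2); i++)
++//     if (str1[i] != str2[i]) return str1[i] - str2[i];
++//   return cnt1 - cnt2;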
+void MacroAssembler::string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, @@ -35908,63 +35792,7 @@ index 0000000000..f8670f5081 + + bind(True); +} -+ -+void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) { -+ switch (type) { -+ case T_DOUBLE: -+ fld_d(dst_reg, tmp_reg, disp); -+ break; -+ case T_FLOAT: -+ fld_s(dst_reg, tmp_reg, disp); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) { -+ int code_offset = 0; -+ Register tmp_reg = T4; -+ Register index_reg = addr.index(); -+ if (index_reg == NOREG) { -+ tmp_reg = NOREG; -+ } -+ -+ int scale = addr.scale(); -+ if (tmp_reg != NOREG && scale >= 0) { -+ slli_d(tmp_reg, index_reg, scale); -+ } -+ -+ int disp = addr.disp(); -+ bool disp_is_simm16 = true; -+ if (!Assembler::is_simm16(disp)) { -+ disp_is_simm16 = false; -+ } -+ Register base_reg = addr.base(); -+ if (tmp_reg != NOREG) { -+ assert_different_registers(tmp_reg, base_reg, index_reg); -+ } -+ -+ if (tmp_reg != NOREG) { -+ add_d(tmp_reg, base_reg, tmp_reg); -+ if (!disp_is_simm16) { -+ li(tmp_reg, disp); -+ add_d(tmp_reg, base_reg, tmp_reg); -+ } -+ code_offset = offset(); -+ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); -+ } else { -+ if (!disp_is_simm16) { -+ tmp_reg = T4; -+ assert_different_registers(tmp_reg, base_reg); -+ li(tmp_reg, disp); -+ add_d(tmp_reg, base_reg, tmp_reg); -+ } -+ code_offset = offset(); -+ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); -+ } -+ return code_offset; -+} ++#endif // COMPILER2 + +void MacroAssembler::load_byte_map_base(Register reg) { + jbyte *byte_map_base = @@ -36751,6 +36579,7 @@ index 0000000000..f8670f5081 + } +} + ++#ifdef COMPILER2 +void MacroAssembler::reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode) { + switch (type) { + case T_BYTE: @@ -36956,6 +36785,7 @@ index 0000000000..f8670f5081 + ShouldNotReachHere(); + } +} ++#endif // COMPILER2 + +/** + * Emits code to update CRC-32 with a byte value according to constants in table @@ -37105,6 +36935,7 @@ index 0000000000..f8670f5081 + bind(L_exit); +} + ++#ifdef COMPILER2 +void MacroAssembler::cmp_branch_short(int flag, Register op1, Register op2, Label& L, bool is_signed) { + + switch(flag) { @@ -37192,6 +37023,7 @@ index 0000000000..f8670f5081 + Unimplemented(); + } +} ++#endif // COMPILER2 + +void MacroAssembler::membar(Membar_mask_bits hint){ + address prev = pc() - NativeInstruction::sync_instruction_size; @@ -37204,12 +37036,85 @@ index 0000000000..f8670f5081 + dbar(hint); + } +} ++ ++// Code for BigInteger::mulAdd intrinsic ++// out = A0 ++// in = A1 ++// offset = A2 (already out.length-offset) ++// len = A3 ++// k = A4 ++// ++// pseudo code from java implementation: ++// long kLong = k & LONG_MASK; ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j = len - 1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k) { ++ Label L_tail_loop, L_unroll, L_end; ++ ++ move(SCR2, out); ++ move(out, R0); // should clear out ++ bge(R0, len, L_end); ++ ++ alsl_d(offset, offset, SCR2, LogBytesPerInt - 1); ++ 
alsl_d(in, len, in, LogBytesPerInt - 1); ++ ++ const int unroll = 16; ++ li(SCR2, unroll); ++ blt(len, SCR2, L_tail_loop); ++ ++ bind(L_unroll); ++ ++ addi_d(in, in, -unroll * BytesPerInt); ++ addi_d(offset, offset, -unroll * BytesPerInt); ++ ++ for (int i = unroll - 1; i >= 0; i--) { ++ ld_wu(SCR1, in, i * BytesPerInt); ++ mulw_d_wu(SCR1, SCR1, k); ++ add_d(out, out, SCR1); // out as scratch ++ ld_wu(SCR1, offset, i * BytesPerInt); ++ add_d(SCR1, SCR1, out); ++ st_w(SCR1, offset, i * BytesPerInt); ++ srli_d(out, SCR1, 32); // keep carry ++ } ++ ++ sub_w(len, len, SCR2); ++ bge(len, SCR2, L_unroll); ++ ++ bge(R0, len, L_end); // check tail ++ ++ bind(L_tail_loop); ++ ++ addi_d(in, in, -BytesPerInt); ++ ld_wu(SCR1, in, 0); ++ mulw_d_wu(SCR1, SCR1, k); ++ add_d(out, out, SCR1); // out as scratch ++ ++ addi_d(offset, offset, -BytesPerInt); ++ ld_wu(SCR1, offset, 0); ++ add_d(SCR1, SCR1, out); ++ st_w(SCR1, offset, 0); ++ ++ srli_d(out, SCR1, 32); // keep carry ++ ++ addi_w(len, len, -1); ++ blt(R0, len, L_tail_loop); ++ ++ bind(L_end); ++} ++ diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp new file mode 100644 -index 0000000000..48d4ad07fd +index 0000000000..d49cf2e680 --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp -@@ -0,0 +1,848 @@ +@@ -0,0 +1,820 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. @@ -37235,8 +37140,8 @@ index 0000000000..48d4ad07fd + * + */ + -+#ifndef CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP + +#include "asm/assembler.hpp" +#include "runtime/rtmLocking.hpp" @@ -37591,9 +37496,6 @@ index 0000000000..48d4ad07fd + static void debug(char* msg/*, RegistersForDebugging* regs*/); + static void debug64(char* msg, int64_t pc, int64_t regs[]); + -+ void print_reg(Register reg); -+ void print_reg(FloatRegister reg); -+ + void untested() { stop("untested"); } + + void unimplemented(const char* what = ""); @@ -37663,34 +37565,6 @@ index 0000000000..48d4ad07fd +#endif + + -+ // Arithmetics -+ // Regular vs. 
d* versions -+#if 0 -+ inline void addu_long(Register rd, Register rs, Register rt) { -+ //TODO: LA -+ guarantee(0, "LA not implemented yet"); -+ add_d(rd, rs, rt); -+ } -+#endif -+ inline void addu_long(Register rd, Register rs, long imm32_64) { -+ //TODO: LA -+ guarantee(0, "LA not implemented yet"); -+#if 0 -+ addi_d(rd, rs, imm32_64); -+#endif -+ } -+ -+ void round_to(Register reg, int modulus) { -+ //TODO: LA -+ guarantee(0, "LA not implemented yet"); -+#if 0 -+ assert_different_registers(reg, AT); -+ increment(reg, modulus - 1); -+ move(AT, - modulus); -+ andr(reg, reg, AT); -+#endif -+ } -+ + // the follow two might use AT register, be sure you have no meanful data in AT before you call them + void increment(Register reg, int imm); + void decrement(Register reg, int imm); @@ -37764,6 +37638,9 @@ index 0000000000..48d4ad07fd + void patchable_call(address target, address call_size = 0); + + // Floating ++ void generate_dsin_dcos(bool isCos, address npio2_hw, address two_over_pi, ++ address pio2, address dsin_coef, address dcos_coef); ++ + // Data + + // Load and store values by size and signed-ness @@ -37799,10 +37676,6 @@ index 0000000000..48d4ad07fd + // convert big endian integer to little endian integer + void swap(Register reg); + -+ // implement the x86 instruction semantic -+ // if c_reg == *dest then *dest <= x_reg -+ // else c_reg <= *dest -+ // the AT indicate if xchg occurred, 1 for xchged, else 0 + void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, + bool retold, bool barrier); + void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, @@ -37812,7 +37685,6 @@ index 0000000000..48d4ad07fd + void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, + bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL); + -+ //pop & push, added by aoqi + void extend_sign(Register rh, Register rl) { /*stop("extend_sign");*/ guarantee(0, "LA not implemented yet");} + void neg(Register reg) { /*dsubu(reg, R0, reg);*/ guarantee(0, "LA not implemented yet");} + void push (Register reg) { addi_d(SP, SP, -8); st_d (reg, SP, 0); } @@ -37839,15 +37711,6 @@ index 0000000000..48d4ad07fd + void mov_metadata(Register dst, Metadata* obj); + void mov_metadata(Address dst, Metadata* obj); + -+ void store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide); -+ void store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type); -+ void store_for_type(Register src_reg, Address addr, BasicType type = T_INT, bool wide = false); -+ void store_for_type(FloatRegister src_reg, Address addr, BasicType type = T_INT); -+ void load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide); -+ void load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type); -+ int load_for_type(Register dst_reg, Address addr, BasicType type = T_INT, bool wide = false); -+ int load_for_type(FloatRegister dst_reg, Address addr, BasicType type = T_INT); -+ + // Load the base of the cardtable byte map into reg. + void load_byte_map_base(Register reg); + @@ -37872,6 +37735,7 @@ index 0000000000..48d4ad07fd + //FIXME + void empty_FPU_stack(){/*need implemented*/}; + ++#ifdef COMPILER2 + // Compare strings. 
+ void string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, @@ -37881,12 +37745,13 @@ index 0000000000..48d4ad07fd + void arrays_equals(Register str1, Register str2, + Register cnt, Register tmp1, Register tmp2, Register result, + bool is_char); ++#endif + + // method handles (JSR 292) + Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + + -+// LA added: ++ // LA added: + void jr (Register reg) { jirl(R0, reg, 0); } + void jalr(Register reg) { jirl(RA, reg, 0); } + void nop () { andi(R0, R0, 0); } @@ -37949,6 +37814,10 @@ index 0000000000..48d4ad07fd + code()->clear_last_insn(); + } + ++ // Code for java.math.BigInteger::mulAdd intrinsic. ++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k); ++ +#undef VIRTUAL + +public: @@ -37995,8 +37864,10 @@ index 0000000000..48d4ad07fd + loadstore_t(reg, base, index, scale, disp, type); + } + ++#ifdef COMPILER2 + void reduce(Register dst, Register src, FloatRegister vsrc, FloatRegister tmp1, FloatRegister tmp2, BasicType type, int opcode, int vector_size); + void reduce(FloatRegister dst, FloatRegister src, FloatRegister vsrc, FloatRegister tmp, BasicType type, int opcode, int vector_size); ++#endif + +private: + template @@ -38013,9 +37884,15 @@ index 0000000000..48d4ad07fd + void loadstore(FloatRegister reg, Register base, int disp, int type); + void loadstore(FloatRegister reg, Register base, Register disp, int type); + ++#ifdef COMPILER2 + void reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode); + void reduce_ins_r(Register reg1, Register reg2, Register reg3, BasicType type, int opcode); + void reduce_ins_f(FloatRegister reg1, FloatRegister reg2, FloatRegister reg3, BasicType type, int opcode); ++#endif ++ void generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef); ++ void generate_kernel_cos(FloatRegister x, address dcos_coef); ++ void generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2); ++ void generate__kernel_rem_pio2(address two_over_pi, address pio2); +}; + +/** @@ -38057,16 +37934,16 @@ index 0000000000..48d4ad07fd + Label _branches; +}; + -+#endif // CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp new file mode 100644 -index 0000000000..f98d93174f +index 0000000000..49302590c3 --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2017, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -38089,24 +37966,1655 @@ index 0000000000..f98d93174f + * + */ + -+#ifndef CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP -+#define CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + -+#endif // CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp +new file mode 100644 +index 0000000000..3ed4c36651 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp +@@ -0,0 +1,1625 @@ ++/* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "macroAssembler_loongarch.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// The following code is a optimized version of fdlibm sin/cos implementation ++// (C code is in share/runtime/sharedRuntimeTrig.cpp) adapted for LOONGARCH64. ++ ++// Please refer to sin/cos approximation via polynomial and ++// trigonometric argument reduction techniques to the following literature: ++// ++// [1] Muller, Jean-Michel, Nicolas Brisebarre, Florent De Dinechin, ++// Claude-Pierre Jeannerod, Vincent Lefevre, Guillaume Melquiond, ++// Nathalie Revol, Damien Stehlé, and Serge Torres: ++// Handbook of floating-point arithmetic. ++// Springer Science & Business Media, 2009. ++// [2] K. C. Ng ++// Argument Reduction for Huge Arguments: Good to the Last Bit ++// July 13, 1992, SunPro ++// ++// HOW TO READ THIS CODE: ++// This code consists of several functions. Each function has following header: ++// 1) Description ++// 2) C-pseudo code with differences from fdlibm marked by comments starting ++// with "NOTE". 
Check unmodified fdlibm code in ++// share/runtime/SharedRuntimeTrig.cpp ++// 3) Brief textual description of changes between fdlibm and current ++// implementation along with optimization notes (if applicable) ++// 4) Assumptions, input and output ++// 5) (Optional) additional notes about intrinsic implementation ++// Each function is separated in blocks which follow the pseudo-code structure ++// ++// HIGH-LEVEL ALGORITHM DESCRIPTION: ++// - entry point: generate_dsin_dcos(...); ++// - check corner cases: NaN, INF, tiny argument. ++// - check if |x| < Pi/4. Then approximate sin/cos via polynomial (kernel_sin/kernel_cos) ++// -- else proceed to argument reduction routine (__ieee754_rem_pio2) and ++// use reduced argument to get result via kernel_sin/kernel_cos ++// ++// HIGH-LEVEL CHANGES BETWEEN INTRINSICS AND FDLIBM: ++// 1) two_over_pi table fdlibm representation is int[], while intrinsic version ++// has these int values converted to double representation to load converted ++// double values directly (see stubRoutines_aarch4::_two_over_pi) ++// 2) Several loops are unrolled and vectorized: see comments in code after ++// labels: SKIP_F_LOAD, RECOMP_FOR1_CHECK, RECOMP_FOR2 ++// 3) fdlibm npio2_hw table now has "prefix" with constants used in ++// calculation. These constants are loaded from npio2_hw table instead of ++// constructing it in code (see stubRoutines_loongarch64.cpp) ++// 4) Polynomial coefficients for sin and cos are moved to table sin_coef ++// and cos_coef to use the same optimization as in 3). It allows to load most of ++// required constants via single instruction ++// ++// ++// ++///* __ieee754_rem_pio2(x,y) ++// * ++// * returns the remainder of x rem pi/2 in y[0]+y[1] (i.e. like x div pi/2) ++// * x is input argument, y[] is hi and low parts of reduced argument (x) ++// * uses __kernel_rem_pio2() ++// */ ++// // use tables(see stubRoutines_loongarch64.cpp): two_over_pi and modified npio2_hw ++// ++// BEGIN __ieee754_rem_pio2 PSEUDO CODE ++// ++//static int __ieee754_rem_pio2(double x, double *y) { ++// double z,w,t,r,fn; ++// double tx[3]; ++// int e0,i,j,nx,n,ix,hx,i0; ++// ++// i0 = ((*(int*)&two24A)>>30)^1; /* high word index */ ++// hx = *(i0+(int*)&x); /* high word of x */ ++// ix = hx&0x7fffffff; ++// if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ ++// if(hx>0) { ++// z = x - pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z - pio2_1t; ++// y[1] = (z-y[0])-pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z -= pio2_2; ++// y[0] = z - pio2_2t; ++// y[1] = (z-y[0])-pio2_2t; ++// } ++// return 1; ++// } else { /* negative x */ ++// z = x + pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z + pio2_1t; ++// y[1] = (z-y[0])+pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z += pio2_2; ++// y[0] = z + pio2_2t; ++// y[1] = (z-y[0])+pio2_2t; ++// } ++// return -1; ++// } ++// } ++// if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ ++// t = fabsd(x); ++// n = (int) (t*invpio2+half); ++// fn = (double)n; ++// r = t-fn*pio2_1; ++// w = fn*pio2_1t; /* 1st round good to 85 bit */ ++// // NOTE: y[0] = r-w; is moved from if/else below to be before "if" ++// y[0] = r-w; ++// if(n<32&&ix!=npio2_hw[n-1]) { ++// // y[0] = r-w; /* quick check no cancellation */ // NOTE: moved earlier ++// } else { ++// j = ix>>20; ++// // y[0] = r-w; // NOTE: moved earlier ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>16) { /* 2nd iteration needed, good to 118 */ ++// 
t = r; ++// w = fn*pio2_2; ++// r = t-w; ++// w = fn*pio2_2t-((t-r)-w); ++// y[0] = r-w; ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>49) { /* 3rd iteration need, 151 bits acc */ ++// t = r; /* will cover all possible cases */ ++// w = fn*pio2_3; ++// r = t-w; ++// w = fn*pio2_3t-((t-r)-w); ++// y[0] = r-w; ++// } ++// } ++// } ++// y[1] = (r-y[0])-w; ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// else return n; ++// } ++// /* ++// * all other (large) arguments ++// */ ++// // NOTE: this check is removed, because it was checked in dsin/dcos ++// // if(ix>=0x7ff00000) { /* x is inf or NaN */ ++// // y[0]=y[1]=x-x; return 0; ++// // } ++// /* set z = scalbn(|x|,ilogb(x)-23) */ ++// *(1-i0+(int*)&z) = *(1-i0+(int*)&x); ++// e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */ ++// *(i0+(int*)&z) = ix - (e0<<20); ++// ++// // NOTE: "for" loop below in unrolled. See comments in asm code ++// for(i=0;i<2;i++) { ++// tx[i] = (double)((int)(z)); ++// z = (z-tx[i])*two24A; ++// } ++// ++// tx[2] = z; ++// nx = 3; ++// ++// // NOTE: while(tx[nx-1]==zeroA) nx--; is unrolled. See comments in asm code ++// while(tx[nx-1]==zeroA) nx--; /* skip zero term */ ++// ++// n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// return n; ++//} ++// ++// END __ieee754_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic for __ieee754_rem_pio2: ++// 1. INF/NaN check for huge argument is removed in comparison with fdlibm ++// code, because this check is already done in dcos/dsin code ++// 2. Most constants are now loaded from table instead of direct initialization ++// 3. Two loops are unrolled ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// 2. Assume SCR1 = 0x3fe921fb00000000 (~ PI/4) ++// 3. Assume ix = A3 ++// Input and output: ++// 1. Input: X = A0 ++// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. See comments in code about their usage ++void MacroAssembler::generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2) { ++ const int64_t PIO2_1t = 0x3DD0B4611A626331ULL; ++ const int64_t PIO2_2 = 0x3DD0B4611A600000ULL; ++ const int64_t PIO2_2t = 0x3BA3198A2E037073ULL; ++ Label X_IS_NEGATIVE, X_IS_MEDIUM_OR_LARGE, X_IS_POSITIVE_LONG_PI, LARGE_ELSE, ++ REDUCTION_DONE, X_IS_MEDIUM_BRANCH_DONE, X_IS_LARGE, NX_SET, ++ X_IS_NEGATIVE_LONG_PI; ++ Register X = A0, n = A2, ix = A3, jv = A4, tmp5 = A5, jx = A6, ++ tmp3 = A7, iqBase = T0, ih = T1, i = T2; ++ FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, ++ vt = FT1, v24 = FT8, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v31 = FT15; ++ ++ push2(S0, S1); ++ ++ // initializing constants first ++ li(SCR1, 0x3ff921fb54400000); // PIO2_1 ++ li(SCR2, 0x4002d97c); // 3*PI/4 high word ++ movgr2fr_d(v1, SCR1); // v1 = PIO2_1 ++ bge(ix, SCR2, X_IS_MEDIUM_OR_LARGE); ++ ++ block_comment("if(ix<0x4002d97c) {... 
/* |x| ~< 3pi/4 */ "); { ++ blt(X, R0, X_IS_NEGATIVE); ++ ++ block_comment("if(hx>0) {"); { ++ fsub_d(v2, v0, v1); // v2 = z = x - pio2_1 ++ srli_d(SCR1, SCR1, 32); ++ li(n, 1); ++ beq(ix, SCR1, X_IS_POSITIVE_LONG_PI); ++ ++ block_comment("case: hx > 0 && ix!=0x3ff921fb {"); { /* 33+53 bit pi is good enough */ ++ li(SCR2, PIO2_1t); ++ movgr2fr_d(v27, SCR2); ++ fsub_d(v4, v2, v27); // v4 = y[0] = z - pio2_1t; ++ fsub_d(v5, v2, v4); ++ fsub_d(v5, v5, v27); // v5 = y[1] = (z-y[0])-pio2_1t ++ b(REDUCTION_DONE); ++ } ++ ++ block_comment("case: hx > 0 &*& ix==0x3ff921fb {"); { /* near pi/2, use 33+33+53 bit pi */ ++ bind(X_IS_POSITIVE_LONG_PI); ++ li(SCR1, PIO2_2); ++ li(SCR2, PIO2_2t); ++ movgr2fr_d(v27, SCR1); ++ movgr2fr_d(v6, SCR2); ++ fsub_d(v2, v2, v27); // z-= pio2_2 ++ fsub_d(v4, v2, v6); // y[0] = z - pio2_2t ++ fsub_d(v5, v2, v4); ++ fsub_d(v5, v5, v6); // v5 = (z - y[0]) - pio2_2t ++ b(REDUCTION_DONE); ++ } ++ } ++ ++ block_comment("case: hx <= 0)"); { ++ bind(X_IS_NEGATIVE); ++ fadd_d(v2, v0, v1); // v2 = z = x + pio2_1 ++ srli_d(SCR1, SCR1, 32); ++ li(n, -1); ++ beq(ix, SCR1, X_IS_NEGATIVE_LONG_PI); ++ ++ block_comment("case: hx <= 0 && ix!=0x3ff921fb) {"); { /* 33+53 bit pi is good enough */ ++ li(SCR2, PIO2_1t); ++ movgr2fr_d(v27, SCR2); ++ fadd_d(v4, v2, v27); // v4 = y[0] = z + pio2_1t; ++ fsub_d(v5, v2, v4); ++ fadd_d(v5, v5, v27); // v5 = y[1] = (z-y[0]) + pio2_1t ++ b(REDUCTION_DONE); ++ } ++ ++ block_comment("case: hx <= 0 && ix==0x3ff921fb"); { /* near pi/2, use 33+33+53 bit pi */ ++ bind(X_IS_NEGATIVE_LONG_PI); ++ li(SCR1, PIO2_2); ++ li(SCR2, PIO2_2t); ++ movgr2fr_d(v27, SCR1); ++ movgr2fr_d(v6, SCR2); ++ fadd_d(v2, v2, v27); // z += pio2_2 ++ fadd_d(v4, v2, v6); // y[0] = z + pio2_2t ++ fsub_d(v5, v2, v4); ++ fadd_d(v5, v5, v6); // v5 = (z - y[0]) + pio2_2t ++ b(REDUCTION_DONE); ++ } ++ } ++ } ++ bind(X_IS_MEDIUM_OR_LARGE); ++ li(SCR1, 0x413921fb); ++ blt(SCR1, ix, X_IS_LARGE); // ix < = 0x413921fb ? ++ ++ block_comment("|x| ~<= 2^19*(pi/2), medium size"); { ++ li(ih, npio2_hw); ++ fld_d(v4, ih, 0); ++ fld_d(v5, ih, 8); ++ fld_d(v6, ih, 16); ++ fld_d(v7, ih, 24); ++ fabs_d(v31, v0); // v31 = t = |x| ++ addi_d(ih, ih, 64); ++ fmadd_d(v2, v31, v5, v4); // v2 = t * invpio2 + half (invpio2 = 53 bits of 2/pi, half = 0.5) ++ ftintrz_w_d(vt, v2); // n = (int) v2 ++ movfr2gr_s(n, vt); ++ vfrintrz_d(v2, v2); ++ fnmsub_d(v3, v2, v6, v31); // v3 = r = t - fn * pio2_1 ++ fmul_d(v26, v2, v7); // v26 = w = fn * pio2_1t ++ fsub_d(v4, v3, v26); // y[0] = r - w. Calculated before branch ++ li(SCR1, 32); ++ blt(SCR1, n, LARGE_ELSE); ++ addi_w(tmp5, n, -1); // tmp5 = n - 1 ++ alsl_d(tmp5, tmp5, ih, 2 - 1); ++ ld_w(jv, tmp5, 0); ++ bne(ix, jv, X_IS_MEDIUM_BRANCH_DONE); ++ ++ block_comment("else block for if(n<32&&ix!=npio2_hw[n-1])"); { ++ bind(LARGE_ELSE); ++ movfr2gr_d(jx, v4); ++ srli_d(tmp5, ix, 20); // j = ix >> 20 ++ slli_d(jx, jx, 1); ++ srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++ sub_d(tmp3, tmp5, tmp3); ++ ++ block_comment("if(i>16)"); { ++ li(SCR1, 16); ++ bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); ++ // i > 16. 
2nd iteration needed ++ fld_d(v6, ih, -32); ++ fld_d(v7, ih, -24); ++ fmov_d(v28, v3); // t = r ++ fmul_d(v29, v2, v6); // w = v29 = fn * pio2_2 ++ fsub_d(v3, v28, v29); // r = t - w ++ fsub_d(v31, v28, v3); // v31 = (t - r) ++ fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) ++ fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_2t - ((t - r) - w) ++ fsub_d(v4, v3, v26); // y[0] = r - w ++ movfr2gr_d(jx, v4); ++ slli_d(jx, jx, 1); ++ srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++ sub_d(tmp3, tmp5, tmp3); ++ ++ block_comment("if(i>49)"); { ++ li(SCR1, 49); ++ bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); ++ // 3rd iteration need, 151 bits acc ++ fld_d(v6, ih, -16); ++ fld_d(v7, ih, -8); ++ fmov_d(v28, v3); // save "r" ++ fmul_d(v29, v2, v6); // v29 = fn * pio2_3 ++ fsub_d(v3, v28, v29); // r = r - w ++ fsub_d(v31, v28, v3); // v31 = (t - r) ++ fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) ++ fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_3t - ((t - r) - w) ++ fsub_d(v4, v3, v26); // y[0] = r - w ++ } ++ } ++ } ++ block_comment("medium x tail"); { ++ bind(X_IS_MEDIUM_BRANCH_DONE); ++ fsub_d(v5, v3, v4); // v5 = y[1] = (r - y[0]) ++ fsub_d(v5, v5, v26); // v5 = y[1] = (r - y[0]) - w ++ blt(R0, X, REDUCTION_DONE); ++ fneg_d(v4, v4); ++ sub_w(n, R0, n); ++ fneg_d(v5, v5); ++ b(REDUCTION_DONE); ++ } ++ } ++ ++ block_comment("all other (large) arguments"); { ++ bind(X_IS_LARGE); ++ srli_d(SCR1, ix, 20); // ix >> 20 ++ li(tmp5, 0x4170000000000000); ++ addi_w(SCR1, SCR1, -1046); // e0 ++ movgr2fr_d(v24, tmp5); // init two24A value ++ slli_w(jv, SCR1, 20); // ix - (e0<<20) ++ sub_w(jv, ix, jv); ++ slli_d(jv, jv, 32); ++ addi_w(SCR2, SCR1, -3); ++ bstrins_d(jv, X, 31, 0); // jv = z ++ li(i, 24); ++ movgr2fr_d(v26, jv); // v26 = z ++ ++ block_comment("unrolled for(i=0;i<2;i++) {tx[i] = (double)((int)(z));z = (z-tx[i])*two24A;}"); { ++ // tx[0,1,2] = v6,v7,v26 ++ vfrintrz_d(v6, v26); // v6 = (double)((int)v26) ++ div_w(jv, SCR2, i); // jv = (e0 - 3)/24 ++ fsub_d(v26, v26, v6); ++ addi_d(SP, SP, -560); ++ fmul_d(v26, v26, v24); ++ vfrintrz_d(v7, v26); // v7 = (double)((int)v26) ++ li(jx, 2); // calculate jx as nx - 1, which is initially 2. Not a part of unrolled loop ++ fsub_d(v26, v26, v7); ++ } ++ ++ block_comment("nx calculation with unrolled while(tx[nx-1]==zeroA) nx--;"); { ++ vxor_v(vt, vt, vt); ++ fcmp_cne_d(FCC0, v26, vt); // if NE then jx == 2. else it's 1 or 0 ++ addi_d(iqBase, SP, 480); // base of iq[] ++ fmul_d(v3, v26, v24); ++ bcnez(FCC0, NX_SET); ++ fcmp_cne_d(FCC0, v7, vt); // v7 == 0 => jx = 0. Else jx = 1 ++ movcf2gr(jx, FCC0); ++ } ++ bind(NX_SET); ++ generate__kernel_rem_pio2(two_over_pi, pio2); ++ // now we have y[0] = v4, y[1] = v5 and n = r2 ++ bge(X, R0, REDUCTION_DONE); ++ fneg_d(v4, v4); ++ fneg_d(v5, v5); ++ sub_w(n, R0, n); ++ } ++ bind(REDUCTION_DONE); ++ ++ pop2(S0, S1); ++} ++ ++///* ++// * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) ++// * double x[],y[]; int e0,nx,prec; int ipio2[]; ++// * ++// * __kernel_rem_pio2 return the last three digits of N with ++// * y = x - N*pi/2 ++// * so that |y| < pi/2. ++// * ++// * The method is to compute the integer (mod 8) and fraction parts of ++// * (2/pi)*x without doing the full multiplication. In general we ++// * skip the part of the product that are known to be a huge integer ( ++// * more accurately, = 0 mod 8 ). Thus the number of operations are ++// * independent of the exponent of the input. 
++// * ++// * NOTE: 2/pi int representation is converted to double ++// * // (2/pi) is represented by an array of 24-bit integers in ipio2[]. ++// * ++// * Input parameters: ++// * x[] The input value (must be positive) is broken into nx ++// * pieces of 24-bit integers in double precision format. ++// * x[i] will be the i-th 24 bit of x. The scaled exponent ++// * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 ++// * match x's up to 24 bits. ++// * ++// * Example of breaking a double positive z into x[0]+x[1]+x[2]: ++// * e0 = ilogb(z)-23 ++// * z = scalbn(z,-e0) ++// * for i = 0,1,2 ++// * x[i] = floor(z) ++// * z = (z-x[i])*2**24 ++// * ++// * ++// * y[] ouput result in an array of double precision numbers. ++// * The dimension of y[] is: ++// * 24-bit precision 1 ++// * 53-bit precision 2 ++// * 64-bit precision 2 ++// * 113-bit precision 3 ++// * The actual value is the sum of them. Thus for 113-bit ++// * precsion, one may have to do something like: ++// * ++// * long double t,w,r_head, r_tail; ++// * t = (long double)y[2] + (long double)y[1]; ++// * w = (long double)y[0]; ++// * r_head = t+w; ++// * r_tail = w - (r_head - t); ++// * ++// * e0 The exponent of x[0] ++// * ++// * nx dimension of x[] ++// * ++// * prec an interger indicating the precision: ++// * 0 24 bits (single) ++// * 1 53 bits (double) ++// * 2 64 bits (extended) ++// * 3 113 bits (quad) ++// * ++// * NOTE: ipio2[] array below is converted to double representation ++// * //ipio2[] ++// * // integer array, contains the (24*i)-th to (24*i+23)-th ++// * // bit of 2/pi after binary point. The corresponding ++// * // floating value is ++// * ++// * ipio2[i] * 2^(-24(i+1)). ++// * ++// * Here is the description of some local variables: ++// * ++// * jk jk+1 is the initial number of terms of ipio2[] needed ++// * in the computation. The recommended value is 2,3,4, ++// * 6 for single, double, extended,and quad. ++// * ++// * jz local integer variable indicating the number of ++// * terms of ipio2[] used. ++// * ++// * jx nx - 1 ++// * ++// * jv index for pointing to the suitable ipio2[] for the ++// * computation. In general, we want ++// * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 ++// * is an integer. Thus ++// * e0-3-24*jv >= 0 or (e0-3)/24 >= jv ++// * Hence jv = max(0,(e0-3)/24). ++// * ++// * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. ++// * ++// * q[] double array with integral value, representing the ++// * 24-bits chunk of the product of x and 2/pi. ++// * ++// * q0 the corresponding exponent of q[0]. Note that the ++// * exponent for q[i] would be q0-24*i. ++// * ++// * PIo2[] double precision array, obtained by cutting pi/2 ++// * into 24 bits chunks. ++// * ++// * f[] ipio2[] in floating point ++// * ++// * iq[] integer array by breaking up q[] in 24-bits chunk. ++// * ++// * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] ++// * ++// * ih integer. If >0 it indicates q[] is >= 0.5, hence ++// * it also indicates the *sign* of the result. ++// * ++// */ ++// ++// Use PIo2 table(see stubRoutines_loongarch64.cpp) ++// ++// BEGIN __kernel_rem_pio2 PSEUDO CODE ++// ++//static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, /* NOTE: converted to double */ const double *ipio2 // const int *ipio2) { ++// int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; ++// double z,fw,f[20],fq[20],q[20]; ++// ++// /* initialize jk*/ ++// // jk = init_jk[prec]; // NOTE: prec==2 for double. jk is always 4. 
++// jp = jk; // NOTE: always 4 ++// ++// /* determine jx,jv,q0, note that 3>q0 */ ++// jx = nx-1; ++// jv = (e0-3)/24; if(jv<0) jv=0; ++// q0 = e0-24*(jv+1); ++// ++// /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ ++// j = jv-jx; m = jx+jk; ++// ++// // NOTE: split into two for-loops: one with zeroB and one with ipio2[j]. It ++// // allows the use of wider loads/stores ++// for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; //(double) ipio2[j]; ++// ++// // NOTE: unrolled and vectorized "for". See comments in asm code ++// /* compute q[0],q[1],...q[jk] */ ++// for (i=0;i<=jk;i++) { ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++// } ++// ++// jz = jk; ++//recompute: ++// /* distill q[] into iq[] reversingly */ ++// for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++// fw = (double)((int)(twon24* z)); ++// iq[i] = (int)(z-two24B*fw); ++// z = q[j-1]+fw; ++// } ++// ++// /* compute n */ ++// z = scalbnA(z,q0); /* actual value of z */ ++// z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ ++// n = (int) z; ++// z -= (double)n; ++// ih = 0; ++// if(q0>0) { /* need iq[jz-1] to determine n */ ++// i = (iq[jz-1]>>(24-q0)); n += i; ++// iq[jz-1] -= i<<(24-q0); ++// ih = iq[jz-1]>>(23-q0); ++// } ++// else if(q0==0) ih = iq[jz-1]>>23; ++// else if(z>=0.5) ih=2; ++// ++// if(ih>0) { /* q > 0.5 */ ++// n += 1; carry = 0; ++// for(i=0;i0) { /* rare case: chance is 1 in 12 */ ++// switch(q0) { ++// case 1: ++// iq[jz-1] &= 0x7fffff; break; ++// case 2: ++// iq[jz-1] &= 0x3fffff; break; ++// } ++// } ++// if(ih==2) { ++// z = one - z; ++// if(carry!=0) z -= scalbnA(one,q0); ++// } ++// } ++// ++// /* check if recomputation is needed */ ++// if(z==zeroB) { ++// j = 0; ++// for (i=jz-1;i>=jk;i--) j |= iq[i]; ++// if(j==0) { /* need recomputation */ ++// for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */ ++// ++// for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */ ++// f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++// q[i] = fw; ++// } ++// jz += k; ++// goto recompute; ++// } ++// } ++// ++// /* chop off zero terms */ ++// if(z==0.0) { ++// jz -= 1; q0 -= 24; ++// while(iq[jz]==0) { jz--; q0-=24;} ++// } else { /* break z into 24-bit if necessary */ ++// z = scalbnA(z,-q0); ++// if(z>=two24B) { ++// fw = (double)((int)(twon24*z)); ++// iq[jz] = (int)(z-two24B*fw); ++// jz += 1; q0 += 24; ++// iq[jz] = (int) fw; ++// } else iq[jz] = (int) z ; ++// } ++// ++// /* convert integer "bit" chunk to floating-point value */ ++// fw = scalbnA(one,q0); ++// for(i=jz;i>=0;i--) { ++// q[i] = fw*(double)iq[i]; fw*=twon24; ++// } ++// ++// /* compute PIo2[0,...,jp]*q[jz,...,0] */ ++// for(i=jz;i>=0;i--) { ++// for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++// fq[jz-i] = fw; ++// } ++// ++// // NOTE: switch below is eliminated, because prec is always 2 for doubles ++// /* compress fq[] into y[] */ ++// //switch(prec) { ++// //case 0: ++// // fw = 0.0; ++// // for (i=jz;i>=0;i--) fw += fq[i]; ++// // y[0] = (ih==0)? fw: -fw; ++// // break; ++// //case 1: ++// //case 2: ++// fw = 0.0; ++// for (i=jz;i>=0;i--) fw += fq[i]; ++// y[0] = (ih==0)? fw: -fw; ++// fw = fq[0]-fw; ++// for (i=1;i<=jz;i++) fw += fq[i]; ++// y[1] = (ih==0)? 
fw: -fw; ++// // break; ++// //case 3: /* painful */ ++// // for (i=jz;i>0;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (i=jz;i>1;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; ++// // if(ih==0) { ++// // y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; ++// // } else { ++// // y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; ++// // } ++// //} ++// return n&7; ++//} ++// ++// END __kernel_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. One loop is unrolled and vectorized (see comments in code) ++// 2. One loop is split into 2 loops (see comments in code) ++// 3. Non-double code is removed(last switch). Sevaral variables became ++// constants because of that (see comments in code) ++// 4. Use of jx, which is nx-1 instead of nx ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// Input and output: ++// 1. Input: X = A0, jx == nx - 1 == A6, e0 == SCR1 ++// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. See comments in code about their usage ++void MacroAssembler::generate__kernel_rem_pio2(address two_over_pi, address pio2) { ++ Label Q_DONE, JX_IS_0, JX_IS_2, COMP_INNER_LOOP, RECOMP_FOR2, Q0_ZERO_CMP_LT, ++ RECOMP_CHECK_DONE_NOT_ZERO, Q0_ZERO_CMP_DONE, COMP_FOR, Q0_ZERO_CMP_EQ, ++ INIT_F_ZERO, RECOMPUTE, IH_FOR_INCREMENT, IH_FOR_STORE, RECOMP_CHECK_DONE, ++ Z_IS_LESS_THAN_TWO24B, Z_IS_ZERO, FW_Y1_NO_NEGATION, ++ RECOMP_FW_UPDATED, Z_ZERO_CHECK_DONE, FW_FOR1, IH_AFTER_SWITCH, IH_HANDLED, ++ CONVERTION_FOR, FW_Y0_NO_NEGATION, FW_FOR1_DONE, FW_FOR2, FW_FOR2_DONE, ++ IH_FOR, SKIP_F_LOAD, RECOMP_FOR1, RECOMP_FIRST_FOR, INIT_F_COPY, ++ RECOMP_FOR1_CHECK; ++ Register tmp2 = A1, n = A2, jv = A4, tmp5 = A5, jx = A6, ++ tmp3 = A7, iqBase = T0, ih = T1, i = T2, tmp1 = T3, ++ jz = S0, j = T5, twoOverPiBase = T6, tmp4 = S1, qBase = T8; ++ FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, ++ vt = FT1, v17 = FT2, v18 = FT3, v19 = FT4, v20 = FT5, v21 = FT6, v22 = FT7, v24 = FT8, ++ v25 = FT9, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v30 = FT14, v31 = FT15; ++ // jp = jk == init_jk[prec] = init_jk[2] == {2,3,4,6}[2] == 4 ++ // jx = nx - 1 ++ li(twoOverPiBase, two_over_pi); ++ slti(SCR2, jv, 0); ++ addi_w(tmp4, jx, 4); // tmp4 = m = jx + jk = jx + 4. jx is in {0,1,2} so m is in [4,5,6] ++ masknez(jv, jv, SCR2); ++ if (UseLASX) ++ xvxor_v(v26, v26, v26); ++ else ++ vxor_v(v26, v26, v26); ++ addi_w(tmp5, jv, 1); // jv+1 ++ sub_w(j, jv, jx); ++ addi_d(qBase, SP, 320); // base of q[] ++ mul_w(SCR2, i, tmp5); // q0 = e0-24*(jv+1) ++ sub_w(SCR1, SCR1, SCR2); ++ // use double f[20], fq[20], q[20], iq[20] on stack, which is ++ // (20 + 20 + 20) x 8 + 20 x 4 = 560 bytes. From lower to upper addresses it ++ // will contain f[20], fq[20], q[20], iq[20] ++ // now initialize f[20] indexes 0..m (inclusive) ++ // for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j]; ++ move(tmp5, SP); ++ ++ block_comment("for(i=0;i<=m;i++,j++) f[i] = (j<0)? 
zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j];"); { ++ xorr(i, i, i); ++ bge(j, R0, INIT_F_COPY); ++ bind(INIT_F_ZERO); ++ if (UseLASX) { ++ xvst(v26, tmp5, 0); ++ } else { ++ vst(v26, tmp5, 0); ++ vst(v26, tmp5, 16); ++ } ++ addi_d(tmp5, tmp5, 32); ++ addi_w(i, i, 4); ++ addi_w(j, j, 4); ++ blt(j, R0, INIT_F_ZERO); ++ sub_w(i, i, j); ++ move(j, R0); ++ bind(INIT_F_COPY); ++ alsl_d(tmp1, j, twoOverPiBase, 3 - 1); // ipio2[j] start address ++ if (UseLASX) { ++ xvld(v18, tmp1, 0); ++ xvld(v19, tmp1, 32); ++ } else { ++ vld(v18, tmp1, 0); ++ vld(v19, tmp1, 16); ++ vld(v20, tmp1, 32); ++ vld(v21, tmp1, 48); ++ } ++ alsl_d(tmp5, i, SP, 3 - 1); ++ if (UseLASX) { ++ xvst(v18, tmp5, 0); ++ xvst(v19, tmp5, 32); ++ } else { ++ vst(v18, tmp5, 0); ++ vst(v19, tmp5, 16); ++ vst(v20, tmp5, 32); ++ vst(v21, tmp5, 48); ++ } ++ } ++ // v18..v21 can actually contain f[0..7] ++ beqz(i, SKIP_F_LOAD); // i == 0 => f[i] == f[0] => already loaded ++ if (UseLASX) { ++ xvld(v18, SP, 0); // load f[0..7] ++ xvld(v19, SP, 32); ++ } else { ++ vld(v18, SP, 0); // load f[0..7] ++ vld(v19, SP, 16); ++ vld(v20, SP, 32); ++ vld(v21, SP, 48); ++ } ++ bind(SKIP_F_LOAD); ++ // calculate 2^q0 and 2^-q0, which we'll need further. ++ // q0 is exponent. So, calculate biased exponent(q0+1023) ++ sub_w(tmp4, R0, SCR1); ++ addi_w(tmp5, SCR1, 1023); ++ addi_w(tmp4, tmp4, 1023); ++ // Unroll following for(s) depending on jx in [0,1,2] ++ // for (i=0;i<=jk;i++) { ++ // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++ // } ++ // Unrolling for jx == 0 case: ++ // q[0] = x[0] * f[0] ++ // q[1] = x[0] * f[1] ++ // q[2] = x[0] * f[2] ++ // q[3] = x[0] * f[3] ++ // q[4] = x[0] * f[4] ++ // ++ // Vectorization for unrolled jx == 0 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[0] ++ // {q[2], q[3]} = {f[2], f[3]} * x[0] ++ // q[4] = f[4] * x[0] ++ // ++ // Unrolling for jx == 1 case: ++ // q[0] = x[0] * f[1] + x[1] * f[0] ++ // q[1] = x[0] * f[2] + x[1] * f[1] ++ // q[2] = x[0] * f[3] + x[1] * f[2] ++ // q[3] = x[0] * f[4] + x[1] * f[3] ++ // q[4] = x[0] * f[5] + x[1] * f[4] ++ // ++ // Vectorization for unrolled jx == 1 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[1] ++ // {q[2], q[3]} = {f[2], f[3]} * x[1] ++ // q[4] = f[4] * x[1] ++ // {q[0], q[1]} += {f[1], f[2]} * x[0] ++ // {q[2], q[3]} += {f[3], f[4]} * x[0] ++ // q[4] += f[5] * x[0] ++ // ++ // Unrolling for jx == 2 case: ++ // q[0] = x[0] * f[2] + x[1] * f[1] + x[2] * f[0] ++ // q[1] = x[0] * f[3] + x[1] * f[2] + x[2] * f[1] ++ // q[2] = x[0] * f[4] + x[1] * f[3] + x[2] * f[2] ++ // q[3] = x[0] * f[5] + x[1] * f[4] + x[2] * f[3] ++ // q[4] = x[0] * f[6] + x[1] * f[5] + x[2] * f[4] ++ // ++ // Vectorization for unrolled jx == 2 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[2] ++ // {q[2], q[3]} = {f[2], f[3]} * x[2] ++ // q[4] = f[4] * x[2] ++ // {q[0], q[1]} += {f[1], f[2]} * x[1] ++ // {q[2], q[3]} += {f[3], f[4]} * x[1] ++ // q[4] += f[5] * x[1] ++ // {q[0], q[1]} += {f[2], f[3]} * x[0] ++ // {q[2], q[3]} += {f[4], f[5]} * x[0] ++ // q[4] += f[6] * x[0] ++ block_comment("unrolled and vectorized computation of q[0]..q[jk]"); { ++ li(SCR2, 1); ++ slli_d(tmp5, tmp5, 52); // now it's 2^q0 double value ++ slli_d(tmp4, tmp4, 52); // now it's 2^-q0 double value ++ if (UseLASX) ++ xvpermi_d(v6, v6, 0); ++ else ++ vreplvei_d(v6, v6, 0); ++ blt(jx, SCR2, JX_IS_0); ++ addi_d(i, SP, 8); ++ if (UseLASX) { ++ xvld(v26, i, 0); // load f[1..4] ++ xvpermi_d(v3, v3, 0); ++ xvpermi_d(v7, v7, 0); ++ xvpermi_d(v20, v19, 85); ++ xvpermi_d(v21, v19, 170); ++ } else { ++ 
vld(v26, i, 0); // load f[1..4] ++ vld(v27, i, 16); ++ vreplvei_d(v3, v3, 0); ++ vreplvei_d(v7, v7, 0); ++ vreplvei_d(vt, v20, 1); ++ vreplvei_d(v21, v21, 0); ++ } ++ blt(SCR2, jx, JX_IS_2); ++ // jx == 1 ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v7); // f[0,3] * x[1] ++ fmul_d(v30, v19, v7); // f[4] * x[1] ++ xvfmadd_d(v28, v26, v6, v28); ++ fmadd_d(v30, v6, v20, v30); // v30 += f[5] * x[0] ++ } else { ++ vfmul_d(v28, v18, v7); // f[0,1] * x[1] ++ vfmul_d(v29, v19, v7); // f[2,3] * x[1] ++ fmul_d(v30, v20, v7); // f[4] * x[1] ++ vfmadd_d(v28, v26, v6, v28); ++ vfmadd_d(v29, v27, v6, v29); ++ fmadd_d(v30, v6, vt, v30); // v30 += f[5] * x[0] ++ } ++ b(Q_DONE); ++ bind(JX_IS_2); ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v3); // f[0,3] * x[2] ++ fmul_d(v30, v19, v3); // f[4] * x[2] ++ xvfmadd_d(v28, v26, v7, v28); ++ fmadd_d(v30, v7, v20, v30); // v30 += f[5] * x[1] ++ xvpermi_q(v18, v19, 3); ++ xvfmadd_d(v28, v18, v6, v28); ++ } else { ++ vfmul_d(v28, v18, v3); // f[0,1] * x[2] ++ vfmul_d(v29, v19, v3); // f[2,3] * x[2] ++ fmul_d(v30, v20, v3); // f[4] * x[2] ++ vfmadd_d(v28, v26, v7, v28); ++ vfmadd_d(v29, v27, v7, v29); ++ fmadd_d(v30, v7, vt, v30); // v30 += f[5] * x[1] ++ vfmadd_d(v28, v19, v6, v28); ++ vfmadd_d(v29, v20, v6, v29); ++ } ++ fmadd_d(v30, v6, v21, v30); // v30 += f[6] * x[0] ++ b(Q_DONE); ++ bind(JX_IS_0); ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ fmul_d(v30, v19, v6); // f[4] * x[0] ++ } else { ++ vfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ vfmul_d(v29, v19, v6); // f[2,3] * x[0] ++ fmul_d(v30, v20, v6); // f[4] * x[0] ++ } ++ bind(Q_DONE); ++ if (UseLASX) { ++ xvst(v28, qBase, 0); // save calculated q[0]...q[jk] ++ } else { ++ vst(v28, qBase, 0); // save calculated q[0]...q[jk] ++ vst(v29, qBase, 16); ++ } ++ fst_d(v30, qBase, 32); ++ } ++ li(i, 0x3E70000000000000); ++ li(jz, 4); ++ movgr2fr_d(v17, i); // v17 = twon24 ++ movgr2fr_d(v30, tmp5); // 2^q0 ++ vldi(v21, -960); // 0.125 (0x3fc0000000000000) ++ vldi(v20, -992); // 8.0 (0x4020000000000000) ++ movgr2fr_d(v22, tmp4); // 2^-q0 ++ ++ block_comment("recompute loop"); { ++ bind(RECOMPUTE); ++ // for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++ // fw = (double)((int)(twon24* z)); ++ // iq[i] = (int)(z-two24A*fw); ++ // z = q[j-1]+fw; ++ // } ++ block_comment("distill q[] into iq[] reversingly"); { ++ xorr(i, i, i); ++ move(j, jz); ++ alsl_d(tmp2, jz, qBase, 3 - 1); // q[jz] address ++ fld_d(v18, tmp2, 0); // z = q[j] and moving address to q[j-1] ++ addi_d(tmp2, tmp2, -8); ++ bind(RECOMP_FIRST_FOR); ++ fld_d(v27, tmp2, 0); ++ addi_d(tmp2, tmp2, -8); ++ fmul_d(v29, v17, v18); // twon24*z ++ vfrintrz_d(v29, v29); // (double)(int) ++ fnmsub_d(v28, v24, v29, v18); // v28 = z-two24A*fw ++ ftintrz_w_d(vt, v28); // (int)(z-two24A*fw) ++ alsl_d(SCR2, i, iqBase, 2 - 1); ++ fst_s(vt, SCR2, 0); ++ fadd_d(v18, v27, v29); ++ addi_w(i, i, 1); ++ addi_w(j, j, -1); ++ blt(R0, j, RECOMP_FIRST_FOR); ++ } ++ // compute n ++ fmul_d(v18, v18, v30); ++ fmul_d(v2, v18, v21); ++ vfrintrm_d(v2, v2); // v2 = floor(v2) == rounding towards -inf ++ fnmsub_d(v18, v2, v20, v18); // z -= 8.0*floor(z*0.125); ++ li(ih, 2); ++ vfrintrz_d(v2, v18); // v2 = (double)((int)z) ++ ftintrz_w_d(vt, v18); // n = (int) z; ++ movfr2gr_s(n, vt); ++ fsub_d(v18, v18, v2); // z -= (double)n; ++ ++ block_comment("q0-dependent initialization"); { ++ blt(SCR1, R0, Q0_ZERO_CMP_LT); // if (q0 > 0) ++ addi_w(j, jz, -1); // j = jz - 1 ++ alsl_d(SCR2, j, iqBase, 2 - 1); ++ ld_w(tmp2, SCR2, 0); // tmp2 = iq[jz-1] ++ beq(SCR1, R0, Q0_ZERO_CMP_EQ); ++ li(tmp4, 24); ++ 
sub_w(tmp4, tmp4, SCR1); // == 24 - q0 ++ srl_w(i, tmp2, tmp4); // i = iq[jz-1] >> (24-q0) ++ sll_w(tmp5, i, tmp4); ++ sub_w(tmp2, tmp2, tmp5); // iq[jz-1] -= i<<(24-q0); ++ alsl_d(SCR2, j, iqBase, 2 - 1); ++ st_w(tmp2, SCR2, 0); // store iq[jz-1] ++ addi_w(SCR2, tmp4, -1); // == 23 - q0 ++ add_w(n, n, i); // n+=i ++ srl_w(ih, tmp2, SCR2); // ih = iq[jz-1] >> (23-q0) ++ b(Q0_ZERO_CMP_DONE); ++ bind(Q0_ZERO_CMP_EQ); ++ srli_d(ih, tmp2, 23); // ih = iq[z-1] >> 23 ++ b(Q0_ZERO_CMP_DONE); ++ bind(Q0_ZERO_CMP_LT); ++ vldi(v4, -928); // 0.5 (0x3fe0000000000000) ++ fcmp_clt_d(FCC0, v18, v4); ++ movcf2gr(SCR2, FCC0); ++ masknez(ih, ih, SCR2); // if (z<0.5) ih = 0 ++ } ++ bind(Q0_ZERO_CMP_DONE); ++ bge(R0, ih, IH_HANDLED); ++ ++ block_comment("if(ih>) {"); { ++ // use rscratch2 as carry ++ ++ block_comment("for(i=0;i0) {"); { ++ bge(R0, SCR1, IH_AFTER_SWITCH); ++ // tmp3 still has iq[jz-1] value. no need to reload ++ // now, zero high tmp3 bits (rscratch1 number of bits) ++ li(j, 0xffffffff); ++ addi_w(i, jz, -1); // set i to jz-1 ++ srl_d(j, j, SCR1); ++ srli_w(tmp1, j, 8); ++ andr(tmp3, tmp3, tmp1); // we have 24-bit-based constants ++ alsl_d(tmp1, i, iqBase, 2 - 1); ++ st_w(tmp3, tmp1, 0); // save iq[jz-1] ++ } ++ bind(IH_AFTER_SWITCH); ++ li(tmp1, 2); ++ bne(ih, tmp1, IH_HANDLED); ++ ++ block_comment("if(ih==2) {"); { ++ vldi(v25, -912); // 1.0 (0x3ff0000000000000) ++ fsub_d(v18, v25, v18); // z = one - z; ++ beqz(SCR2, IH_HANDLED); ++ fsub_d(v18, v18, v30); // z -= scalbnA(one,q0); ++ } ++ } ++ bind(IH_HANDLED); ++ // check if recomputation is needed ++ vxor_v(vt, vt, vt); ++ fcmp_cne_d(FCC0, v18, vt); ++ bcnez(FCC0, RECOMP_CHECK_DONE_NOT_ZERO); ++ ++ block_comment("if(z==zeroB) {"); { ++ ++ block_comment("for (i=jz-1;i>=jk;i--) j |= iq[i];"); { ++ addi_w(i, jz, -1); ++ xorr(j, j, j); ++ b(RECOMP_FOR1_CHECK); ++ bind(RECOMP_FOR1); ++ alsl_d(tmp1, i, iqBase, 2 - 1); ++ ld_w(tmp1, tmp1, 0); ++ orr(j, j, tmp1); ++ addi_w(i, i, -1); ++ bind(RECOMP_FOR1_CHECK); ++ li(SCR2, 4); ++ bge(i, SCR2, RECOMP_FOR1); ++ } ++ bnez(j, RECOMP_CHECK_DONE); ++ ++ block_comment("if(j==0) {"); { ++ // for(k=1;iq[jk-k]==0;k++); // let's unroll it. jk == 4. So, read ++ // iq[3], iq[2], iq[1], iq[0] until non-zero value ++ ld_d(tmp1, iqBase, 0); // iq[0..3] ++ ld_d(tmp3, iqBase, 8); ++ li(j, 2); ++ masknez(tmp1, tmp1, tmp3); // set register for further consideration ++ orr(tmp1, tmp1, tmp3); ++ masknez(j, j, tmp3); // set initial k. Use j as k ++ srli_d(SCR2, tmp1, 32); ++ sltu(SCR2, R0, SCR2); ++ addi_w(i, jz, 1); ++ add_w(j, j, SCR2); ++ ++ block_comment("for(i=jz+1;i<=jz+k;i++) {...}"); { ++ add_w(jz, i, j); // i = jz+1, j = k-1. 
j+i = jz+k (which is a new jz) ++ bind(RECOMP_FOR2); ++ add_w(tmp1, jv, i); ++ alsl_d(SCR2, tmp1, twoOverPiBase, 3 - 1); ++ fld_d(v29, SCR2, 0); ++ add_w(tmp2, jx, i); ++ alsl_d(SCR2, tmp2, SP, 3 - 1); ++ fst_d(v29, SCR2, 0); ++ // f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++ // since jx = 0, 1 or 2 we can unroll it: ++ // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++ // f[jx+i-j] == (for first iteration) f[jx+i], which is already v29 ++ alsl_d(tmp2, tmp2, SP, 3 - 1); // address of f[jx+i] ++ fld_d(v4, tmp2, -16); // load f[jx+i-2] and f[jx+i-1] ++ fld_d(v5, tmp2, -8); ++ fmul_d(v26, v6, v29); // initial fw ++ beqz(jx, RECOMP_FW_UPDATED); ++ fmadd_d(v26, v7, v5, v26); ++ li(SCR2, 1); ++ beq(jx, SCR2, RECOMP_FW_UPDATED); ++ fmadd_d(v26, v3, v4, v26); ++ bind(RECOMP_FW_UPDATED); ++ alsl_d(SCR2, i, qBase, 3 - 1); ++ fst_d(v26, SCR2, 0); // q[i] = fw; ++ addi_w(i, i, 1); ++ bge(jz, i, RECOMP_FOR2); // jz here is "old jz" + k ++ } ++ b(RECOMPUTE); ++ } ++ } ++ } ++ bind(RECOMP_CHECK_DONE); ++ // chop off zero terms ++ vxor_v(vt, vt, vt); ++ fcmp_ceq_d(FCC0, v18, vt); ++ bcnez(FCC0, Z_IS_ZERO); ++ ++ block_comment("else block of if(z==0.0) {"); { ++ bind(RECOMP_CHECK_DONE_NOT_ZERO); ++ fmul_d(v18, v18, v22); ++ fcmp_clt_d(FCC0, v18, v24); // v24 is stil two24A ++ bcnez(FCC0, Z_IS_LESS_THAN_TWO24B); ++ fmul_d(v1, v18, v17); // twon24*z ++ vfrintrz_d(v1, v1); // v1 = (double)(int)(v1) ++ fnmsub_d(v2, v24, v1, v18); ++ ftintrz_w_d(vt, v1); // (int)fw ++ movfr2gr_s(tmp3, vt); ++ ftintrz_w_d(vt, v2); // double to int ++ movfr2gr_s(tmp2, vt); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ st_w(tmp2, SCR2, 0); ++ addi_w(SCR1, SCR1, 24); ++ addi_w(jz, jz, 1); ++ st_w(tmp3, SCR2, 0); // iq[jz] = (int) fw ++ b(Z_ZERO_CHECK_DONE); ++ bind(Z_IS_LESS_THAN_TWO24B); ++ ftintrz_w_d(vt, v18); // (int)z ++ movfr2gr_s(tmp3, vt); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ st_w(tmp3, SCR2, 0); // iq[jz] = (int) z ++ b(Z_ZERO_CHECK_DONE); ++ } ++ ++ block_comment("if(z==0.0) {"); { ++ bind(Z_IS_ZERO); ++ addi_w(jz, jz, -1); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ ld_w(tmp1, SCR2, 0); ++ addi_w(SCR1, SCR1, -24); ++ beqz(tmp1, Z_IS_ZERO); ++ } ++ bind(Z_ZERO_CHECK_DONE); ++ // convert integer "bit" chunk to floating-point value ++ // v17 = twon24 ++ // update v30, which was scalbnA(1.0, ); ++ addi_w(tmp2, SCR1, 1023); // biased exponent ++ slli_d(tmp2, tmp2, 52); // put at correct position ++ move(i, jz); ++ movgr2fr_d(v30, tmp2); ++ ++ block_comment("for(i=jz;i>=0;i--) {q[i] = fw*(double)iq[i]; fw*=twon24;}"); { ++ bind(CONVERTION_FOR); ++ alsl_d(SCR2, i, iqBase, 2 - 1); ++ fld_s(v31, SCR2, 0); ++ vffintl_d_w(v31, v31); ++ fmul_d(v31, v31, v30); ++ alsl_d(SCR2, i, qBase, 3 - 1); ++ fst_d(v31, SCR2, 0); ++ fmul_d(v30, v30, v17); ++ addi_w(i, i, -1); ++ bge(i, R0, CONVERTION_FOR); ++ } ++ addi_d(SCR2, SP, 160); // base for fq ++ // reusing twoOverPiBase ++ li(twoOverPiBase, pio2); ++ ++ block_comment("compute PIo2[0,...,jp]*q[jz,...,0]. 
for(i=jz;i>=0;i--) {...}"); { ++ move(i, jz); ++ move(tmp2, R0); // tmp2 will keep jz - i == 0 at start ++ bind(COMP_FOR); ++ // for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++ vxor_v(v30, v30, v30); ++ alsl_d(tmp5, i, qBase, 3 - 1); // address of q[i+k] for k==0 ++ li(tmp3, 4); ++ slti(tmp4, tmp2, 5); ++ alsl_d(tmp1, i, qBase, 3 - 1); // used as q[i] address ++ masknez(tmp3, tmp3, tmp4); // min(jz - i, jp); ++ maskeqz(tmp4, tmp2, tmp4); ++ orr(tmp3, tmp3, tmp4); ++ move(tmp4, R0); // used as k ++ ++ block_comment("for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];"); { ++ bind(COMP_INNER_LOOP); ++ alsl_d(tmp5, tmp4, tmp1, 3 - 1); ++ fld_d(v18, tmp5, 0); // q[i+k] ++ alsl_d(tmp5, tmp4, twoOverPiBase, 3 - 1); ++ fld_d(v19, tmp5, 0); // PIo2[k] ++ fmadd_d(v30, v18, v19, v30); // fw += PIo2[k]*q[i+k]; ++ addi_w(tmp4, tmp4, 1); // k++ ++ bge(tmp3, tmp4, COMP_INNER_LOOP); ++ } ++ alsl_d(tmp5, tmp2, SCR2, 3 - 1); ++ fst_d(v30, tmp5, 0); // fq[jz-i] ++ addi_d(tmp2, tmp2, 1); ++ addi_w(i, i, -1); ++ bge(i, R0, COMP_FOR); ++ } ++ ++ block_comment("switch(prec) {...}. case 2:"); { ++ // compress fq into y[] ++ // remember prec == 2 ++ ++ block_comment("for (i=jz;i>=0;i--) fw += fq[i];"); { ++ vxor_v(v4, v4, v4); ++ move(i, jz); ++ bind(FW_FOR1); ++ alsl_d(tmp5, i, SCR2, 3 - 1); ++ fld_d(v1, tmp5, 0); ++ addi_w(i, i, -1); ++ fadd_d(v4, v4, v1); ++ bge(i, R0, FW_FOR1); ++ } ++ bind(FW_FOR1_DONE); ++ // v1 contains fq[0]. so, keep it so far ++ fsub_d(v5, v1, v4); // fw = fq[0] - fw ++ beqz(ih, FW_Y0_NO_NEGATION); ++ fneg_d(v4, v4); ++ bind(FW_Y0_NO_NEGATION); ++ ++ block_comment("for (i=1;i<=jz;i++) fw += fq[i];"); { ++ li(i, 1); ++ blt(jz, i, FW_FOR2_DONE); ++ bind(FW_FOR2); ++ alsl_d(tmp5, i, SCR2, 3 - 1); ++ fld_d(v1, tmp5, 0); ++ addi_w(i, i, 1); ++ fadd_d(v5, v5, v1); ++ bge(jz, i, FW_FOR2); ++ } ++ bind(FW_FOR2_DONE); ++ beqz(ih, FW_Y1_NO_NEGATION); ++ fneg_d(v5, v5); ++ bind(FW_Y1_NO_NEGATION); ++ addi_d(SP, SP, 560); ++ } ++} ++ ++///* __kernel_sin( x, y, iy) ++// * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 ++// * Input x is assumed to be bounded by ~pi/4 in magnitude. ++// * Input y is the tail of x. ++// * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). ++// * ++// * Algorithm ++// * 1. Since sin(-x) = -sin(x), we need only to consider positive x. ++// * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. ++// * 3. sin(x) is approximated by a polynomial of degree 13 on ++// * [0,pi/4] ++// * 3 13 ++// * sin(x) ~ x + S1*x + ... + S6*x ++// * where ++// * ++// * |sin(x) 2 4 6 8 10 12 | -58 ++// * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 ++// * | x | ++// * ++// * 4. 
sin(x+y) = sin(x) + sin'(x')*y
++// * ~ sin(x) + (1-x*x/2)*y
++// * For better accuracy, let
++// * 3 2 2 2 2
++// * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
++// * then 3 2
++// * sin(x) = x + (S1*x + (x *(r-y/2)+y))
++// */
++//static const double
++//S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
++//S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
++//S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
++//S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
++//S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
++//S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
++//
++// NOTE: S1..S6 were moved into a table: StubRoutines::la::_dsin_coef
++//
++// BEGIN __kernel_sin PSEUDO CODE
++//
++//static double __kernel_sin(double x, double y, bool iy)
++//{
++// double z,r,v;
++//
++// // NOTE: not needed. moved to dsin/dcos
++// //int ix;
++// //ix = high(x)&0x7fffffff; /* high word of x */
++//
++// // NOTE: moved to dsin/dcos
++// //if(ix<0x3e400000) /* |x| < 2**-27 */
++// // {if((int)x==0) return x;} /* generate inexact */
++//
++// z = x*x;
++// v = z*x;
++// r = S2+z*(S3+z*(S4+z*(S5+z*S6)));
++// if(iy==0) return x+v*(S1+z*r);
++// else return x-((z*(half*y-v*r)-y)-v*S1);
++//}
++//
++// END __kernel_sin PSEUDO CODE
++//
++// Changes between fdlibm and intrinsic:
++// 1. Removed |x| < 2**-27 check, because it was done earlier in dsin/dcos
++// 2. Constants are now loaded from table dsin_coef
++// 3. C code parameter "int iy" was modified to "bool iyIsOne", because
++// iy is always 0 or 1. Also, iyIsOne branch was moved into
++// generation phase instead of taking it during code execution
++// Input and output:
++// 1. Input for generated function: X argument = x
++// 2. Input for generator: x = register to read argument from, iyIsOne
++// = flag indicating whether to use the low part of the argument, dsin_coef = coefficients
++// table address
++// 3. Return sin(x) value in FA0
++void MacroAssembler::generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef) {
++ FloatRegister y = FA5, z = FA6, v = FA7, r = FT0, s1 = FT1, s2 = FT2,
++ s3 = FT3, s4 = FT4, s5 = FT5, s6 = FT6, half = FT7;
++ li(SCR2, dsin_coef);
++ fld_d(s5, SCR2, 32);
++ fld_d(s6, SCR2, 40);
++ fmul_d(z, x, x); // z = x*x;
++ fld_d(s1, SCR2, 0);
++ fld_d(s2, SCR2, 8);
++ fld_d(s3, SCR2, 16);
++ fld_d(s4, SCR2, 24);
++ fmul_d(v, z, x); // v = z*x;
++
++ block_comment("calculate r = S2+z*(S3+z*(S4+z*(S5+z*S6)))"); {
++ fmadd_d(r, z, s6, s5);
++ // initialize "half" in current block to utilize 2nd FPU. However, it's
++ // not a part of this block
++ vldi(half, -928); // 0.5 (0x3fe0000000000000)
++ fmadd_d(r, z, r, s4);
++ fmadd_d(r, z, r, s3);
++ fmadd_d(r, z, r, s2);
++ }
++
++ if (!iyIsOne) {
++ // return x+v*(S1+z*r);
++ fmadd_d(s1, z, r, s1);
++ fmadd_d(FA0, v, s1, x);
++ } else {
++ // return x-((z*(half*y-v*r)-y)-v*S1);
++ fmul_d(s6, half, y); // half*y
++ fnmsub_d(s6, v, r, s6); // half*y-v*r
++ fnmsub_d(s6, z, s6, y); // y - z*(half*y-v*r) = - (z*(half*y-v*r)-y)
++ fmadd_d(s6, v, s1, s6); // - (z*(half*y-v*r)-y) + v*S1 == -((z*(half*y-v*r)-y)-v*S1)
++ fadd_d(FA0, x, s6);
++ }
++}
++
++///*
++// * __kernel_cos( x, y )
++// * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
++// * Input x is assumed to be bounded by ~pi/4 in magnitude.
++// * Input y is the tail of x.
++// *
++// * Algorithm
++// * 1. Since cos(-x) = cos(x), we need only to consider positive x.
++// * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
++// * 3. 
cos(x) is approximated by a polynomial of degree 14 on ++// * [0,pi/4] ++// * 4 14 ++// * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x ++// * where the remez error is ++// * ++// * | 2 4 6 8 10 12 14 | -58 ++// * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 ++// * | | ++// * ++// * 4 6 8 10 12 14 ++// * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then ++// * cos(x) = 1 - x*x/2 + r ++// * since cos(x+y) ~ cos(x) - sin(x)*y ++// * ~ cos(x) - x*y, ++// * a correction term is necessary in cos(x) and hence ++// * cos(x+y) = 1 - (x*x/2 - (r - x*y)) ++// * For better accuracy when x > 0.3, let qx = |x|/4 with ++// * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. ++// * Then ++// * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). ++// * Note that 1-qx and (x*x/2-qx) is EXACT here, and the ++// * magnitude of the latter is at least a quarter of x*x/2, ++// * thus, reducing the rounding error in the subtraction. ++// */ ++// ++//static const double ++//C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ ++//C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ ++//C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ ++//C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ ++//C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ ++//C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ ++// ++// NOTE: C1..C6 were moved into a table: StubRoutines::la::_dcos_coef ++// ++// BEGIN __kernel_cos PSEUDO CODE ++// ++//static double __kernel_cos(double x, double y) ++//{ ++// double a,h,z,r,qx=0; ++// ++// // NOTE: ix is already initialized in dsin/dcos. Reuse value from register ++// //int ix; ++// //ix = high(x)&0x7fffffff; /* ix = |x|'s high word*/ ++// ++// // NOTE: moved to dsin/dcos ++// //if(ix<0x3e400000) { /* if x < 2**27 */ ++// // if(((int)x)==0) return one; /* generate inexact */ ++// //} ++// ++// z = x*x; ++// r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); ++// if(ix < 0x3FD33333) /* if |x| < 0.3 */ ++// return one - (0.5*z - (z*r - x*y)); ++// else { ++// if(ix > 0x3fe90000) { /* x > 0.78125 */ ++// qx = 0.28125; ++// } else { ++// set_high(&qx, ix-0x00200000); /* x/4 */ ++// set_low(&qx, 0); ++// } ++// h = 0.5*z-qx; ++// a = one-qx; ++// return a - (h - (z*r-x*y)); ++// } ++//} ++// ++// END __kernel_cos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos ++// 2. Constants are now loaded from table dcos_coef ++// Input and output: ++// 1. Input for generated function: X argument = x ++// 2. Input for generator: x = register to read argument from, dcos_coef ++// = coefficients table address ++// 3. 
Return cos(x) value in FA0 ++void MacroAssembler::generate_kernel_cos(FloatRegister x, address dcos_coef) { ++ Register ix = A3; ++ FloatRegister qx = FA1, h = FA2, a = FA3, y = FA5, z = FA6, r = FA7, C1 = FT0, ++ C2 = FT1, C3 = FT2, C4 = FT3, C5 = FT4, C6 = FT5, one = FT6, half = FT7; ++ Label IX_IS_LARGE, SET_QX_CONST, DONE, QX_SET; ++ li(SCR2, dcos_coef); ++ fld_d(C1, SCR2, 0); ++ fld_d(C2, SCR2, 8); ++ fld_d(C3, SCR2, 16); ++ fld_d(C4, SCR2, 24); ++ fld_d(C5, SCR2, 32); ++ fld_d(C6, SCR2, 40); ++ fmul_d(z, x, x); // z=x^2 ++ block_comment("calculate r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))))"); { ++ fmadd_d(r, z, C6, C5); ++ vldi(half, -928); // 0.5 (0x3fe0000000000000) ++ fmadd_d(r, z, r, C4); ++ fmul_d(y, x, y); ++ fmadd_d(r, z, r, C3); ++ li(SCR1, 0x3FD33333); ++ fmadd_d(r, z, r, C2); ++ fmul_d(x, z, z); // x = z^2 ++ fmadd_d(r, z, r, C1); // r = C1+z(C2+z(C4+z(C5+z*C6))) ++ } ++ // need to multiply r by z to have "final" r value ++ vldi(one, -912); // 1.0 (0x3ff0000000000000) ++ bge(ix, SCR1, IX_IS_LARGE); ++ block_comment("if(ix < 0x3FD33333) return one - (0.5*z - (z*r - x*y))"); { ++ // return 1.0 - (0.5*z - (z*r - x*y)) = 1.0 - (0.5*z + (x*y - z*r)) ++ fnmsub_d(FA0, x, r, y); ++ fmadd_d(FA0, half, z, FA0); ++ fsub_d(FA0, one, FA0); ++ b(DONE); ++ } ++ block_comment("if(ix >= 0x3FD33333)"); { ++ bind(IX_IS_LARGE); ++ li(SCR2, 0x3FE90000); ++ blt(SCR2, ix, SET_QX_CONST); ++ block_comment("set_high(&qx, ix-0x00200000); set_low(&qx, 0);"); { ++ li(SCR2, 0x00200000); ++ sub_w(SCR2, ix, SCR2); ++ slli_d(SCR2, SCR2, 32); ++ movgr2fr_d(qx, SCR2); ++ } ++ b(QX_SET); ++ bind(SET_QX_CONST); ++ block_comment("if(ix > 0x3fe90000) qx = 0.28125;"); { ++ vldi(qx, -942); // 0.28125 (0x3fd2000000000000) ++ } ++ bind(QX_SET); ++ fmsub_d(C6, x, r, y); // z*r - xy ++ fmsub_d(h, half, z, qx); // h = 0.5*z - qx ++ fsub_d(a, one, qx); // a = 1-qx ++ fsub_d(C6, h, C6); // = h - (z*r - x*y) ++ fsub_d(FA0, a, C6); ++ } ++ bind(DONE); ++} ++ ++// generate_dsin_dcos creates stub for dsin and dcos ++// Generation is done via single call because dsin and dcos code is almost the ++// same(see C code below). These functions work as follows: ++// 1) handle corner cases: |x| ~< pi/4, x is NaN or INF, |x| < 2**-27 ++// 2) perform argument reduction if required ++// 3) call kernel_sin or kernel_cos which approximate sin/cos via polynomial ++// ++// BEGIN dsin/dcos PSEUDO CODE ++// ++//dsin_dcos(jdouble x, bool isCos) { ++// double y[2],z=0.0; ++// int n, ix; ++// ++// /* High word of x. */ ++// ix = high(x); ++// ++// /* |x| ~< pi/4 */ ++// ix &= 0x7fffffff; ++// if(ix <= 0x3fe921fb) return isCos ? __kernel_cos : __kernel_sin(x,z,0); ++// ++// /* sin/cos(Inf or NaN) is NaN */ ++// else if (ix>=0x7ff00000) return x-x; ++// else if (ix<0x3e400000) { /* if ix < 2**27 */ ++// if(((int)x)==0) return isCos ? one : x; /* generate inexact */ ++// } ++// /* argument reduction needed */ ++// else { ++// n = __ieee754_rem_pio2(x,y); ++// switch(n&3) { ++// case 0: return isCos ? __kernel_cos(y[0],y[1]) : __kernel_sin(y[0],y[1], true); ++// case 1: return isCos ? -__kernel_sin(y[0],y[1],true) : __kernel_cos(y[0],y[1]); ++// case 2: return isCos ? -__kernel_cos(y[0],y[1]) : -__kernel_sin(y[0],y[1], true); ++// default: ++// return isCos ? __kernel_sin(y[0],y[1],1) : -__kernel_cos(y[0],y[1]); ++// } ++// } ++//} ++// END dsin/dcos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Moved ix < 2**27 from kernel_sin/kernel_cos into dsin/dcos ++// 2. 
Final switch uses equivalent bit checks (tbz/tbnz)
++// Input and output:
++// 1. Input for generated function: X = A0
++// 2. Input for generator: isCos = generate sin or cos, npio2_hw = address
++// of npio2_hw table, two_over_pi = address of two_over_pi table,
++// pio2 = address of pio2 table, dsin_coef = address of dsin_coef table,
++// dcos_coef = address of dcos_coef table
++// 3. Return result in FA0
++// NOTE: general purpose register names match local variable names in C code
++void MacroAssembler::generate_dsin_dcos(bool isCos, address npio2_hw,
++ address two_over_pi, address pio2,
++ address dsin_coef, address dcos_coef) {
++ Label DONE, ARG_REDUCTION, TINY_X, RETURN_SIN, EARLY_CASE;
++ Register X = A0, absX = A1, n = A2, ix = A3;
++ FloatRegister y0 = FA4, y1 = FA5;
++
++ block_comment("check |x| ~< pi/4, NaN, Inf and |x| < 2**-27 cases"); {
++ movfr2gr_d(X, FA0);
++ li(SCR2, 0x3e400000);
++ li(SCR1, 0x3fe921fb); // high word of pi/4.
++ bstrpick_d(absX, X, 62, 0); // absX
++ li(T0, 0x7ff0000000000000);
++ srli_d(ix, absX, 32); // set ix
++ blt(ix, SCR2, TINY_X); // handle tiny x (|x| < 2^-27)
++ bge(SCR1, ix, EARLY_CASE); // if(ix <= 0x3fe921fb) return
++ blt(absX, T0, ARG_REDUCTION);
++ // X is NaN or INF(i.e. 0x7FF* or 0xFFF*). Return NaN (mantissa != 0).
++ // Set last bit unconditionally to make it NaN
++ ori(T0, T0, 1);
++ movgr2fr_d(FA0, T0);
++ jr(RA);
++ }
++ block_comment("kernel_sin/kernel_cos: if(ix<0x3e400000) {}"); {
++ bind(TINY_X);
++ if (isCos) {
++ vldi(FA0, -912); // 1.0 (0x3ff0000000000000)
++ }
++ jr(RA);
++ }
++ bind(ARG_REDUCTION); /* argument reduction needed */
++ block_comment("n = __ieee754_rem_pio2(x,y);"); {
++ generate__ieee754_rem_pio2(npio2_hw, two_over_pi, pio2);
++ }
++ block_comment("switch(n&3) {case ... }"); {
++ if (isCos) {
++ srli_w(T0, n, 1);
++ xorr(absX, n, T0);
++ andi(T0, n, 1);
++ bnez(T0, RETURN_SIN);
++ } else {
++ andi(T0, n, 1);
++ beqz(T0, RETURN_SIN);
++ }
++ generate_kernel_cos(y0, dcos_coef);
++ if (isCos) {
++ andi(T0, absX, 1);
++ beqz(T0, DONE);
++ } else {
++ andi(T0, n, 2);
++ beqz(T0, DONE);
++ }
++ fneg_d(FA0, FA0);
++ jr(RA);
++ bind(RETURN_SIN);
++ generate_kernel_sin(y0, true, dsin_coef);
++ if (isCos) {
++ andi(T0, absX, 1);
++ beqz(T0, DONE);
++ } else {
++ andi(T0, n, 2);
++ beqz(T0, DONE);
++ }
++ fneg_d(FA0, FA0);
++ jr(RA);
++ }
++ bind(EARLY_CASE);
++ vxor_v(y1, y1, y1);
++ if (isCos) {
++ generate_kernel_cos(FA0, dcos_coef);
++ } else {
++ generate_kernel_sin(FA0, false, dsin_coef);
++ }
++ bind(DONE);
++ jr(RA);
++}
diff --git a/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp
new file mode 100644
-index 0000000000..3fb9dce064
+index 0000000000..e517dcd415
--- /dev/null
+++ b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp
-@@ -0,0 +1,562 @@
+@@ -0,0 +1,564 @@
+/*
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
-+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved.
++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -38136,6 +39644,8 @@ index 0000000000..3fb9dce064 +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "utilities/preserveException.hpp" + +#define __ _masm-> + @@ -38736,10 +40246,10 @@ index 0000000000..f84337424b + } diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp new file mode 100644 -index 0000000000..4d0ab4644a +index 0000000000..0ee3fba75b --- /dev/null +++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp -@@ -0,0 +1,482 @@ +@@ -0,0 +1,505 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. @@ -38799,14 +40309,6 @@ index 0000000000..4d0ab4644a + ICache::invalidate_range(addr, 8); +} + -+static int illegal_instruction_bits = 0; -+ -+int NativeInstruction::illegal_instruction() { -+ //TODO: LA -+ guarantee(0, "LA not implemented yet"); -+ return 0; // mute compiler -+} -+ +bool NativeInstruction::is_int_branch() { + int op = Assembler::high(insn_word(), 6); + return op == Assembler::beqz_op || op == Assembler::bnez_op || @@ -38819,6 +40321,16 @@ index 0000000000..4d0ab4644a + return Assembler::high(insn_word(), 6) == Assembler::bccondz_op; +} + ++bool NativeInstruction::is_lu12iw_lu32id() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op; ++} ++ ++bool NativeInstruction::is_pcaddu12i_add() const { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu12i_op && ++ Assembler::high(int_at(4), 10) == Assembler::addi_d_op; ++} ++ +bool NativeCall::is_bl() const { + return Assembler::high(int_at(0), 6) == Assembler::bl_op; +} @@ -38923,6 +40435,27 @@ index 0000000000..4d0ab4644a + ICache::invalidate_range(addr_call, instruction_size); +} + ++// Generate a trampoline for a branch to dest. If there's no need for a ++// trampoline, simply patch the call directly to dest. ++address NativeCall::trampoline_jump(CodeBuffer &cbuf, address dest) { ++ MacroAssembler a(&cbuf); ++ address stub = NULL; ++ ++ if (a.far_branches() ++ && ! is_NativeCallTrampolineStub_at()) { ++ stub = a.emit_trampoline_stub(instruction_address() - cbuf.insts()->start(), dest); ++ } ++ ++ if (stub == NULL) { ++ // If we generated no stub, patch this call directly to dest. ++ // This will happen if we don't need far branches or if there ++ // already was a trampoline. ++ set_destination(dest); ++ } ++ ++ return stub; ++} ++ +void NativeCall::print() { + tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, + p2i(instruction_address()), p2i(destination())); @@ -39224,10 +40757,10 @@ index 0000000000..4d0ab4644a +} diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp new file mode 100644 -index 0000000000..fff7e67856 +index 0000000000..195a2df580 --- /dev/null +++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp -@@ -0,0 +1,533 @@ +@@ -0,0 +1,521 @@ +/* + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
@@ -39253,8 +40786,8 @@ index 0000000000..fff7e67856 + * + */ + -+#ifndef CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP ++#define CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP + +#include "asm/assembler.hpp" +#include "runtime/icache.hpp" @@ -39297,6 +40830,10 @@ index 0000000000..fff7e67856 + bool is_jump(); + bool is_safepoint_poll(); + ++ // Helper func for jvmci ++ bool is_lu12iw_lu32id() const; ++ bool is_pcaddu12i_add() const; ++ + // LoongArch has no instruction to generate a illegal instrucion exception? + // But `break 11` is not illegal instruction for LoongArch. + static int illegal_instruction(); @@ -39417,7 +40954,7 @@ index 0000000000..fff7e67856 + void set_destination_mt_safe(address dest, bool assert_lock = true); + + address get_trampoline(); -+ ++ address trampoline_jump(CodeBuffer &cbuf, address dest); +}; + +inline NativeCall* nativeCall_at(address address) { @@ -39443,9 +40980,12 @@ index 0000000000..fff7e67856 +class NativeFarCall: public NativeInstruction { + public: + enum loongarch_specific_constants { -+ instruction_size = 2 * BytesPerInstWord, ++ instruction_offset = 0, ++ instruction_size = 2 * BytesPerInstWord + }; + ++ address instruction_address() const { return addr_at(instruction_offset); } ++ + // We use MacroAssembler::patchable_call() for implementing a + // call-anywhere instruction. + bool is_short() const; @@ -39542,20 +41082,6 @@ index 0000000000..fff7e67856 + } +}; + -+// An interface for accessing/manipulating native moves of the form: -+// lui AT, split_high(offset) -+// addiu AT, split_low(offset) -+// add reg, reg, AT -+// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, 0 -+// [lw/sw/lwc1/swc1 dest, reg, 4] -+// or -+// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, offset -+// [lw/sw/lwc1/swc1 dest, reg, offset+4] -+// -+// Warning: These routines must be able to handle any instruction sequences -+// that are generated as a result of the load/store byte,word,long -+// macros. 
-+ +class NativeMovRegMem: public NativeInstruction { + public: + enum loongarch_specific_constants { @@ -39568,11 +41094,6 @@ index 0000000000..fff7e67856 + }; + + address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { -+ //TODO: LA -+ guarantee(0, "LA not implemented yet"); -+ return NULL; // mute compiler -+ } + + int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } + @@ -39760,10 +41281,10 @@ index 0000000000..fff7e67856 + assert(ni->is_NativeCallTrampolineStub_at(), "no call trampoline found"); + return (NativeCallTrampolineStub*)addr; +} -+#endif // CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp new file mode 100644 -index 0000000000..4703fd6b7a +index 0000000000..e9f0fc280d --- /dev/null +++ b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp @@ -0,0 +1,47 @@ @@ -39792,8 +41313,8 @@ index 0000000000..4703fd6b7a + * + */ + -+#ifndef CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP + +// machine-dependent implemention for register maps + friend class frame; @@ -39813,7 +41334,7 @@ index 0000000000..4703fd6b7a + void pd_initialize() {} + void pd_initialize_from(const RegisterMap* map) {} + -+#endif // CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp b/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp new file mode 100644 index 0000000000..58f40b747c @@ -39925,13 +41446,13 @@ index 0000000000..58f40b747c +REGISTER_DEFINITION(FloatRegister, f31); diff --git a/src/hotspot/cpu/loongarch/register_loongarch.cpp b/src/hotspot/cpu/loongarch/register_loongarch.cpp new file mode 100644 -index 0000000000..e0ea958edf +index 0000000000..54d90167a5 --- /dev/null +++ b/src/hotspot/cpu/loongarch/register_loongarch.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -39957,9 +41478,9 @@ index 0000000000..e0ea958edf +#include "precompiled.hpp" +#include "register_loongarch.hpp" + -+const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * RegisterImpl::max_slots_per_register; +const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + -+ 2 * FloatRegisterImpl::number_of_registers; ++ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; + + +const char* RegisterImpl::name() const { @@ -39990,7 +41511,7 @@ index 0000000000..e0ea958edf +} diff --git a/src/hotspot/cpu/loongarch/register_loongarch.hpp b/src/hotspot/cpu/loongarch/register_loongarch.hpp new file mode 100644 -index 0000000000..3ff375bd3a +index 0000000000..8d99dc9688 --- /dev/null +++ b/src/hotspot/cpu/loongarch/register_loongarch.hpp @@ -0,0 +1,428 @@ @@ -40019,8 +41540,8 @@ index 0000000000..3ff375bd3a + * + */ + -+#ifndef CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_REGISTER_LOONGARCH_HPP ++#define CPU_LOONGARCH_REGISTER_LOONGARCH_HPP + +#include "asm/register.hpp" +#include "utilities/formatBuffer.hpp" @@ -40057,7 +41578,7 @@ index 0000000000..3ff375bd3a + const char* name() const; +}; + -+// The integer registers of the LOONGARCH architecture ++// The integer registers of the LoongArch architecture +CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + + @@ -40213,7 +41734,7 @@ index 0000000000..3ff375bd3a + return (FloatRegister)(intptr_t) encoding; +} + -+// The implementation of floating point registers for the loongarch architecture ++// The implementation of floating point registers for the LoongArch architecture +class FloatRegisterImpl: public AbstractRegisterImpl { + public: + enum { @@ -40357,7 +41878,7 @@ index 0000000000..3ff375bd3a + return (ConditionalFlagRegister)(intptr_t) encoding; +} + -+// The implementation of floating point registers for the loongarch architecture ++// The implementation of floating point registers for the LoongArch architecture +class ConditionalFlagRegisterImpl: public AbstractRegisterImpl { + public: + enum { @@ -40421,16 +41942,16 @@ index 0000000000..3ff375bd3a + static const int max_fpr; +}; + -+#endif //CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP ++#endif //CPU_LOONGARCH_REGISTER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp new file mode 100644 -index 0000000000..f213cf6d7c +index 0000000000..1caba43699 --- /dev/null +++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp -@@ -0,0 +1,135 @@ +@@ -0,0 +1,132 @@ +/* + * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -40456,9 +41977,11 @@ index 0000000000..f213cf6d7c +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/compressedOops.inline.hpp" +#include "oops/oop.hpp" ++#include "oops/klass.inline.hpp" +#include "runtime/safepoint.hpp" + + @@ -40495,7 +42018,7 @@ index 0000000000..f213cf6d7c + } + } else { + // Note: Use runtime_call_type relocations for call32_operand. -+ assert(0, "call32_operand not supported in LOONGARCH64"); ++ assert(0, "call32_operand not supported in LoongArch64"); + } +} + @@ -40556,22 +42079,17 @@ index 0000000000..f213cf6d7c +void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { +} + -+/* -+void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { -+} -+*/ -+ +void metadata_Relocation::pd_fix_value(address x) { +} diff --git a/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp new file mode 100644 -index 0000000000..614c5aebaa +index 0000000000..c85ca4963f --- /dev/null +++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -40594,13 +42112,13 @@ index 0000000000..614c5aebaa + * + */ + -+#ifndef CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP ++#define CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP + + // machine-dependent parts of class relocInfo + private: + enum { -+ // Since LOONGARCH instructions are whole words, ++ // Since LoongArch instructions are whole words, + // the two low-order offset bits can always be discarded. + offset_unit = 4, + @@ -40612,7 +42130,7 @@ index 0000000000..614c5aebaa + + static bool mustIterateImmediateOopsInCode() { return false; } + -+#endif // CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp new file mode 100644 index 0000000000..334c783b37 @@ -40812,13 +42330,13 @@ index 0000000000..334c783b37 +} diff --git a/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp new file mode 100644 -index 0000000000..66026dc2d4 +index 0000000000..736ed0a85f --- /dev/null +++ b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp -@@ -0,0 +1,3576 @@ +@@ -0,0 +1,3621 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -40848,6 +42366,7 @@ index 0000000000..66026dc2d4 +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" ++#include "nativeInst_loongarch.hpp" +#include "oops/compiledICHolder.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/vframeArray.hpp" @@ -40855,6 +42374,9 @@ index 0000000000..66026dc2d4 +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif ++#if INCLUDE_JVMCI ++#include "jvmci/jvmciJavaClasses.hpp" ++#endif + +#include + @@ -41486,7 +43008,7 @@ index 0000000000..66026dc2d4 + // number (all values in registers) or the maximum stack slot accessed. + // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); + // Convert 4-byte stack slots to words. -+ // did LA need round? FIXME aoqi ++ // did LA need round? FIXME + comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; + // Round up to miminum stack alignment, in wordSize + comp_words_on_stack = round_to(comp_words_on_stack, 2); @@ -41507,6 +43029,18 @@ index 0000000000..66026dc2d4 + // Pre-load the register-jump target early, to schedule it better. + __ ld_d(T4, Rmethod, in_bytes(Method::from_compiled_offset())); + ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ // check if this call should be routed towards a specific entry point ++ __ ld_d(AT, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ Label no_alternative_target; ++ __ beqz(AT, no_alternative_target); ++ __ move(T4, AT); ++ __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ __ bind(no_alternative_target); ++ } ++#endif // INCLUDE_JVMCI ++ + // Now generate the shuffle code. Pick up all register args and move the + // rest through the floating point stack top. + for (int i = 0; i < total_args_passed; i++) { @@ -41519,8 +43053,7 @@ index 0000000000..66026dc2d4 + + // Pick up 0, 1 or 2 words from SP+offset. + -+ //FIXME. aoqi. just delete the assert -+ //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); + // Load in argument order going down. + int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; + // Point to interpreter value (vs. tag) @@ -41891,7 +43424,7 @@ index 0000000000..66026dc2d4 + __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); + } else { + if (dst.first() != src.first()){ -+ __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} @@ -41908,7 +43441,6 @@ index 0000000000..66026dc2d4 + + // must pass a handle. First figure out the location we use as a handle + -+ //FIXME, for LA, dst can be register + if (src.first()->is_stack()) { + // Oop is already on the stack as an argument + Register rHandle = T5; @@ -41920,14 +43452,11 @@ index 0000000000..66026dc2d4 + __ bind(nil); + if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); + else __ move( (dst.first())->as_Register(), rHandle); -+ //if dst is register -+ //FIXME, do LA need out preserve stack slots? 
-+ int offset_in_older_frame = src.first()->reg2stack() -+ + SharedRuntime::out_preserve_stack_slots(); ++ ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); + if (is_receiver) { -+ *receiver_offset = (offset_in_older_frame -+ + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; + } + } else { + // Oop is in an a register we must store it to the space we reserve @@ -41948,7 +43477,6 @@ index 0000000000..66026dc2d4 + // Store the handle parameter + if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); + else __ move((dst.first())->as_Register(), rHandle); -+ //if dst is register + + if (is_receiver) { + *receiver_offset = offset; @@ -41994,13 +43522,13 @@ index 0000000000..66026dc2d4 + __ ld_d(AT, FP, reg2offset_in(src.first())); + __ st_d(AT, SP, reg2offset_out(dst.first())); + } else { -+ __ ld_d( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else { + if( dst.first()->is_stack()){ -+ __ st_d( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); + } else { -+ __ move( (dst.first())->as_Register() , (src.first())->as_Register()); ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} @@ -42845,8 +44373,6 @@ index 0000000000..66026dc2d4 + __ move(SP, S2); // use S2 as a sender SP holder + __ pop(S2); + __ addi_d(SP, SP, wordSize); -+ //add for compressedoops -+ __ reinit_heapbase(); + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); + @@ -43038,8 +44564,6 @@ index 0000000000..66026dc2d4 + relocInfo::runtime_call_type); + __ addi_d(SP, SP, 2*wordSize); + __ move(SP, S2); -+ //add for compressedoops -+ __ reinit_heapbase(); +#ifdef ASSERT + { + Label L; @@ -43066,8 +44590,6 @@ index 0000000000..66026dc2d4 + save_native_result(masm, ret_type, stack_slots); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), + relocInfo::runtime_call_type); -+ //add for compressedoops -+ __ reinit_heapbase(); + restore_native_result(masm, ret_type, stack_slots); + __ b(reguard_done); + @@ -43668,8 +45190,14 @@ index 0000000000..66026dc2d4 + // allocate space for the code + ResourceMark rm; + // setup code generation tools ++ int pad = 0; ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ pad += 512; // Increase the buffer size when compiling for JVMCI ++ } ++#endif + //CodeBuffer buffer ("deopt_blob", 4000, 2048); -+ CodeBuffer buffer ("deopt_blob", 8000, 2048);//aoqi FIXME for debug ++ CodeBuffer buffer ("deopt_blob", 8000+pad, 2048); // FIXME for debug + MacroAssembler* masm = new MacroAssembler( & buffer); + int frame_size_in_words; + OopMap* map = NULL; @@ -43714,6 +45242,12 @@ index 0000000000..66026dc2d4 + __ b(cont); + + int reexecute_offset = __ pc() - start; ++#if INCLUDE_JVMCI && !defined(COMPILER1) ++ if (EnableJVMCI && UseJVMCICompiler) { ++ // JVMCI does not use this kind of deoptimization ++ __ should_not_reach_here(); ++ } ++#endif + + // Reexecute case + // return address is the pc describes what bci to do re-execute at @@ -43723,6 +45257,44 @@ index 0000000000..66026dc2d4 + __ li(reason, Deoptimization::Unpack_reexecute); + __ b(cont); + ++#if INCLUDE_JVMCI ++ Label 
after_fetch_unroll_info_call; ++ int implicit_exception_uncommon_trap_offset = 0; ++ int uncommon_trap_offset = 0; ++ ++ if (EnableJVMCI) { ++ implicit_exception_uncommon_trap_offset = __ pc() - start; ++ ++ __ ld_d(RA, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ ++ uncommon_trap_offset = __ pc() - start; ++ ++ // Save everything in sight. ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ addi_d(SP, SP, -additional_words * wordSize); ++ // fetch_unroll_info needs to call last_java_frame() ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ __ ld_w(A1, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ __ li(AT, -1); ++ __ st_w(AT, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ ++ __ li(reason, (int32_t)Deoptimization::Unpack_reexecute); ++ __ move(A0, TREG); ++ __ move(A2, reason); // exec mode ++ __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); ++ __ addi_d(SP, SP, additional_words * wordSize); ++ ++ __ reset_last_Java_frame(false); ++ ++ __ b(after_fetch_unroll_info_call); ++ } // EnableJVMCI ++#endif // INCLUDE_JVMCI ++ + int exception_offset = __ pc() - start; + // Prolog for exception case + @@ -43806,6 +45378,12 @@ index 0000000000..66026dc2d4 +#endif + __ reset_last_Java_frame(false); + ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ __ bind(after_fetch_unroll_info_call); ++ } ++#endif ++ + // Load UnrollBlock into S7 + __ move(unroll, V0); + @@ -43888,26 +45466,6 @@ index 0000000000..66026dc2d4 + __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); + __ sub_d(SP, SP, AT); + -+ // Push interpreter frames in a loop -+ // -+ //Loop: -+ // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld -+ // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] -+ // 0x000000555bd82d20: daddi t2, t2, 0xfffffff0 ; t2 -= 16 -+ // 0x000000555bd82d24: daddi sp, sp, 0xfffffff0 -+ // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp -+ // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at -+ // 0x000000555bd82d30: dadd fp, sp, zero ; fp <- sp -+ // 0x000000555bd82d34: dsub sp, sp, t2 ; sp -= t2 -+ // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); -+ // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); -+ // 0x000000555bd82d40: dadd s4, sp, zero ; move(sender_sp, SP); -+ // 0x000000555bd82d44: daddi t3, t3, 0xffffffff ; count -- -+ // 0x000000555bd82d48: daddi t1, t1, 0x4 ; sizes += 4 -+ // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18 -+ // 0x000000555bd82d50: daddi t0, t0, 0x4 ; <--- error t0 += 8 -+ // -+ // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split + Label loop; + __ bind(loop); + __ ld_d(T2, sizes, 0); // Load frame size @@ -43989,6 +45547,12 @@ index 0000000000..66026dc2d4 + masm->flush(); + _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); ++ 
_deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); ++ } ++#endif +} + +#ifdef COMPILER2 @@ -44311,7 +45875,7 @@ index 0000000000..66026dc2d4 + ResourceMark rm; + + //CodeBuffer buffer(name, 1000, 512); -+ //FIXME. aoqi. code_size ++ //FIXME. code_size + CodeBuffer buffer(name, 2000, 2048); + MacroAssembler* masm = new MacroAssembler(&buffer); + @@ -44361,8 +45925,7 @@ index 0000000000..66026dc2d4 + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, pending); + // get the returned Method* -+ //FIXME, do LA need this ? -+ __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8 ++ __ get_vm_result_2(Rmethod, thread); + __ st_ptr(Rmethod, SP, reg_save.s3_offset()); + __ st_ptr(V0, SP, reg_save.t5_offset()); + reg_save.restore_live_registers(masm); @@ -44394,10 +45957,10 @@ index 0000000000..66026dc2d4 +extern "C" int SpinPause() {return 0;} diff --git a/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp new file mode 100644 -index 0000000000..f784092519 +index 0000000000..7b5c23b8b5 --- /dev/null +++ b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp -@@ -0,0 +1,4755 @@ +@@ -0,0 +1,4827 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. @@ -44462,7 +46025,6 @@ index 0000000000..f784092519 +#define T8 RT8 + +#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) -+//#define a__ ((Assembler*)_masm)-> + +//#ifdef PRODUCT +//#define BLOCK_COMMENT(str) /* nothing */ @@ -44493,7 +46055,7 @@ index 0000000000..f784092519 + // -4 [ S1 ] + // -3 [ TSR(S2) ] + // -2 [ LVP(S7) ] -+ // -1 [ BCP(S1) ] ++ // -1 [ BCP(S0) ] + // 0 [ saved fp ] <--- fp_after_call + // 1 [ return address ] + // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp @@ -44512,7 +46074,10 @@ index 0000000000..f784092519 + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] ++ //-22 [ F31 ] + // ... 
++ //-15 [ F24 ] ++ //-14 [ S8 ] + //-13 [ thread ] + //-12 [ result_type ] <--- a2 + //-11 [ result ] <--- a1 @@ -44525,7 +46090,7 @@ index 0000000000..f784092519 + // -4 [ S1 ] + // -3 [ TSR(S2) ] + // -2 [ LVP(S7) ] -+ // -1 [ BCP(S1) ] ++ // -1 [ BCP(S0) ] + // 0 [ saved fp ] <--- fp_after_call + // 1 [ return address ] + // 2 [ ] <--- old sp @@ -44549,8 +46114,16 @@ index 0000000000..f784092519 + result_off = -11, + result_type_off = -12, + thread_off = -13, -+ total_off = thread_off - 1, + S8_off = -14, ++ F24_off = -15, ++ F25_off = -16, ++ F26_off = -17, ++ F27_off = -18, ++ F28_off = -19, ++ F29_off = -20, ++ F30_off = -21, ++ F31_off = -22, ++ total_off = F31_off, + }; + + address generate_call_stub(address& return_address) { @@ -44579,6 +46152,15 @@ index 0000000000..f784092519 + __ st_d(A7, FP, thread_off * wordSize); + __ st_d(S8, FP, S8_off * wordSize); + ++ __ fst_d(F24, FP, F24_off * wordSize); ++ __ fst_d(F25, FP, F25_off * wordSize); ++ __ fst_d(F26, FP, F26_off * wordSize); ++ __ fst_d(F27, FP, F27_off * wordSize); ++ __ fst_d(F28, FP, F28_off * wordSize); ++ __ fst_d(F29, FP, F29_off * wordSize); ++ __ fst_d(F30, FP, F30_off * wordSize); ++ __ fst_d(F31, FP, F31_off * wordSize); ++ + __ li(S8, (long)Interpreter::dispatch_table(itos)); + +#ifdef OPT_THREAD @@ -44670,6 +46252,15 @@ index 0000000000..f784092519 + __ ld_d(S5, FP, S5_off * wordSize); + __ ld_d(S6, FP, S6_off * wordSize); + ++ __ fld_d(F24, FP, F24_off * wordSize); ++ __ fld_d(F25, FP, F25_off * wordSize); ++ __ fld_d(F26, FP, F26_off * wordSize); ++ __ fld_d(F27, FP, F27_off * wordSize); ++ __ fld_d(F28, FP, F28_off * wordSize); ++ __ fld_d(F29, FP, F29_off * wordSize); ++ __ fld_d(F30, FP, F30_off * wordSize); ++ __ fld_d(F31, FP, F31_off * wordSize); ++ + __ leave(); + + // return @@ -44814,7 +46405,6 @@ index 0000000000..f784092519 + address generate_verify_oop() { + StubCodeMark mark(this, "StubRoutines", "verify_oop"); + address start = __ pc(); -+ __ reinit_heapbase(); + __ verify_oop_subroutine(); + address end = __ pc(); + return start; @@ -47673,6 +49263,25 @@ index 0000000000..f784092519 + return start; + } + ++ address generate_mulAdd() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "mulAdd"); ++ ++ address entry = __ pc(); ++ ++ const Register out = A0; ++ const Register in = A1; ++ const Register offset = A2; ++ const Register len = A3; ++ const Register k = A4; ++ ++ __ block_comment("Entry:"); ++ __ mul_add(out, in, offset, len, k); ++ __ jr(RA); ++ ++ return entry; ++ } ++ + // Arguments: + // + // Inputs: @@ -48320,6 +49929,18 @@ index 0000000000..f784092519 + return start; + } + ++ address generate_dsin_dcos(bool isCos) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", isCos ? 
"libmDcos" : "libmDsin"); ++ address start = __ pc(); ++ __ generate_dsin_dcos(isCos, (address)StubRoutines::la::_npio2_hw, ++ (address)StubRoutines::la::_two_over_pi, ++ (address)StubRoutines::la::_pio2, ++ (address)StubRoutines::la::_dsin_coef, ++ (address)StubRoutines::la::_dcos_coef); ++ return start; ++ } ++ + // add a function to implement SafeFetch32 and SafeFetchN + void generate_safefetch(const char* name, int size, address* entry, + address* fault_pc, address* continuation_pc) { @@ -49066,6 +50687,16 @@ index 0000000000..f784092519 + generate_throw_exception("delayed StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), + false); ++ ++ if (UseCRC32Intrinsics) { ++ // set table address before stub generation which use it ++ StubRoutines::_crc_table_adr = (address)StubRoutines::la::_crc_table; ++ StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); ++ } + } + + void generate_all() { @@ -49092,6 +50723,14 @@ index 0000000000..f784092519 + generate_arraycopy_stubs(); +#endif + ++ if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) { ++ StubRoutines::_dsin = generate_dsin_dcos(/* isCos = */ false); ++ } ++ ++ if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { ++ StubRoutines::_dcos = generate_dsin_dcos(/* isCos = */ true); ++ } ++ + // Safefetch stubs. + generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, + &StubRoutines::_safefetch32_fault_pc, @@ -49100,6 +50739,11 @@ index 0000000000..f784092519 + &StubRoutines::_safefetchN_fault_pc, + &StubRoutines::_safefetchN_continuation_pc); + ++#ifdef COMPILER2 ++ if (UseMulAddIntrinsic) { ++ StubRoutines::_mulAdd = generate_mulAdd(); ++ } ++ + if (UseMontgomeryMultiplyIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); + MontgomeryMultiplyGenerator g(_masm, false /* squaring */); @@ -49113,6 +50757,7 @@ index 0000000000..f784092519 + // because it's faster for the sizes of modulus we care about. + StubRoutines::_montgomerySquare = g.generate_multiply(); + } ++#endif + + if (UseAESIntrinsics) { + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(false); @@ -49128,16 +50773,6 @@ index 0000000000..f784092519 + if (UseSHA256Intrinsics) { + generate_sha256_implCompress("sha256_implCompress", StubRoutines::_sha256_implCompress, StubRoutines::_sha256_implCompressMB); + } -+ -+ if (UseCRC32Intrinsics) { -+ // set table address before stub generation which use it -+ StubRoutines::_crc_table_adr = (address)StubRoutines::la::_crc_table; -+ StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); -+ } -+ -+ if (UseCRC32CIntrinsics) { -+ StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); -+ } + } + + public: @@ -49155,13 +50790,13 @@ index 0000000000..f784092519 +} diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp new file mode 100644 -index 0000000000..6b6373c758 +index 0000000000..0ab07e1e9e --- /dev/null +++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp -@@ -0,0 +1,60 @@ +@@ -0,0 +1,67 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -49184,8 +50819,8 @@ index 0000000000..6b6373c758 + * + */ + -+#ifndef CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP -+#define CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP ++#ifndef CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP + +// This file holds the platform specific parts of the StubRoutines +// definition. See stubRoutines.hpp for a description on how to @@ -49210,6 +50845,13 @@ index 0000000000..6b6373c758 + // stack. The variable holds that location. + static address _call_stub_compiled_return; + static juint _crc_table[]; ++ // begin trigonometric tables block. See comments in .cpp file ++ static juint _npio2_hw[]; ++ static jdouble _two_over_pi[]; ++ static jdouble _pio2[]; ++ static jdouble _dsin_coef[]; ++ static jdouble _dcos_coef[]; ++ // end trigonometric tables block + +public: + // Call back points for traps in compiled code @@ -49218,283 +50860,197 @@ index 0000000000..6b6373c758 + +}; + -+#endif // CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP ++#endif // CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp new file mode 100644 -index 0000000000..51c627c786 +index 0000000000..1a6ea3bcde --- /dev/null +++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp -@@ -0,0 +1,264 @@ -+/* -+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "runtime/deoptimization.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.inline.hpp" -+ -+// a description of how to extend it, see the stubRoutines.hpp file. 
-+ -+//find the last fp value -+address StubRoutines::la::_call_stub_compiled_return = NULL; -+ -+/** -+ * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h -+ */ -+ATTRIBUTE_ALIGNED(4096) juint StubRoutines::la::_crc_table[] = -+{ -+ // Table 0 -+ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, -+ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, -+ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, -+ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, -+ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, -+ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, -+ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, -+ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, -+ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, -+ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, -+ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, -+ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, -+ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, -+ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, -+ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, -+ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, -+ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, -+ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, -+ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, -+ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, -+ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, -+ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, -+ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, -+ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, -+ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, -+ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, -+ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, -+ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, -+ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, -+ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, -+ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, -+ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, -+ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, -+ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, -+ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, -+ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, -+ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, -+ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, -+ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, -+ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, -+ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, -+ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, -+ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, -+ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, -+ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, -+ 
0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, -+ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, -+ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, -+ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, -+ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, -+ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, -+ 0x2d02ef8dUL, -+ -+ // Table 1 -+ 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, -+ 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, -+ 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, -+ 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, -+ 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, -+ 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, -+ 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, -+ 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, -+ 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, -+ 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, -+ 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, -+ 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, -+ 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, -+ 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, -+ 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, -+ 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, -+ 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, -+ 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, -+ 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, -+ 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, -+ 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, -+ 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, -+ 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, -+ 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, -+ 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, -+ 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, -+ 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, -+ 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, -+ 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, -+ 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, -+ 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, -+ 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, -+ 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, -+ 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, -+ 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, -+ 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, -+ 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, -+ 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, -+ 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, -+ 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, -+ 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, -+ 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, -+ 0xb30ea79dUL, 
0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, -+ 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, -+ 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, -+ 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, -+ 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, -+ 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, -+ 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, -+ 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, -+ 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, -+ 0x9324fd72UL, -+ -+ // Table 2 -+ 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, -+ 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, -+ 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, -+ 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, -+ 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, -+ 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, -+ 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, -+ 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, -+ 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, -+ 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, -+ 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, -+ 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, -+ 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, -+ 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, -+ 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, -+ 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, -+ 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, -+ 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, -+ 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, -+ 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, -+ 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, -+ 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, -+ 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, -+ 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, -+ 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, -+ 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, -+ 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, -+ 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, -+ 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, -+ 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, -+ 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, -+ 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, -+ 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, -+ 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, -+ 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, -+ 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, -+ 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, -+ 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, -+ 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, -+ 0x93e92819UL, 0x96a63e9cUL, 
0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, -+ 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, -+ 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, -+ 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, -+ 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, -+ 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, -+ 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, -+ 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, -+ 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, -+ 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, -+ 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, -+ 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, -+ 0xbe9834edUL, -+ -+ // Table 3 -+ 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, -+ 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, -+ 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, -+ 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, -+ 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, -+ 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, -+ 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, -+ 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, -+ 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, -+ 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, -+ 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, -+ 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, -+ 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, -+ 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, -+ 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, -+ 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, -+ 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, -+ 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, -+ 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, -+ 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, -+ 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, -+ 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, -+ 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, -+ 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, -+ 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, -+ 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, -+ 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, -+ 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, -+ 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, -+ 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, -+ 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, -+ 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, -+ 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, -+ 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, -+ 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, -+ 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, -+ 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 
0x80a57623UL, 0xd8c66675UL, -+ 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, -+ 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, -+ 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, -+ 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, -+ 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, -+ 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, -+ 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, -+ 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, -+ 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, -+ 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, -+ 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, -+ 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, -+ 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, -+ 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, -+ 0xde0506f1UL, -+ // Constants for Neon CRC232 implementation -+ // k3 = 0x78ED02D5 = x^288 mod poly - bit reversed -+ // k4 = 0xED627DAE = x^256 mod poly - bit reversed -+ 0x78ED02D5UL, 0xED627DAEUL, // k4:k3 -+ 0xED78D502UL, 0x62EDAE7DUL, // byte swap -+ 0x02D578EDUL, 0x7DAEED62UL, // word swap -+ 0xD502ED78UL, 0xAE7D62EDUL, // byte swap of word swap -+}; -diff --git a/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp -new file mode 100644 -index 0000000000..05791e1a0c ---- /dev/null -+++ b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp -@@ -0,0 +1,2214 @@ +@@ -0,0 +1,178 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. 
++ ++//find the last fp value ++address StubRoutines::la::_call_stub_compiled_return = NULL; ++ ++/** ++ * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h ++ */ ++juint StubRoutines::la::_crc_table[] = ++{ ++ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, ++ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, ++ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, ++ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, ++ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, ++ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, ++ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, ++ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, ++ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, ++ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, ++ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, ++ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, ++ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, ++ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, ++ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, ++ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, ++ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, ++ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, ++ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, ++ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, ++ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, ++ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, ++ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, ++ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, ++ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, ++ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, ++ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, ++ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, ++ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, ++ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, ++ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, ++ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, ++ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, ++ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, ++ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, ++ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, ++ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, ++ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, ++ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, ++ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, ++ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, ++ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, ++ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, ++ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, ++ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, ++ 0xd70dd2eeUL, 0x4e048354UL, 
0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, ++ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, ++ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, ++ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, ++ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, ++ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, ++ 0x2d02ef8dUL ++}; ++ ++ATTRIBUTE_ALIGNED(64) juint StubRoutines::la::_npio2_hw[] = { ++ // first, various coefficient values: 0.5, invpio2, pio2_1, pio2_1t, pio2_2, ++ // pio2_2t, pio2_3, pio2_3t ++ // This is a small optimization wich keeping double[8] values in int[] table ++ // to have less address calculation instructions ++ // ++ // invpio2: 53 bits of 2/pi (enough for cases when trigonometric argument is small) ++ // pio2_1: first 33 bit of pi/2 ++ // pio2_1t: pi/2 - pio2_1 ++ // pio2_2: second 33 bit of pi/2 ++ // pio2_2t: pi/2 - (pio2_1+pio2_2) ++ // pio2_3: third 33 bit of pi/2 ++ // pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) ++ 0x00000000, 0x3fe00000, // 0.5 ++ 0x6DC9C883, 0x3FE45F30, // invpio2 = 6.36619772367581382433e-01 ++ 0x54400000, 0x3FF921FB, // pio2_1 = 1.57079632673412561417e+00 ++ 0x1A626331, 0x3DD0B461, // pio2_1t = 6.07710050650619224932e-11 ++ 0x1A600000, 0x3DD0B461, // pio2_2 = 6.07710050630396597660e-11 ++ 0x2E037073, 0x3BA3198A, // pio2_2t = 2.02226624879595063154e-21 ++ 0x2E000000, 0x3BA3198A, // pio2_3 = 2.02226624871116645580e-21 ++ 0x252049C1, 0x397B839A, // pio2_3t = 8.47842766036889956997e-32 ++ // now, npio2_hw itself ++ 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, ++ 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, ++ 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A, ++ 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C, ++ 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB, ++ 0x404858EB, 0x404921FB ++}; ++ ++// Coefficients for sin(x) polynomial approximation: S1..S6. ++// See kernel_sin comments in macroAssembler_loongarch64_trig.cpp for details ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dsin_coef[] = { ++ -1.66666666666666324348e-01, // 0xBFC5555555555549 ++ 8.33333333332248946124e-03, // 0x3F8111111110F8A6 ++ -1.98412698298579493134e-04, // 0xBF2A01A019C161D5 ++ 2.75573137070700676789e-06, // 0x3EC71DE357B1FE7D ++ -2.50507602534068634195e-08, // 0xBE5AE5E68A2B9CEB ++ 1.58969099521155010221e-10 // 0x3DE5D93A5ACFD57C ++}; ++ ++// Coefficients for cos(x) polynomial approximation: C1..C6. ++// See kernel_cos comments in macroAssembler_loongarch64_trig.cpp for details ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dcos_coef[] = { ++ 4.16666666666666019037e-02, // c0x3FA555555555554C ++ -1.38888888888741095749e-03, // 0xBF56C16C16C15177 ++ 2.48015872894767294178e-05, // 0x3EFA01A019CB1590 ++ -2.75573143513906633035e-07, // 0xBE927E4F809C52AD ++ 2.08757232129817482790e-09, // 0x3E21EE9EBDB4B1C4 ++ -1.13596475577881948265e-11 // 0xBDA8FAE9BE8838D4 ++}; ++ ++// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi. ++// Used in cases of very large argument. 396 hex digits is enough to support ++// required precision. 
++// Converted to double to avoid unnecessary conversion in code ++// NOTE: table looks like original int table: {0xA2F983, 0x6E4E44,...} with ++// only (double) conversion added ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_two_over_pi[] = { ++ (double)0xA2F983, (double)0x6E4E44, (double)0x1529FC, (double)0x2757D1, (double)0xF534DD, (double)0xC0DB62, ++ (double)0x95993C, (double)0x439041, (double)0xFE5163, (double)0xABDEBB, (double)0xC561B7, (double)0x246E3A, ++ (double)0x424DD2, (double)0xE00649, (double)0x2EEA09, (double)0xD1921C, (double)0xFE1DEB, (double)0x1CB129, ++ (double)0xA73EE8, (double)0x8235F5, (double)0x2EBB44, (double)0x84E99C, (double)0x7026B4, (double)0x5F7E41, ++ (double)0x3991D6, (double)0x398353, (double)0x39F49C, (double)0x845F8B, (double)0xBDF928, (double)0x3B1FF8, ++ (double)0x97FFDE, (double)0x05980F, (double)0xEF2F11, (double)0x8B5A0A, (double)0x6D1F6D, (double)0x367ECF, ++ (double)0x27CB09, (double)0xB74F46, (double)0x3F669E, (double)0x5FEA2D, (double)0x7527BA, (double)0xC7EBE5, ++ (double)0xF17B3D, (double)0x0739F7, (double)0x8A5292, (double)0xEA6BFB, (double)0x5FB11F, (double)0x8D5D08, ++ (double)0x560330, (double)0x46FC7B, (double)0x6BABF0, (double)0xCFBC20, (double)0x9AF436, (double)0x1DA9E3, ++ (double)0x91615E, (double)0xE61B08, (double)0x659985, (double)0x5F14A0, (double)0x68408D, (double)0xFFD880, ++ (double)0x4D7327, (double)0x310606, (double)0x1556CA, (double)0x73A8C9, (double)0x60E27B, (double)0xC08C6B, ++}; ++ ++// Pi over 2 value ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_pio2[] = { ++ 1.57079625129699707031e+00, // 0x3FF921FB40000000 ++ 7.54978941586159635335e-08, // 0x3E74442D00000000 ++ 5.39030252995776476554e-15, // 0x3CF8469880000000 ++ 3.28200341580791294123e-22, // 0x3B78CC5160000000 ++ 1.27065575308067607349e-29, // 0x39F01B8380000000 ++ 1.22933308981111328932e-36, // 0x387A252040000000 ++ 2.73370053816464559624e-44, // 0x36E3822280000000 ++ 2.16741683877804819444e-51, // 0x3569F31D00000000 ++}; +diff --git a/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp +new file mode 100644 +index 0000000000..ee2e522466 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp +@@ -0,0 +1,2224 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. @@ -49525,6 +51081,7 @@ index 0000000000..05791e1a0c +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" +#include "interpreter/templateInterpreterGenerator.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" @@ -49978,15 +51535,12 @@ index 0000000000..05791e1a0c +#endif // ASSERT + // Restore bcp under the assumption that the current frame is still + // interpreted -+ // FIXME: please change the func restore_bcp -+ // S0 is the conventional register for bcp + __ restore_bcp(); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // throw exception -+ // FIXME: why do not pass parameter thread ? 
+ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); + return entry; +} @@ -50094,6 +51648,32 @@ index 0000000000..05791e1a0c + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ restore_bcp(); + __ restore_locals(); ++ ++#if INCLUDE_JVMCI ++ // Check if we need to take lock at entry of synchronized method. This can ++ // only occur on method entry so emit it only for vtos with step 0. ++ if (EnableJVMCI && state == vtos && step == 0) { ++ Label L; ++ __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ __ beqz(AT, L); ++ // Clear flag. ++ __ st_b(R0, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ // Take lock. ++ lock_method(); ++ __ bind(L); ++ } else { ++#ifdef ASSERT ++ if (EnableJVMCI) { ++ Label L; ++ __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ __ beqz(AT, L); ++ __ stop("unexpected pending monitor in deopt entry"); ++ __ bind(L); ++ } ++#endif ++ } ++#endif ++ + // handle exceptions + { + Label L; @@ -50680,7 +52260,7 @@ index 0000000000..05791e1a0c + Label L, Lstatic; + __ ld_d(t,method,in_bytes(Method::const_offset())); + __ ld_hu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); -+ // LOONGARCH ABI: caller does not reserve space for the register auguments. ++ // LoongArch ABI: caller does not reserve space for the register auguments. + // A0 and A1(if needed) + __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(AT, AT, JVM_ACC_STATIC); @@ -50743,7 +52323,7 @@ index 0000000000..05791e1a0c + // + // if native function is static, and its second parameter has type length of double word, + // and first parameter has type length of word, we have to reserve one word -+ // for the first parameter, according to loongarch abi. ++ // for the first parameter, according to LoongArch abi. + // if native function is not static, and its third parameter has type length of double word, + // and second parameter has type length of word, we have to reserve one word for the second + // parameter. @@ -51612,20 +53192,6 @@ index 0000000000..05791e1a0c + generate_and_dispatch(t); +} + -+ -+/* -+//----------------------------------------------------------------------------- -+// Generation of individual instructions -+ -+// helpers for generate_and_dispatch -+ -+ -+InterpreterGenerator::InterpreterGenerator(StubQueue* code) -+ : TemplateInterpreterGenerator(code) { -+ generate_all(); // down here so it can be "virtual" -+} -+*/ -+ +//----------------------------------------------------------------------------- + +// Non-product code @@ -51711,13 +53277,13 @@ index 0000000000..05791e1a0c +#endif // !PRODUCT diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp new file mode 100644 -index 0000000000..a17ea5db5f +index 0000000000..ddb38faf44 --- /dev/null +++ b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -51740,8 +53306,8 @@ index 0000000000..a17ea5db5f + * + */ + -+#ifndef CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP -+#define CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP ++#ifndef CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP + + static void prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) @@ -51757,16 +53323,16 @@ index 0000000000..a17ea5db5f + static void index_check(Register array, Register index); + static void index_check_without_pop(Register array, Register index); + -+#endif // CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP ++#endif // CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp new file mode 100644 -index 0000000000..c259cb69b3 +index 0000000000..8ad7c5f76e --- /dev/null +++ b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp -@@ -0,0 +1,4335 @@ +@@ -0,0 +1,4147 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -51793,12 +53359,14 @@ index 0000000000..c259cb69b3 +#include "asm/macroAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" +#include "interpreter/templateTable.hpp" +#include "memory/universe.hpp" +#include "oops/methodData.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" @@ -51820,7 +53388,7 @@ index 0000000000..c259cb69b3 +// Platform-dependent initialization + +void TemplateTable::pd_initialize() { -+ // No loongarch specific initialization ++ // No LoongArch specific initialization +} + +// Address computation: local variables @@ -51851,8 +53419,8 @@ index 0000000000..c259cb69b3 +static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } +static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } + -+// At top of Java expression stack which may be different than sp(). It -+// isn't for category 1 objects. ++// At top of Java expression stack which may be different than sp(). ++// It isn't for category 1 objects. 
+static inline Address at_tos () { + Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); + return tos; @@ -52307,7 +53875,6 @@ index 0000000000..c259cb69b3 +} + +// we compute the actual local variable address here -+// the x86 dont do so for it has scaled index memory access model, we dont have, so do here +void TemplateTable::locals_index(Register reg, int offset) { + __ ld_bu(reg, at_bcp(offset)); + __ slli_d(reg, reg, Address::times_8); @@ -52480,7 +54047,6 @@ index 0000000000..c259cb69b3 + // check index + Label ok; + __ ld_w(AT, array, arrayOopDesc::length_offset_in_bytes()); -+#ifndef OPT_RANGECHECK + __ bltu(index, AT, ok); + + //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 @@ -52488,99 +54054,34 @@ index 0000000000..c259cb69b3 + if (A2 != index) __ move(A2, index); + __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); + __ bind(ok); -+#else -+ __ ld_w(AT, array, arrayOopDesc::length_offset_in_bytes()); -+ __ move(A2, index); -+ __ stop("LA not implemented yet"); -+ //__ tgeu(A2, AT, 29); -+#endif +} + +void TemplateTable::iaload() { + transition(itos, itos); -+ if(UseBoundCheckInstruction) { -+ __ pop(SSR); //SSR:array FSR: index -+ __ alsl_d(FSR, FSR, SSR, 1); -+ __ addi_d(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); -+ -+ __ ld_w(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound -+ __ alsl_d(AT, AT, SSR, 1); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); -+ -+ __ warn("iaload Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gslwle(FSR, FSR, AT); -+ } else { -+ index_check(SSR, FSR); -+ __ alsl_d(FSR, FSR, SSR, 1); -+ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); -+ } ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, 1); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); +} + +void TemplateTable::laload() { + transition(itos, ltos); -+ if(UseBoundCheckInstruction) { -+ __ pop(SSR); //SSR:array FSR: index -+ __ alsl_d(FSR, FSR, SSR, Address::times_8 - 1); -+ __ addi_d(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); -+ -+ __ ld_w(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound -+ __ alsl_d(AT, AT, SSR, Address::times_8 - 1); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); -+ -+ __ warn("laload Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gsldle(FSR, FSR, AT); -+ } else { -+ index_check(SSR, FSR); -+ __ alsl_d(T4, FSR, SSR, Address::times_8 - 1); -+ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T4, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); -+ } ++ index_check(SSR, FSR); ++ __ alsl_d(T4, FSR, SSR, Address::times_8 - 1); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T4, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); +} + +void TemplateTable::faload() { + transition(itos, ftos); -+ if(UseBoundCheckInstruction) { -+ __ pop(SSR); //SSR:array FSR: index -+ __ shl(FSR, 2); -+ __ add_d(FSR, SSR, FSR); -+ __ addi_d(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); -+ -+ __ ld_w(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound -+ __ shl(AT, 2); -+ __ add_d(AT, SSR, AT); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); -+ -+ __ warn("faload Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gslwlec1(FSF, FSR, AT); -+ } else { -+ index_check(SSR, FSR); 
-+ __ shl(FSR, 2); -+ __ add_d(FSR, SSR, FSR); -+ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); -+ } ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_4 - 1); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); +} + +void TemplateTable::daload() { + transition(itos, dtos); -+ if(UseBoundCheckInstruction) { -+ __ pop(SSR); //SSR:array FSR: index -+ __ alsl_d(FSR, FSR, SSR, 2); -+ __ addi_d(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); -+ -+ __ ld_w(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound -+ __ alsl_d(AT, AT, SSR, 2); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); -+ -+ __ warn("daload Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gsldlec1(FSF, FSR, AT); -+ } else { -+ index_check(SSR, FSR); -+ __ alsl_d(T4, FSR, SSR, 2); -+ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T4, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); -+ } ++ index_check(SSR, FSR); ++ __ alsl_d(T4, FSR, SSR, 2); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T4, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); +} + +void TemplateTable::aaload() { @@ -52596,23 +54097,9 @@ index 0000000000..c259cb69b3 + +void TemplateTable::baload() { + transition(itos, itos); -+ if(UseBoundCheckInstruction) { -+ __ pop(SSR); //SSR:array FSR:index -+ __ add_d(FSR, SSR, FSR); -+ __ addi_d(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base -+ -+ __ ld_w(AT, SSR, arrayOopDesc::length_offset_in_bytes()); -+ __ add_d(AT, SSR, AT); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound -+ -+ __ warn("baload Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gslble(FSR, FSR, AT); -+ } else { -+ index_check(SSR, FSR); -+ __ add_d(FSR, SSR, FSR); -+ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); -+ } ++ index_check(SSR, FSR); ++ __ add_d(FSR, SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); +} + +void TemplateTable::caload() { @@ -52637,23 +54124,9 @@ index 0000000000..c259cb69b3 + +void TemplateTable::saload() { + transition(itos, itos); -+ if(UseBoundCheckInstruction) { -+ __ pop(SSR); //SSR:array FSR: index -+ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); -+ __ addi_d(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); -+ -+ __ ld_w(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound -+ __ alsl_d(AT, AT, SSR, Address::times_2 - 1); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT)); -+ -+ __ warn("saload Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gslhle(FSR, FSR, AT); -+ } else { -+ index_check(SSR, FSR); -+ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); -+ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); -+ } ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); +} + +void TemplateTable::iload(int n) { @@ -52831,92 +54304,36 @@ index 0000000000..c259cb69b3 +void TemplateTable::iastore() { + transition(itos, vtos); + __ 
pop_i(SSR); // T2: array SSR: index -+ if(UseBoundCheckInstruction) { -+ __ pop_ptr(T2); -+ __ alsl_d(SSR, SSR, T2, Address::times_4 - 1); -+ __ addi_d(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); // base -+ -+ __ ld_w(AT, T2, arrayOopDesc::length_offset_in_bytes()); -+ __ alsl_d(AT, AT, T2, Address::times_4 - 1); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); //bound -+ -+ __ warn("iastore Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gsswle(FSR, SSR, AT); -+ } else { -+ index_check(T2, SSR); // prefer index in SSR -+ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); -+ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); -+ } ++ index_check(T2, SSR); // prefer index in SSR ++ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); +} + +// used register T2, T3 +void TemplateTable::lastore() { + transition(ltos, vtos); + __ pop_i (T2); -+ if(UseBoundCheckInstruction) { -+ __ pop_ptr(T3); -+ __ alsl_d(T2, T2, T3, Address::times_8 - 1); -+ __ addi_d(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); // base -+ -+ __ ld_w(AT, T3, arrayOopDesc::length_offset_in_bytes()); -+ __ alsl_d(AT, AT, T3, Address::times_8 - 1); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); //bound -+ -+ __ warn("lastore Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gssdle(FSR, T2, AT); -+ } else { -+ index_check(T3, T2); -+ __ alsl_d(T3, T2, T3, Address::times_8 - 1); -+ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); -+ } ++ index_check(T3, T2); ++ __ alsl_d(T3, T2, T3, Address::times_8 - 1); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); +} + +// used register T2 +void TemplateTable::fastore() { + transition(ftos, vtos); + __ pop_i(SSR); -+ if(UseBoundCheckInstruction) { -+ __ pop_ptr(T2); -+ __ alsl_d(SSR, SSR, T2, Address::times_4 - 1); -+ __ addi_d(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); // base -+ -+ __ ld_w(AT, T2, arrayOopDesc::length_offset_in_bytes()); -+ __ alsl_d(AT, AT, T2, Address::times_4 - 1); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); //bound -+ -+ __ warn("fastore Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gsswlec1(FSF, SSR, AT); -+ } else { -+ index_check(T2, SSR); -+ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); -+ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); -+ } ++ index_check(T2, SSR); ++ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); +} + +// used register T2, T3 +void TemplateTable::dastore() { + transition(dtos, vtos); + __ pop_i (T2); -+ if(UseBoundCheckInstruction) { -+ __ pop_ptr(T3); -+ __ alsl_d(T2, T2, T3, Address::times_8 - 1); -+ __ addi_d(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); // base -+ -+ __ ld_w(AT, T3, arrayOopDesc::length_offset_in_bytes()); -+ __ alsl_d(AT, AT, T3, Address::times_8 - 1); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); //bound -+ -+ __ warn("dastore Unimplemented yet"); -+ __ 
stop("LA not implemented yet"); -+ //__ gssdlec1(FSF, T2, AT); -+ } else { -+ index_check(T3, T2); -+ __ alsl_d(T3, T2, T3, Address::times_8 - 1); -+ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); -+ } ++ index_check(T3, T2); ++ __ alsl_d(T3, T2, T3, Address::times_8 - 1); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); +} + +// used register : T2, T3, T8 @@ -52971,61 +54388,32 @@ index 0000000000..c259cb69b3 +void TemplateTable::bastore() { + transition(itos, vtos); + __ pop_i(SSR); -+ if(UseBoundCheckInstruction) { -+ guarantee(false, "unimplemented yet!"); -+ __ pop_ptr(T2); -+ __ add_d(SSR, T2, SSR); -+ __ addi_d(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // base -+ -+ __ ld_w(AT, T2, arrayOopDesc::length_offset_in_bytes()); -+ __ add_d(AT, T2, AT); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound -+ -+ __ warn("bastore Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gssble(FSR, SSR, AT); -+ } else { -+ index_check(T2, SSR); ++ index_check(T2, SSR); + -+ // Need to check whether array is boolean or byte -+ // since both types share the bastore bytecode. -+ __ load_klass(T4, T2); -+ __ ld_w(T4, T4, in_bytes(Klass::layout_helper_offset())); ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. ++ __ load_klass(T4, T2); ++ __ ld_w(T4, T4, in_bytes(Klass::layout_helper_offset())); + -+ int diffbit = Klass::layout_helper_boolean_diffbit(); -+ __ li(AT, diffbit); ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ li(AT, diffbit); + -+ Label L_skip; -+ __ andr(AT, T4, AT); -+ __ beq(AT, R0, L_skip); -+ __ andi(FSR, FSR, 0x1); -+ __ bind(L_skip); ++ Label L_skip; ++ __ andr(AT, T4, AT); ++ __ beq(AT, R0, L_skip); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); + -+ __ add_d(SSR, T2, SSR); -+ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); -+ } ++ __ add_d(SSR, T2, SSR); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); +} + +void TemplateTable::castore() { + transition(itos, vtos); + __ pop_i(SSR); -+ if(UseBoundCheckInstruction) { -+ __ pop_ptr(T2); -+ __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); -+ __ addi_d(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); // base -+ -+ __ ld_w(AT, T2, arrayOopDesc::length_offset_in_bytes()); -+ __ alsl_d(AT, AT, T2, Address::times_2 - 1); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR)); //bound -+ -+ __ warn("castore Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gsshle(FSR, SSR, AT); -+ } else { -+ index_check(T2, SSR); -+ __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); -+ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); -+ } ++ index_check(T2, SSR); ++ __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); +} + +void TemplateTable::sastore() { @@ -53756,8 +55144,8 @@ index 0000000000..c259cb69b3 + + // pop the interpreter frame + __ ld_d(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); -+ //FIXME, shall we keep the return address on the stack? 
-+ __ leave(); // remove frame anchor ++ // remove frame anchor ++ __ leave(); + __ move(LVP, RA); + __ move(SP, A7); + @@ -53765,7 +55153,7 @@ index 0000000000..c259cb69b3 + __ andr(SP , SP , AT); + + // push the (possibly adjusted) return address -+ //refer to osr_entry in c1_LIRAssembler_loongarch.cpp ++ // refer to osr_entry in c1_LIRAssembler_loongarch.cpp + __ ld_d(AT, Rnext, nmethod::osr_entry_point_offset()); + __ jr(AT); + } @@ -54317,9 +55705,6 @@ index 0000000000..c259cb69b3 + size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); + resolve_cache_and_index(byte_no, cache, index, index_size); + -+ //assert(wordSize == 8, "adjust code below"); -+ // note we shift 4 not 2, for we get is the true inde -+ // of ConstantPoolCacheEntry, not the shifted 2-bit index as x86 version + __ alsl_d(AT, index, cache, Address::times_ptr - 1); + __ ld_d(method, AT, method_offset); + @@ -54353,8 +55738,8 @@ index 0000000000..c259cb69b3 + + // cache entry pointer + __ addi_d(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); -+ __ shl(tmp3, LogBytesPerWord); -+ __ add_d(tmp2, tmp2, tmp3); ++ __ alsl_d(tmp2, tmp3, tmp2, LogBytesPerWord - 1); ++ + if (is_static) { + __ move(tmp1, R0); + } else { @@ -54363,10 +55748,9 @@ index 0000000000..c259cb69b3 + } + // tmp1: object pointer or NULL + // tmp2: cache entry pointer -+ // tmp3: jvalue object on the stack + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_access), -+ tmp1, tmp2, tmp3); ++ tmp1, tmp2); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } @@ -54518,7 +55902,6 @@ index 0000000000..c259cb69b3 + __ li(AT, ltos); + __ bne(flags, AT, notLong); + -+ // FIXME : the load/store should be atomic, we have no simple method to do this in loongarch32 + // ltos + __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); + __ push(ltos); @@ -54636,8 +56019,7 @@ index 0000000000..c259cb69b3 + } + // cache entry pointer + __ addi_d(tmp2, tmp2, in_bytes(cp_base_offset)); -+ __ shl(tmp4, LogBytesPerWord); -+ __ add_d(tmp2, tmp2, tmp4); ++ __ alsl_d(tmp2, tmp4, tmp2, LogBytesPerWord - 1); + // object (tos) + __ move(tmp3, SP); + // tmp1: object pointer set up above (NULL if static) @@ -55096,7 +56478,6 @@ index 0000000000..c259cb69b3 + __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), noreg, noreg); + break; + case Bytecodes::_fast_agetfield: -+ //add for compressedoops + do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); + __ verify_oop(FSR); + break; @@ -55368,7 +56749,7 @@ index 0000000000..c259cb69b3 +void TemplateTable::fast_invokevfinal(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); -+ __ stop("fast_invokevfinal not used on loongarch64"); ++ __ stop("fast_invokevfinal not used on LoongArch64"); +} + +// used registers : T0, T1, T2, T3, T1, A7 @@ -55552,7 +56933,6 @@ index 0000000000..c259cb69b3 + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + -+ //const Register Rmethod = T2; + const Register T2_callsite = T2; + + prepare_invoke(byte_no, Rmethod, T2_callsite); @@ -55708,7 +57088,7 @@ index 0000000000..c259cb69b3 + __ bne(T1, FSR, loop); // dont clear header + } + -+ //klass in T3, ++ // klass in T3, + // initialize object header only. 
+ __ bind(initialize_header); + if (UseBiasedLocking) { @@ -55748,7 +57128,7 @@ index 0000000000..c259cb69b3 +void TemplateTable::newarray() { + transition(itos, atos); + __ ld_bu(A1, at_bcp(1)); -+ //type, count ++ // type, count + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); + __ membar(__ StoreStore); +} @@ -55815,7 +57195,6 @@ index 0000000000..c259cb69b3 + __ bind(resolved); + + // get subklass in T2 -+ //add for compressedoops + __ load_klass(T2, FSR); + // Superklass in T3. Subklass in T2. + __ gen_subtype_check(T3, T2, ok_is_subtype); @@ -55838,7 +57217,7 @@ index 0000000000..c259cb69b3 + __ bind(done); +} + -+// i use T3 as cpool, T1 as tags, T2 as index ++// T3 as cpool, T1 as tags, T2 as index +// object always in FSR, superklass in T3, subklass in T2 +void TemplateTable::instanceof() { + transition(atos, itos); @@ -55878,7 +57257,6 @@ index 0000000000..c259cb69b3 + + __ bind(resolved); + // get subklass in T2 -+ //add for compressedoops + __ load_klass(T2, FSR); + + // Superklass in T3. Subklass in T2. @@ -56101,13 +57479,13 @@ index 0000000000..c259cb69b3 +#endif // !CC_INTERP diff --git a/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp new file mode 100644 -index 0000000000..4a25cd760b +index 0000000000..5b9f7b7898 --- /dev/null +++ b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp -@@ -0,0 +1,68 @@ +@@ -0,0 +1,61 @@ +/* + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -56130,22 +57508,15 @@ index 0000000000..4a25cd760b + * + */ + -+#ifndef CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP ++#define CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP + +// These are the CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. 
+ +#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ -+ \ -+ /******************************/ \ -+ /* JavaCallWrapper */ \ -+ /******************************/ \ -+ /******************************/ \ -+ /* JavaFrameAnchor */ \ -+ /******************************/ \ -+ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ + \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ @@ -56172,7 +57543,7 @@ index 0000000000..4a25cd760b + /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ + /* be present there) */ + -+#endif // CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp new file mode 100644 index 0000000000..eb8f075c71 @@ -56266,13 +57637,13 @@ index 0000000000..eb8f075c71 +} diff --git a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp new file mode 100644 -index 0000000000..9776f1b056 +index 0000000000..1a93123134 --- /dev/null +++ b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -56295,8 +57666,8 @@ index 0000000000..9776f1b056 + * + */ + -+#ifndef CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP + +#include "runtime/vm_version.hpp" +#include "utilities/macros.hpp" @@ -56323,13 +57694,13 @@ index 0000000000..9776f1b056 + static void initialize_cpu_information(void); +}; + -+#endif // CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp new file mode 100644 -index 0000000000..6817f9ce8b +index 0000000000..b954fa3e1a --- /dev/null +++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp -@@ -0,0 +1,437 @@ +@@ -0,0 +1,414 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
@@ -56565,33 +57936,6 @@ index 0000000000..6817f9ce8b + } +#endif + -+ if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { -+ if (FLAG_IS_DEFAULT(UseSyncLevel)) { -+ FLAG_SET_DEFAULT(UseSyncLevel, 1000); -+ } -+ } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { -+ if (FLAG_IS_DEFAULT(UseSyncLevel)) { -+ FLAG_SET_DEFAULT(UseSyncLevel, 2000); -+ } -+ } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { -+ if (FLAG_IS_DEFAULT(UseSyncLevel)) { -+ FLAG_SET_DEFAULT(UseSyncLevel, 3000); -+ } -+ } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { -+ if (FLAG_IS_DEFAULT(UseSyncLevel)) { -+ FLAG_SET_DEFAULT(UseSyncLevel, 4000); -+ } -+ } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { -+ if (FLAG_IS_DEFAULT(UseSyncLevel)) { -+ FLAG_SET_DEFAULT(UseSyncLevel, 10000); -+ } -+ } else { -+ assert(false, "Should Not Reach Here, what is the cpu type?"); -+ if (FLAG_IS_DEFAULT(UseSyncLevel)) { -+ FLAG_SET_DEFAULT(UseSyncLevel, 10000); -+ } -+ } -+ + char buf[256]; + + // A note on the _features_string format: @@ -56603,8 +57947,7 @@ index 0000000000..6817f9ce8b + // Furthermore, use one, and only one, separator space between features. + // Multiple spaces are considered separate tokens, messing up everything. + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, " -+ "0x%lx, fp_ver: %d, lvz_ver: %d, " -+ "usesynclevel:%d", ++ "0x%lx, fp_ver: %d, lvz_ver: %d, ", + (is_la64() ? "la64" : ""), + (is_la32() ? "la32" : ""), + (supports_lsx() ? ", lsx" : ""), @@ -56623,12 +57966,11 @@ index 0000000000..6817f9ce8b + (needs_ulsync() ? ", needs_ulsync": ""), + _cpuid_info.cpucfg_info_id0.bits.PRID, + _cpuid_info.cpucfg_info_id2.bits.FP_VER, -+ _cpuid_info.cpucfg_info_id2.bits.LVZ_VER, -+ UseSyncLevel); ++ _cpuid_info.cpucfg_info_id2.bits.LVZ_VER); + _features_str = strdup(buf); + + assert(!is_la32(), "Should Not Reach Here, what is the cpu type?"); -+ assert( is_la64(), "Should be loongarch64"); ++ assert( is_la64(), "Should be LoongArch64"); + + if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); @@ -56729,12 +58071,18 @@ index 0000000000..6817f9ce8b + } + } + ++#ifdef COMPILER2 ++ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); ++ } ++ + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + UseMontgomeryMultiplyIntrinsic = true; + } + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + UseMontgomerySquareIntrinsic = true; + } ++#endif + + // This machine allows unaligned memory accesses + if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { @@ -56769,10 +58117,10 @@ index 0000000000..6817f9ce8b +} diff --git a/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp new file mode 100644 -index 0000000000..2cb8ec7fd3 +index 0000000000..8b5bc4a4c8 --- /dev/null +++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp -@@ -0,0 +1,290 @@ +@@ -0,0 +1,292 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
@@ -56798,14 +58146,16 @@ index 0000000000..2cb8ec7fd3 + * + */ + -+#ifndef CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP + +#include "runtime/abstract_vm_version.hpp" +#include "runtime/globals_extension.hpp" +#include "utilities/sizes.hpp" + +class VM_Version: public Abstract_VM_Version { ++ friend class JVMCIVMStructs; ++ +public: + + union LoongArch_Cpucfg_Id0 { @@ -57062,16 +58412,16 @@ index 0000000000..2cb8ec7fd3 + static const char* cpu_features() { return _features_str; } +}; + -+#endif // CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp new file mode 100644 -index 0000000000..b606d8550c +index 0000000000..43caba5187 --- /dev/null +++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp -@@ -0,0 +1,51 @@ +@@ -0,0 +1,53 @@ +/* + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -57104,15 +58454,17 @@ index 0000000000..b606d8550c + Register reg = ::as_Register(0); + int i; + for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { -+ regName[i++] = reg->name(); -+ regName[i++] = reg->name(); ++ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); ++ } + reg = reg->successor(); + } + + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { -+ regName[i++] = freg->name(); -+ regName[i++] = freg->name(); ++ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = freg->name(); ++ } + freg = freg->successor(); + } + @@ -57122,7 +58474,7 @@ index 0000000000..b606d8550c +} diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp new file mode 100644 -index 0000000000..2377eed4f6 +index 0000000000..819eaff0bb --- /dev/null +++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp @@ -0,0 +1,58 @@ @@ -57151,8 +58503,8 @@ index 0000000000..2377eed4f6 + * + */ + -+#ifndef CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_HPP ++#define CPU_LOONGARCH_VMREG_LOONGARCH_HPP + +inline bool is_Register() { + return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; @@ -57160,7 +58512,7 @@ index 0000000000..2377eed4f6 + +inline Register as_Register() { + assert( is_Register(), "must be"); -+ return ::as_Register(value() >> 1); ++ return ::as_Register(value() / RegisterImpl::max_slots_per_register); +} + +inline bool is_FloatRegister() { @@ -57168,9 +58520,9 @@ index 0000000000..2377eed4f6 +} + +inline FloatRegister as_FloatRegister() { -+ assert( is_FloatRegister(), "must be" ); -+ assert( is_even(value()), "must be" ); -+ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); ++ assert( is_FloatRegister() && is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / ++ FloatRegisterImpl::max_slots_per_register); +} + +inline bool is_concrete() { @@ -57183,13 +58535,13 @@ index 
0000000000..2377eed4f6 + } +} + -+#endif // CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_VMREG_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp new file mode 100644 -index 0000000000..a752e2c80d +index 0000000000..edb78e36da --- /dev/null +++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp -@@ -0,0 +1,38 @@ +@@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. @@ -57215,25 +58567,26 @@ index 0000000000..a752e2c80d + * + */ + -+#ifndef CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP -+#define CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP ++#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP + +inline VMReg RegisterImpl::as_VMReg() { + if( this==noreg ) return VMRegImpl::Bad(); -+ return VMRegImpl::as_VMReg(encoding() << 1 ); ++ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); +} + +inline VMReg FloatRegisterImpl::as_VMReg() { -+ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); ++ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_gpr); +} + -+#endif // CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP ++#endif // CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp new file mode 100644 -index 0000000000..e75f7c4f94 +index 0000000000..2c4b60653b --- /dev/null +++ b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp -@@ -0,0 +1,323 @@ +@@ -0,0 +1,322 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
@@ -57338,7 +58691,6 @@ index 0000000000..e75f7c4f94 + + // get receiver klass + address npe_addr = __ pc(); -+ //add for compressedoops + __ load_klass(t1, T0); + +#ifndef PRODUCT @@ -57557,432 +58909,45807 @@ index 0000000000..e75f7c4f94 + const unsigned int icache_line_size = wordSize; + return icache_line_size; +} -diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -index 847f7d61d2..f570946090 100644 ---- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -@@ -488,6 +488,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { - } - } - -+void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { -+ ShouldNotReachHere(); -+} - - void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { - Bytecodes::Code code = op->bytecode(); -@@ -1608,6 +1611,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L - __ bind(skip); - } - -+void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { -+ ShouldNotReachHere(); -+} -+ - - void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, - CodeEmitInfo* info, bool pop_fpu_stack) { -diff --git a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp -index d34ea45c0b..f6b6dbdee3 100644 ---- a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp -+++ b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp -@@ -273,21 +273,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { - __ move(temp, addr); - } - -- --void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { -+template -+void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { - LIR_Opr tmp = FrameMap::R0_opr; - __ load(new LIR_Address(base, disp, T_INT), tmp, info); -- __ cmp(condition, tmp, c); -+ __ cmp_branch(condition, tmp, c, T_INT, tgt); - } - -+// Explicit instantiation for all supported types. -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); - --void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, -- int disp, BasicType type, CodeEmitInfo* info) { -+template -+void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { - LIR_Opr tmp = FrameMap::R0_opr; - __ load(new LIR_Address(base, disp, type), tmp, info); -- __ cmp(condition, reg, tmp); -+ __ cmp_branch(condition, reg, tmp, type, tgt); - } - -+// Explicit instantiation for all supported types. 
-+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); - - bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { - assert(left != result, "should be different registers"); -diff --git a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp -index ef9b0833d3..c6b25bf10e 100644 ---- a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp -+++ b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp -@@ -62,3 +62,24 @@ void LIR_Address::verify() const { - #endif - } - #endif // PRODUCT -+ -+template -+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { -+ cmp(condition, left, right, info); -+ branch(condition, type, tgt); -+} -+ -+// Explicit instantiation for all supported types. -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); -+ -+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { -+ cmp(condition, left, right); -+ branch(condition, type, block, unordered); -+} -+ -+void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { -+ cmp(condition, left, right); -+ cmove(condition, src1, src2, dst, type); -+} -diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -index 897be2209e..0c27cc20f3 100644 ---- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -@@ -379,6 +379,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { - } - } - -+void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { -+ ShouldNotReachHere(); -+} - - void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { - LIR_Opr src = op->in_opr(); -@@ -1503,6 +1506,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L - } - } - -+void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { -+ ShouldNotReachHere(); -+} -+ - void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, - CodeEmitInfo* info, bool pop_fpu_stack) { - assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); -diff --git a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp -index ae297ac635..c786803e0f 100644 ---- a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp -+++ b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp -@@ -213,16 +213,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { - __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); - } - --void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { -+template -+void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, 
int disp, int c, T tgt, CodeEmitInfo* info) { - LIR_Opr scratch = FrameMap::Z_R1_opr; - __ load(new LIR_Address(base, disp, T_INT), scratch, info); -- __ cmp(condition, scratch, c); -+ __ cmp_branch(condition, scratch, c, T_INT, tgt); - } - --void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { -+// Explicit instantiation for all supported types. -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); -+ -+template -+void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { - __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); -+ __ branch(condition, type, tgt); - } - -+// Explicit instantiation for all supported types. -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); -+ - bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { - if (tmp->is_valid()) { - if (is_power_of_2(c + 1)) { -diff --git a/src/hotspot/cpu/s390/c1_LIR_s390.cpp b/src/hotspot/cpu/s390/c1_LIR_s390.cpp -index 9507ca0856..2116e9af2b 100644 ---- a/src/hotspot/cpu/s390/c1_LIR_s390.cpp -+++ b/src/hotspot/cpu/s390/c1_LIR_s390.cpp -@@ -56,3 +56,23 @@ void LIR_Address::verify() const { - } - #endif // PRODUCT - -+template -+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { -+ cmp(condition, left, right, info); -+ branch(condition, type, tgt); -+} -+ -+// Explicit instantiation for all supported types. 
-+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); -+ -+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { -+ cmp(condition, left, right); -+ branch(condition, type, block, unordered); -+} -+ -+void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { -+ cmp(condition, left, right); -+ cmove(condition, src1, src2, dst, type); -+} -diff --git a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp -index e503159eb7..2e5609fec8 100644 ---- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp -+++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp -@@ -599,6 +599,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { - // The peephole pass fills the delay slot - } - -+void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { -+ ShouldNotReachHere(); -+} - - void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { - Bytecodes::Code code = op->bytecode(); -@@ -1638,6 +1641,9 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L - __ bind(skip); - } - -+void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { -+ ShouldNotReachHere(); -+} - - void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { - assert(info == NULL, "unused on this code path"); -diff --git a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp -index a09a159722..a02ffafc77 100644 ---- a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp -+++ b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp -@@ -267,19 +267,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { - __ move(temp, addr); - } - --void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { -+template -+void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { - LIR_Opr o7opr = FrameMap::O7_opr; - __ load(new LIR_Address(base, disp, T_INT), o7opr, info); -- __ cmp(condition, o7opr, c); -+ __ cmp_branch(condition, o7opr, c, T_INT, tgt); - } - -+// Explicit instantiation for all supported types. 
-+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); - --void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { -+template -+void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { - LIR_Opr o7opr = FrameMap::O7_opr; - __ load(new LIR_Address(base, disp, type), o7opr, info); -- __ cmp(condition, reg, o7opr); -+ __ cmp_branch(condition, reg, o7opr, type, tgt); - } - -+// Explicit instantiation for all supported types. -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); - - bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { - assert(left != result, "should be different registers"); -diff --git a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp -index c21d2c1d9a..9cebb387e2 100644 ---- a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp -+++ b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp -@@ -54,3 +54,24 @@ void LIR_Address::verify() const { - "wrong type for addresses"); - } - #endif // PRODUCT -+ -+template -+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { -+ cmp(condition, left, right, info); -+ branch(condition, type, tgt); -+} -+ -+// Explicit instantiation for all supported types. 
-+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); -+ -+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { -+ cmp(condition, left, right); -+ branch(condition, type, block, unordered); -+} -+ -+void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { -+ cmp(condition, left, right); -+ cmove(condition, src1, src2, dst, type); -+} -diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -index cee3140f4f..7b76eb0b9e 100644 ---- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -@@ -1442,6 +1442,10 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { - } - } - -+void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { -+ ShouldNotReachHere(); -+} -+ - void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { - LIR_Opr src = op->in_opr(); - LIR_Opr dest = op->result_opr(); -@@ -2030,6 +2034,9 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L - } - } - -+void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { -+ ShouldNotReachHere(); -+} - - void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { - assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); -diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp -index 905708a9fa..1c6774e1d6 100644 ---- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp -+++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp -@@ -255,15 +255,27 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { - __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); - } - --void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { -+template -+void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { - __ cmp_mem_int(condition, base, disp, c, info); -+ __ branch(condition, T_INT, tgt); - } - -+// Explicit instantiation for all supported types. -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); - --void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { -+template -+void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { - __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); -+ __ branch(condition, type, tgt); - } - -+// Explicit instantiation for all supported types. 
-+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); - - bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { - if (tmp->is_valid() && c > 0 && c < max_jint) { -diff --git a/src/hotspot/cpu/x86/c1_LIR_x86.cpp b/src/hotspot/cpu/x86/c1_LIR_x86.cpp -index 92277ee063..20e283e302 100644 ---- a/src/hotspot/cpu/x86/c1_LIR_x86.cpp -+++ b/src/hotspot/cpu/x86/c1_LIR_x86.cpp -@@ -72,3 +72,24 @@ void LIR_Address::verify() const { - #endif - } - #endif // PRODUCT -+ -+template -+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { -+ cmp(condition, left, right, info); -+ branch(condition, type, tgt); -+} -+ -+// Explicit instantiation for all supported types. -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); -+ -+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { -+ cmp(condition, left, right); -+ branch(condition, type, block, unordered); -+} -+ -+void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { -+ cmp(condition, left, right); -+ cmove(condition, src1, src2, dst, type); -+} -diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp -index d2290a6706..3e88d609b5 100644 ---- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp -@@ -261,7 +261,8 @@ void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, - #define __ ce->masm()-> - - void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, -- LIR_Opr ref) const { -+ LIR_Opr ref, -+ LIR_Opr res) const { - __ testptr(ref->as_register(), address_bad_mask_from_thread(r15_thread)); - } - -diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp -index 3687754e71..791e4ed43f 100644 ---- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp -+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp -@@ -77,7 +77,8 @@ public: - - #ifdef COMPILER1 - void generate_c1_load_barrier_test(LIR_Assembler* ce, -- LIR_Opr ref) const; -+ LIR_Opr ref, -+ LIR_Opr res) const; - - void generate_c1_load_barrier_stub(LIR_Assembler* ce, - ZLoadBarrierStubC1* stub) const; -diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 476b1c2175..006e29d0a2 100644 ---- a/src/hotspot/os/linux/os_linux.cpp -+++ b/src/hotspot/os/linux/os_linux.cpp -@@ -22,6 +22,12 @@ - * - */ - -+/* -+ * This file has been modified by Loongson Technology in 2021. These -+ * modifications are Copyright (c) 2021 Loongson Technology, and are made -+ * available on the same license terms set forth above. 
-+ */ -+ - // no precompiled headers - #include "jvm.h" - #include "classfile/classLoader.hpp" -@@ -3837,6 +3843,8 @@ size_t os::Linux::find_large_page_size() { - IA64_ONLY(256 * M) - PPC_ONLY(4 * M) - S390_ONLY(1 * M) -+ MIPS64_ONLY(4 * M) -+ LOONGARCH64_ONLY(4 * M); //In MIPS _large_page_size is seted 4*M. // TODO: LA - SPARC_ONLY(4 * M); - #endif // ZERO - -diff --git a/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp +diff --git a/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp new file mode 100644 -index 0000000000..30719a0340 +index 0000000000..73f021c9b7 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp -@@ -0,0 +1,24 @@ ++++ b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp +@@ -0,0 +1,132 @@ +/* -+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "ci/ciMethod.hpp" ++#include "interpreter/interpreter.hpp" ++#include "runtime/frame.inline.hpp" ++ ++// asm based interpreter deoptimization helpers ++int AbstractInterpreter::size_activation(int max_stack, ++ int temps, ++ int extra_args, ++ int monitors, ++ int callee_params, ++ int callee_locals, ++ bool is_top_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ ++ // fixed size of an interpreter frame: ++ int overhead = frame::sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset; ++ // Our locals were accounted for by the caller (or last_frame_adjust ++ // on the transistion) Since the callee parameters already account ++ // for the callee's params we only need to account for the extra ++ // locals. ++ int size = overhead + ++ (callee_locals - callee_params)*Interpreter::stackElementWords + ++ monitors * frame::interpreter_frame_monitor_size() + ++ temps* Interpreter::stackElementWords + extra_args; ++ ++ return size; ++} ++ ++// How much stack a method activation needs in words. 
++int AbstractInterpreter::size_top_interpreter_activation(Method* method) { ++ ++ const int entry_size = frame::interpreter_frame_monitor_size(); ++ ++ // total overhead size: entry_size + (saved ebp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; ++ ++ const int stub_code = 6; // see generate_call_stub ++ // return overhead_size + method->max_locals() + method->max_stack() + stub_code; ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return overhead_size + method_stack + stub_code; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ // If interpreter_frame!=NULL, set up the method, locals, and monitors. ++ // The frame interpreter_frame, if not NULL, is guaranteed to be the ++ // right size, as determined by a previous call to this method. ++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(esp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. 
++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++ *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); ++} ++ +diff --git a/src/hotspot/cpu/mips/assembler_mips.cpp b/src/hotspot/cpu/mips/assembler_mips.cpp +new file mode 100644 +index 0000000000..5f02077d0e +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.cpp +@@ -0,0 +1,733 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? 
++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of Assembler ++const char *Assembler::ops_name[] = { ++ "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz", ++ "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui", ++ "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl", ++ "daddi", "daddiu", "ldl", "ldr", "", "", "", "", ++ "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu", ++ "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache", ++ "ll", "lwc1", "", "", "lld", "ldc1", "", "ld", ++ "sc", "swc1", "", "", "scd", "sdc1", "", "sd" ++}; ++ ++const char* Assembler::special_name[] = { ++ "sll", "", "srl", "sra", "sllv", "", "srlv", "srav", ++ "jr", "jalr", "movz", "movn", "syscall", "break", "", "sync", ++ "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra", ++ "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu", ++ "add", "addu", "sub", "subu", "and", "or", "xor", "nor", ++ "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu", ++ "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "", ++ "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32" ++}; ++ ++const char* Assembler::cop1_name[] = { ++ "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg", ++ "round.l", "trunc.l", "ceil.l", "floor.l", "round.w", "trunc.w", "ceil.w", "floor.w", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "c.f", "c.un", "c.eq", "c.ueq", "c.olt", "c.ult", "c.ole", "c.ule", ++ "c.sf", "c.ngle", "c.seq", "c.ngl", "c.lt", "c.nge", "c.le", "c.ngt" ++}; ++ ++const char* Assembler::cop1x_name[] = { ++ "lwxc1", "ldxc1", "", "", "", "luxc1", "", "", ++ "swxc1", "sdxc1", "", "", "", "suxc1", "", "prefx", ++ "", "", "", "", "", "", "alnv.ps", "", ++ "", "", "", "", "", "", "", "", ++ "madd.s", "madd.d", "", "", "", "", "madd.ps", "", ++ "msub.s", "msub.d", "", "", "", "", "msub.ps", "", ++ "nmadd.s", "nmadd.d", "", "", "", "", "nmadd.ps", "", ++ "nmsub.s", "nmsub.d", "", "", "", "", "nmsub.ps", "" ++}; ++ ++const char* Assembler::special2_name[] = { ++ "madd", "", "mul", "", "msub", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "gsdmult", "", "", "gsdiv", "gsddiv", "", "", ++ "", "", "", "", "gsmod", "gsdmod", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "" ++}; ++ ++const char* Assembler::special3_name[] = { ++ "ext", "", "", "", "ins", "dinsm", "dinsu", "dins", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "bshfl", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++}; ++ ++const char* Assembler::regimm_name[] = { ++ "bltz", "bgez", "bltzl", "bgezl", "", "", "", "", ++ "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "", ++ "bltzal", "bgezal", "bltzall", "bgezall" ++}; ++ ++const char* Assembler::gs_ldc2_name[] = { ++ "gslbx", "gslhx", "gslwx", "gsldx", "", "", "gslwxc1", "gsldxc1" ++}; ++ ++ ++const 
char* Assembler::gs_lwc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gslble", "gslbgt", "gslhle", "gslhgt", "gslwle", "gslwgt", "gsldle", "gsldgt", ++ "", "", "", "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "",/*LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits.*/ ++ "gslq", "" ++}; ++ ++const char* Assembler::gs_sdc2_name[] = { ++ "gssbx", "gsshx", "gsswx", "gssdx", "", "", "gsswxc1", "gssdxc1" ++}; ++ ++const char* Assembler::gs_swc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gssble", "gssbgt", "gsshle", "gsshgt", "gsswle", "gsswgt", "gssdle", "gssdgt", ++ "", "", "", "", "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1", ++ "gssq", "" ++}; ++ ++//misleading name, print only branch/jump instruction ++void Assembler::print_instruction(int inst) { ++ const char *s; ++ switch( opcode(inst) ) { ++ default: ++ s = ops_name[opcode(inst)]; ++ break; ++ case special_op: ++ s = special_name[special(inst)]; ++ break; ++ case regimm_op: ++ s = special_name[rt(inst)]; ++ break; ++ } ++ ++ ::tty->print("%s", s); ++} ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<>2; ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ case lui_op: ++ case ori_op: ++ case daddiu_op: ++ ShouldNotReachHere(); ++ break; ++ default: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if (!is_simm16(v)) { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ v = low16(v); ++ inst &= 0xffff0000; ++ break; ++ } ++ ++ return inst | v; ++} ++ ++int Assembler::branch_destination(int inst, int pos) { ++ int off = 0; ++ ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ assert(false, "should not use j/jal here"); ++ break; ++ default: ++ off = expand(low16(inst), 15); ++ break; ++ } ++ ++ return off ? 
pos + 4 + (off<<2) : 0; ++} ++ ++int AbstractAssembler::code_fill_byte() { ++ return 0x00; // illegal instruction 0x00000000 ++} ++ ++// Now the Assembler instruction (identical for 32/64 bits) ++ ++void Assembler::lb(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lb(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lbu(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lbu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ld(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsldx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gsldx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ ld(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsldx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ ld(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gsldx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ ld(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ ld(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsldx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ ld(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::ldl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lh(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lh(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lhu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lhu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ll(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lld(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lld(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lw(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gslwx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gslwx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ lw(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, 
AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gslwx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ lw(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gslwx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ lw(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ lw(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gslwx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ lw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::lea(Register rt, Address src) { ++ Register dst = rt; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm16(disp)) { ++ daddiu(dst, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm16(disp)) { ++ daddu(AT, base, index); ++ daddiu(dst, AT, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, base, AT); ++ daddu(dst, AT, index); ++ } ++ } else { ++ if (is_simm16(disp)) { ++ dsll(AT, index, scale); ++ daddu(AT, AT, base); ++ daddiu(dst, AT, disp); ++ } else { ++ assert_different_registers(dst, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ dsll(dst, index, scale); ++ daddu(dst, dst, AT); ++ } ++ } ++ } ++} ++ ++void Assembler::lwl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::sb(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sb(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::scd(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ scd(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sd(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm16(disp)) { ++ if ( UseLEXT1 && is_simm(disp, 8)) { ++ if (scale == 0) { ++ gssdx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gssdx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sd(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ 
gssdx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sd(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sd(src, AT, 0); ++ } ++ } ++ } else { ++ if (is_simm16(disp)) { ++ sd(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gssdx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sd(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::sdl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sdr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sh(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sh(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sw(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsswx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gsswx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sw(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsswx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sw(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sw(src, AT, 0); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ sw(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsswx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::swl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::swr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::lwc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::swc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swc1(rt, dst.base(), dst.disp()); 
++} ++ ++void Assembler::sdc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdc1(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::j(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((j_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::jal(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((jal_op<<26) | dest); ++ has_delay_slot(); ++} +diff --git a/src/hotspot/cpu/mips/assembler_mips.hpp b/src/hotspot/cpu/mips/assembler_mips.hpp +new file mode 100644 +index 0000000000..7ef33cf592 +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.hpp +@@ -0,0 +1,1792 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "runtime/vm_version.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. ++ ++class ArrayAddress; ++ ++class Address { ++ public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. 
++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following two overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. Note that their equivalent ++ // for the optimized build are the member functions with int disp ++ // argument since ByteSize is mapped to an int type in that case. ++ // ++ // Note: DO NOT introduce similar overloaded functions for WordSize ++ // arguments as in the optimized mode, both ByteSize and WordSize ++ // are mapped to the same type and thus the compiler cannot make a ++ // distinction anymore (=> compiler errors). ++ ++#ifdef ASSERT ++ Address(Register base, ByteSize disp) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(in_bytes(disp)) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : _base(base), ++ _index(index), ++ _scale(scale), ++ _disp(in_bytes(disp)) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++#endif // ASSERT ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument { ++ private: ++ int _number; ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++ ++ Argument(int number):_number(number){ } ++ Argument successor() {return Argument(number() + 1);} ++ ++ int number()const {return _number;} ++ bool is_Register()const {return _number < n_register_parameters;} ++ bool is_FloatRegister()const {return _number < n_float_register_parameters;} ++ ++ Register as_Register()const { ++ assert(is_Register(), "must be a register argument"); ++ return ::as_Register(A0->encoding() + _number); ++ } ++ FloatRegister as_FloatRegister()const { ++ assert(is_FloatRegister(), "must be a float register argument"); ++ return ::as_FloatRegister(F12->encoding() + _number); ++ } ++ ++ Address as_caller_address()const {return Address(SP, (number() - n_register_parameters) * wordSize);} ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. 
++// ++class AddressLiteral { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. ++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. ++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. 
We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++const int FPUStateSizeInWords = 512 / wordSize; ++ ++// The MIPS LOONGSON Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. ++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ // opcode, highest 6 bits: bits[31...26] ++ enum ops { ++ special_op = 0x00, // special_ops ++ regimm_op = 0x01, // regimm_ops ++ j_op = 0x02, ++ jal_op = 0x03, ++ beq_op = 0x04, ++ bne_op = 0x05, ++ blez_op = 0x06, ++ bgtz_op = 0x07, ++ addiu_op = 0x09, ++ slti_op = 0x0a, ++ sltiu_op = 0x0b, ++ andi_op = 0x0c, ++ ori_op = 0x0d, ++ xori_op = 0x0e, ++ lui_op = 0x0f, ++ cop0_op = 0x10, // cop0_ops ++ cop1_op = 0x11, // cop1_ops ++ gs_cop2_op = 0x12, // gs_cop2_ops ++ cop1x_op = 0x13, // cop1x_ops ++ beql_op = 0x14, ++ bnel_op = 0x15, ++ blezl_op = 0x16, ++ bgtzl_op = 0x17, ++ daddiu_op = 0x19, ++ ldl_op = 0x1a, ++ ldr_op = 0x1b, ++ special2_op = 0x1c, // special2_ops ++ msa_op = 0x1e, // msa_ops ++ special3_op = 0x1f, // special3_ops ++ lb_op = 0x20, ++ lh_op = 0x21, ++ lwl_op = 0x22, ++ lw_op = 0x23, ++ lbu_op = 0x24, ++ lhu_op = 0x25, ++ lwr_op = 0x26, ++ lwu_op = 0x27, ++ sb_op = 0x28, ++ sh_op = 0x29, ++ swl_op = 0x2a, ++ sw_op = 0x2b, ++ sdl_op = 0x2c, ++ sdr_op = 0x2d, ++ swr_op = 0x2e, ++ cache_op = 0x2f, ++ ll_op = 0x30, ++ lwc1_op = 0x31, ++ gs_lwc2_op = 0x32, //gs_lwc2_ops ++ pref_op = 0x33, ++ lld_op = 0x34, ++ ldc1_op = 0x35, ++ gs_ldc2_op = 0x36, //gs_ldc2_ops ++ ld_op = 0x37, ++ sc_op = 0x38, ++ swc1_op = 0x39, ++ gs_swc2_op = 0x3a, //gs_swc2_ops ++ scd_op = 0x3c, ++ sdc1_op = 0x3d, ++ gs_sdc2_op = 0x3e, //gs_sdc2_ops ++ sd_op = 0x3f ++ }; ++ ++ static const char *ops_name[]; ++ ++ //special family, the opcode is in low 6 bits. 
++ enum special_ops { ++ sll_op = 0x00, ++ movci_op = 0x01, ++ srl_op = 0x02, ++ sra_op = 0x03, ++ sllv_op = 0x04, ++ srlv_op = 0x06, ++ srav_op = 0x07, ++ jr_op = 0x08, ++ jalr_op = 0x09, ++ movz_op = 0x0a, ++ movn_op = 0x0b, ++ syscall_op = 0x0c, ++ break_op = 0x0d, ++ sync_op = 0x0f, ++ mfhi_op = 0x10, ++ mthi_op = 0x11, ++ mflo_op = 0x12, ++ mtlo_op = 0x13, ++ dsllv_op = 0x14, ++ dsrlv_op = 0x16, ++ dsrav_op = 0x17, ++ mult_op = 0x18, ++ multu_op = 0x19, ++ div_op = 0x1a, ++ divu_op = 0x1b, ++ dmult_op = 0x1c, ++ dmultu_op = 0x1d, ++ ddiv_op = 0x1e, ++ ddivu_op = 0x1f, ++ addu_op = 0x21, ++ subu_op = 0x23, ++ and_op = 0x24, ++ or_op = 0x25, ++ xor_op = 0x26, ++ nor_op = 0x27, ++ slt_op = 0x2a, ++ sltu_op = 0x2b, ++ daddu_op = 0x2d, ++ dsubu_op = 0x2f, ++ tge_op = 0x30, ++ tgeu_op = 0x31, ++ tlt_op = 0x32, ++ tltu_op = 0x33, ++ teq_op = 0x34, ++ tne_op = 0x36, ++ dsll_op = 0x38, ++ dsrl_op = 0x3a, ++ dsra_op = 0x3b, ++ dsll32_op = 0x3c, ++ dsrl32_op = 0x3e, ++ dsra32_op = 0x3f ++ }; ++ ++ static const char* special_name[]; ++ ++ //regimm family, the opcode is in rt[16...20], 5 bits ++ enum regimm_ops { ++ bltz_op = 0x00, ++ bgez_op = 0x01, ++ bltzl_op = 0x02, ++ bgezl_op = 0x03, ++ tgei_op = 0x08, ++ tgeiu_op = 0x09, ++ tlti_op = 0x0a, ++ tltiu_op = 0x0b, ++ teqi_op = 0x0c, ++ tnei_op = 0x0e, ++ bltzal_op = 0x10, ++ bgezal_op = 0x11, ++ bltzall_op = 0x12, ++ bgezall_op = 0x13, ++ bposge32_op = 0x1c, ++ bposge64_op = 0x1d, ++ synci_op = 0x1f, ++ }; ++ ++ static const char* regimm_name[]; ++ ++ //cop0 family, the ops is in bits[25...21], 5 bits ++ enum cop0_ops { ++ mfc0_op = 0x00, ++ dmfc0_op = 0x01, ++ // ++ mxgc0_op = 0x03, //MFGC0, DMFGC0, MTGC0 ++ mtc0_op = 0x04, ++ dmtc0_op = 0x05, ++ rdpgpr_op = 0x0a, ++ inter_op = 0x0b, ++ wrpgpr_op = 0x0c ++ }; ++ ++ //cop1 family, the ops is in bits[25...21], 5 bits ++ enum cop1_ops { ++ mfc1_op = 0x00, ++ dmfc1_op = 0x01, ++ cfc1_op = 0x02, ++ mfhc1_op = 0x03, ++ mtc1_op = 0x04, ++ dmtc1_op = 0x05, ++ ctc1_op = 0x06, ++ mthc1_op = 0x07, ++ bc1f_op = 0x08, ++ single_fmt = 0x10, ++ double_fmt = 0x11, ++ word_fmt = 0x14, ++ long_fmt = 0x15, ++ ps_fmt = 0x16 ++ }; ++ ++ ++ //2 bist (bits[17...16]) of bc1x instructions (cop1) ++ enum bc_ops { ++ bcf_op = 0x0, ++ bct_op = 0x1, ++ bcfl_op = 0x2, ++ bctl_op = 0x3, ++ }; ++ ++ // low 6 bits of c_x_fmt instructions (cop1) ++ enum c_conds { ++ f_cond = 0x30, ++ un_cond = 0x31, ++ eq_cond = 0x32, ++ ueq_cond = 0x33, ++ olt_cond = 0x34, ++ ult_cond = 0x35, ++ ole_cond = 0x36, ++ ule_cond = 0x37, ++ sf_cond = 0x38, ++ ngle_cond = 0x39, ++ seq_cond = 0x3a, ++ ngl_cond = 0x3b, ++ lt_cond = 0x3c, ++ nge_cond = 0x3d, ++ le_cond = 0x3e, ++ ngt_cond = 0x3f ++ }; ++ ++ // low 6 bits of cop1 instructions ++ enum float_ops { ++ fadd_op = 0x00, ++ fsub_op = 0x01, ++ fmul_op = 0x02, ++ fdiv_op = 0x03, ++ fsqrt_op = 0x04, ++ fabs_op = 0x05, ++ fmov_op = 0x06, ++ fneg_op = 0x07, ++ froundl_op = 0x08, ++ ftruncl_op = 0x09, ++ fceill_op = 0x0a, ++ ffloorl_op = 0x0b, ++ froundw_op = 0x0c, ++ ftruncw_op = 0x0d, ++ fceilw_op = 0x0e, ++ ffloorw_op = 0x0f, ++ movf_f_op = 0x11, ++ movt_f_op = 0x11, ++ movz_f_op = 0x12, ++ movn_f_op = 0x13, ++ frecip_op = 0x15, ++ frsqrt_op = 0x16, ++ fcvts_op = 0x20, ++ fcvtd_op = 0x21, ++ fcvtw_op = 0x24, ++ fcvtl_op = 0x25, ++ fcvtps_op = 0x26, ++ fcvtspl_op = 0x28, ++ fpll_op = 0x2c, ++ fplu_op = 0x2d, ++ fpul_op = 0x2e, ++ fpuu_op = 0x2f ++ }; ++ ++ static const char* cop1_name[]; ++ ++ //cop1x family, the opcode is in low 6 bits. 
++ enum cop1x_ops { ++ lwxc1_op = 0x00, ++ ldxc1_op = 0x01, ++ luxc1_op = 0x05, ++ swxc1_op = 0x08, ++ sdxc1_op = 0x09, ++ suxc1_op = 0x0d, ++ prefx_op = 0x0f, ++ ++ alnv_ps_op = 0x1e, ++ madd_s_op = 0x20, ++ madd_d_op = 0x21, ++ madd_ps_op = 0x26, ++ msub_s_op = 0x28, ++ msub_d_op = 0x29, ++ msub_ps_op = 0x2e, ++ nmadd_s_op = 0x30, ++ nmadd_d_op = 0x31, ++ nmadd_ps_op = 0x36, ++ nmsub_s_op = 0x38, ++ nmsub_d_op = 0x39, ++ nmsub_ps_op = 0x3e ++ }; ++ ++ static const char* cop1x_name[]; ++ ++ //special2 family, the opcode is in low 6 bits. ++ enum special2_ops { ++ madd_op = 0x00, ++ maddu_op = 0x01, ++ mul_op = 0x02, ++ gs0x03_op = 0x03, ++ msub_op = 0x04, ++ msubu_op = 0x05, ++ gs0x06_op = 0x06, ++ gsemul2_op = 0x07, ++ gsemul3_op = 0x08, ++ gsemul4_op = 0x09, ++ gsemul5_op = 0x0a, ++ gsemul6_op = 0x0b, ++ gsemul7_op = 0x0c, ++ gsemul8_op = 0x0d, ++ gsemul9_op = 0x0e, ++ gsemul10_op = 0x0f, ++ gsmult_op = 0x10, ++ gsdmult_op = 0x11, ++ gsmultu_op = 0x12, ++ gsdmultu_op = 0x13, ++ gsdiv_op = 0x14, ++ gsddiv_op = 0x15, ++ gsdivu_op = 0x16, ++ gsddivu_op = 0x17, ++ gsmod_op = 0x1c, ++ gsdmod_op = 0x1d, ++ gsmodu_op = 0x1e, ++ gsdmodu_op = 0x1f, ++ clz_op = 0x20, ++ clo_op = 0x21, ++ xctx_op = 0x22, //ctz, cto, dctz, dcto, gsX ++ gsrxr_x_op = 0x23, //gsX ++ dclz_op = 0x24, ++ dclo_op = 0x25, ++ gsle_op = 0x26, ++ gsgt_op = 0x27, ++ gs86j_op = 0x28, ++ gsloop_op = 0x29, ++ gsaj_op = 0x2a, ++ gsldpc_op = 0x2b, ++ gs86set_op = 0x30, ++ gstm_op = 0x31, ++ gscvt_ld_op = 0x32, ++ gscvt_ud_op = 0x33, ++ gseflag_op = 0x34, ++ gscam_op = 0x35, ++ gstop_op = 0x36, ++ gssettag_op = 0x37, ++ gssdbbp_op = 0x38 ++ }; ++ ++ static const char* special2_name[]; ++ ++ // special3 family, the opcode is in low 6 bits. ++ enum special3_ops { ++ ext_op = 0x00, ++ dextm_op = 0x01, ++ dextu_op = 0x02, ++ dext_op = 0x03, ++ ins_op = 0x04, ++ dinsm_op = 0x05, ++ dinsu_op = 0x06, ++ dins_op = 0x07, ++ lxx_op = 0x0a, //lwx, lhx, lbux, ldx ++ insv_op = 0x0c, ++ dinsv_op = 0x0d, ++ ar1_op = 0x10, //MIPS DSP ++ cmp1_op = 0x11, //MIPS DSP ++ re1_op = 0x12, //MIPS DSP, re1_ops ++ sh1_op = 0x13, //MIPS DSP ++ ar2_op = 0x14, //MIPS DSP ++ cmp2_op = 0x15, //MIPS DSP ++ re2_op = 0x16, //MIPS DSP, re2_ops ++ sh2_op = 0x17, //MIPS DSP ++ ar3_op = 0x18, //MIPS DSP ++ bshfl_op = 0x20 //seb, seh ++ }; ++ ++ // re1_ops ++ enum re1_ops { ++ absq_s_qb_op = 0x01, ++ repl_qb_op = 0x02, ++ replv_qb_op = 0x03, ++ absq_s_ph_op = 0x09, ++ repl_ph_op = 0x0a, ++ replv_ph_op = 0x0b, ++ absq_s_w_op = 0x11, ++ bitrev_op = 0x1b ++ }; ++ ++ // re2_ops ++ enum re2_ops { ++ repl_ob_op = 0x02, ++ replv_ob_op = 0x03, ++ absq_s_qh_op = 0x09, ++ repl_qh_op = 0x0a, ++ replv_qh_op = 0x0b, ++ absq_s_pw_op = 0x11, ++ repl_pw_op = 0x12, ++ replv_pw_op = 0x13 ++ }; ++ ++ static const char* special3_name[]; ++ ++ // lwc2/gs_lwc2 family, the opcode is in low 6 bits. ++ enum gs_lwc2_ops { ++ gslble_op = 0x10, ++ gslbgt_op = 0x11, ++ gslhle_op = 0x12, ++ gslhgt_op = 0x13, ++ gslwle_op = 0x14, ++ gslwgt_op = 0x15, ++ gsldle_op = 0x16, ++ gsldgt_op = 0x17, ++ gslwlec1_op = 0x1c, ++ gslwgtc1_op = 0x1d, ++ gsldlec1_op = 0x1e, ++ gsldgtc1_op = 0x1f, ++ gslq_op = 0x20 ++ }; ++ ++ static const char* gs_lwc2_name[]; ++ ++ // ldc2/gs_ldc2 family, the opcode is in low 3 bits. ++ enum gs_ldc2_ops { ++ gslbx_op = 0x0, ++ gslhx_op = 0x1, ++ gslwx_op = 0x2, ++ gsldx_op = 0x3, ++ gslwxc1_op = 0x6, ++ gsldxc1_op = 0x7 ++ }; ++ ++ static const char* gs_ldc2_name[]; ++ ++ // swc2/gs_swc2 family, the opcode is in low 6 bits. 
++  enum gs_swc2_ops {
++    gssble_op        = 0x10,
++    gssbgt_op        = 0x11,
++    gsshle_op        = 0x12,
++    gsshgt_op        = 0x13,
++    gsswle_op        = 0x14,
++    gsswgt_op        = 0x15,
++    gssdle_op        = 0x16,
++    gssdgt_op        = 0x17,
++    gsswlec1_op      = 0x1c,
++    gsswgtc1_op      = 0x1d,
++    gssdlec1_op      = 0x1e,
++    gssdgtc1_op      = 0x1f,
++    gssq_op          = 0x20
++  };
++
++  static const char* gs_swc2_name[];
++
++  // sdc2/gs_sdc2 family, the opcode is in low 3 bits.
++  enum gs_sdc2_ops {
++    gssbx_op         = 0x0,
++    gsshx_op         = 0x1,
++    gsswx_op         = 0x2,
++    gssdx_op         = 0x3,
++    gsswxc1_op       = 0x6,
++    gssdxc1_op       = 0x7
++  };
++
++  static const char* gs_sdc2_name[];
++
++  enum WhichOperand {
++    // input to locate_operand, and format code for relocations
++    imm_operand         = 0,   // embedded 32-bit|64-bit immediate operand
++    disp32_operand      = 1,   // embedded 32-bit displacement or address
++    call32_operand      = 2,   // embedded 32-bit self-relative displacement
++    narrow_oop_operand  = 3,   // embedded 32-bit immediate narrow oop
++    _WhichOperand_limit = 4
++  };
++
++  static int opcode(int insn)  { return (insn>>26)&0x3f; }
++  static int rs(int insn)      { return (insn>>21)&0x1f; }
++  static int rt(int insn)      { return (insn>>16)&0x1f; }
++  static int rd(int insn)      { return (insn>>11)&0x1f; }
++  static int sa(int insn)      { return (insn>>6)&0x1f; }
++  static int special(int insn) { return insn&0x3f; }
++  static int imm_off(int insn) { return (short)low16(insn); }
++
++  static int low  (int x, int l) { return bitfield(x, 0, l); }
++  static int low16(int x)        { return low(x, 16); }
++  static int low26(int x)        { return low(x, 26); }
++
++ protected:
++  // helper methods for instruction emission
++
++  // I-Type (Immediate)
++  // 31       26 25      21 20      16 15                            0
++  // |  opcode  |    rs    |    rt    |          immediate            |
++  // |          |          |          |                               |
++  //      6          5          5                    16
++  static int insn_ORRI(int op, int rs, int rt, int imm) { assert(is_simm16(imm), "not a signed 16-bit int"); return (op<<26) | (rs<<21) | (rt<<16) | low16(imm); }
++
++  // R-Type (Register)
++  // 31          26 25      21 20      16 15      11 10         6 5        0
++  // |   special   |    rs    |    rt    |    rd    |     0      |  opcode  |
++  // | 0 0 0 0 0 0 |          |          |          | 0 0 0 0 0  |          |
++  //        6           5          5          5           5           6
++  static int insn_RRRO(int rs, int rt, int rd,   int op) { return (rs<<21) | (rt<<16) | (rd<<11)  | op; }
++  static int insn_RRSO(int rt, int rd, int sa,   int op) { return (rt<<16) | (rd<<11) | (sa<<6)   | op; }
++  static int insn_RRCO(int rs, int rt, int code, int op) { return (rs<<21) | (rt<<16) | (code<<6) | op; }
++
++  static int insn_COP0(int op, int rt, int rd) { return (cop0_op<<26) | (op<<21) | (rt<<16) | (rd<<11); }
++  static int insn_COP1(int op, int rt, int fs) { return (cop1_op<<26) | (op<<21) | (rt<<16) | (fs<<11); }
++
++  static int insn_F3RO(int fmt, int ft, int fs, int fd, int func) {
++    return (cop1_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func;
++  }
++  static int insn_F3ROX(int fmt, int ft, int fs, int fd, int func) {
++    return (cop1x_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func;
++  }
++
++  static int high  (int x, int l) { return bitfield(x, 32-l, l); }
++  static int high16(int x)        { return high(x, 16); }
++  static int high6 (int x)        { return high(x, 6); }
++
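++  // A rough worked example of how these helpers pack an instruction word
++  // (the register encodings 29 and 4 below are arbitrary, picked only for
++  // illustration): insn_ORRI(addiu_op, 29, 4, 8) evaluates to
++  //   (0x09 << 26) | (29 << 21) | (4 << 16) | low16(8)
++  //   = 0x24000000 | 0x03A00000 | 0x00040000 | 0x0008
++  //   = 0x27A40008
++  // i.e. the I-type word 001001 11101 00100 0000000000001000, which a debug
++  // check could verify as: assert(insn_ORRI(addiu_op, 29, 4, 8) == 0x27A40008, "packing");
++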
++  //get the offset field of a jump/branch instruction
++  int offset(address entry) {
++    assert(is_simm16((entry - pc() - 4) / 4), "change this code");
++    if (!is_simm16((entry - pc() - 4) / 4)) {
++      tty->print_cr("!!! is_simm16: %lx", (entry - pc() - 4) / 4);
++    }
++    return (entry - pc() - 4) / 4;
++  }
++
++
++public:
++  using AbstractAssembler::offset;
++
++  // sign-extend x, where h is the index of the sign bit
++  static int expand(int x, int h) { return -(x & (1<<h)) | x; }
++
++  // reinterpret the low 16 bits of x as a signed value
++  static int simm16(int x) {
++    return (x << 16) >> 16;
++  }
++
++  static int split_high(int x) {
++    return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff;
++  }
++
++  static int merge(int low, int high) {
++    return expand(low, 15) + (high<<16);
++  }
++
++  static intptr_t merge(intptr_t x0, intptr_t x16, intptr_t x32, intptr_t x48) {
++    return (x48 << 48) | (x32 << 32) | (x16 << 16) | x0;
++  }
++
++  // Test if x is within signed immediate range for nbits.
++  static bool is_simm(int x, int nbits) {
++    assert(0 < nbits && nbits < 32, "out of bounds");
++    const int min      = -( ((int)1) << nbits-1 );
++    const int maxplus1 =  ( ((int)1) << nbits-1 );
++    return min <= x && x < maxplus1;
++  }
++
++  static bool is_simm(jlong x, unsigned int nbits) {
++    assert(0 < nbits && nbits < 64, "out of bounds");
++    const jlong min      = -( ((jlong)1) << nbits-1 );
++    const jlong maxplus1 =  ( ((jlong)1) << nbits-1 );
++    return min <= x && x < maxplus1;
++  }
++
++  // Test if x is within unsigned immediate range for nbits
++  static bool is_uimm(int x, unsigned int nbits) {
++    assert(0 < nbits && nbits < 32, "out of bounds");
++    const int maxplus1 = ( ((int)1) << nbits );
++    return 0 <= x && x < maxplus1;
++  }
++
++  static bool is_uimm(jlong x, unsigned int nbits) {
++    assert(0 < nbits && nbits < 64, "out of bounds");
++    const jlong maxplus1 = ( ((jlong)1) << nbits );
++    return 0 <= x && x < maxplus1;
++  }
++
++  static bool is_simm16(int x)  { return is_simm(x, 16); }
++  static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); }
++
++  static bool fit_in_jal(address target, address pc) {
++    intptr_t mask = 0xfffffffff0000000;
++    return ((intptr_t)(pc + 4) & mask) == ((intptr_t)target & mask);
++  }
++
++  bool fit_int_branch(address entry) {
++    return is_simm16(offset(entry));
++  }
++
++protected:
++#ifdef ASSERT
++  #define CHECK_DELAY
++#endif
++#ifdef CHECK_DELAY
++  enum Delay_state { no_delay, at_delay_slot, filling_delay_slot } delay_state;
++#endif
++
++public:
++  void assert_not_delayed() {
++#ifdef CHECK_DELAY
++    assert(delay_state == no_delay, "next instruction should not be a delay slot");
++#endif
++  }
++
++protected:
++  // Delay slot helpers
++  // cti is called when emitting a control-transfer instruction,
++  // BEFORE doing the emitting.
++  // Only effective when assertion-checking is enabled.
++
++  // called when emitting a cti with a delay slot, AFTER emitting
++  void has_delay_slot() {
++#ifdef CHECK_DELAY
++    assert(delay_state == no_delay, "just checking");
++    delay_state = at_delay_slot;
++#endif
++  }
++
++public:
++  Assembler* delayed() {
++#ifdef CHECK_DELAY
++    guarantee( delay_state == at_delay_slot, "delayed instruction is not in delay slot");
++    delay_state = filling_delay_slot;
++#endif
++    return this;
++  }
++
++  void flush() {
++#ifdef CHECK_DELAY
++    guarantee( delay_state == no_delay, "ending code with a delay slot");
++#endif
++    AbstractAssembler::flush();
++  }
++
++  inline void emit_long(int);  // shadows AbstractAssembler::emit_long
++  inline void emit_data(int x) { emit_long(x); }
++  inline void emit_data(int, RelocationHolder const&);
++  inline void emit_data(int, relocInfo::relocType rtype);
++  inline void check_delay();
++
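++  // A minimal sketch of how the delay-slot bookkeeping above is intended to be
++  // used (illustrative only; T0 and T1 stand for any two general registers):
++  //
++  //   __ beq(T0, T1, target);   // emits the branch, has_delay_slot() -> at_delay_slot
++  //   __ delayed()->nop();      // fills the delay slot; state returns to no_delay
++  //
++  // Emitting another control-transfer instruction before the slot is filled, or
++  // calling flush() with an open slot, trips the checks when CHECK_DELAY is defined.
++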
++
++  // Generic instructions
++  // Does 32bit or 64bit as needed for the platform. In some sense these
++  // belong in macro assembler but there is no need for both varieties to exist
++
++  void addu32(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), addu_op)); }
++  void addiu32(Register rt, Register rs, int imm)     { emit_long(insn_ORRI(addiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); }
++  void addiu(Register rt, Register rs, int imm)       { daddiu (rt, rs, imm);}
++  void addu(Register rd, Register rs, Register rt)    { daddu (rd, rs, rt); }
++
++  void andr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), and_op)); }
++  void andi(Register rt, Register rs, int imm)     { emit_long(insn_ORRI(andi_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); }
++
++  void beq    (Register rs, Register rt, int off) { emit_long(insn_ORRI(beq_op,  (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); }
++  void beql   (Register rs, Register rt, int off) { emit_long(insn_ORRI(beql_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); }
++  void bgez   (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgez_op,    off)); has_delay_slot(); }
++  void bgezal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezal_op,  off)); has_delay_slot(); }
++  void bgezall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezall_op, off)); has_delay_slot(); }
++  void bgezl  (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezl_op,   off)); has_delay_slot(); }
++  void bgtz   (Register rs, int off) { emit_long(insn_ORRI(bgtz_op,  (int)rs->encoding(), 0, off)); has_delay_slot(); }
++  void bgtzl  (Register rs, int off) { emit_long(insn_ORRI(bgtzl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); }
++  void blez   (Register rs, int off) { emit_long(insn_ORRI(blez_op,  (int)rs->encoding(), 0, off)); has_delay_slot(); }
++  void blezl  (Register rs, int off) { emit_long(insn_ORRI(blezl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); }
++  void bltz   (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltz_op,    off)); has_delay_slot(); }
++  void bltzal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzal_op,  off)); has_delay_slot(); }
++  void bltzall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzall_op, off)); has_delay_slot(); }
++  void bltzl  (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzl_op,   off)); has_delay_slot(); }
++  void bne    (Register rs, Register rt, int off) { emit_long(insn_ORRI(bne_op,  (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); }
++  void bnel   (Register rs, Register rt, int off) { emit_long(insn_ORRI(bnel_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); }
++  // two versions of brk:
++  // the brk(code) version follows the MIPS64 Architecture For Programmers Volume II: The MIPS64 Instruction Set
++  // the brk(code1, code2) version follows the disassembler of hsdis (binutils-2.27)
++  // both versions work
++  void brk (int code)             { assert(is_uimm(code, 20), "code is 20 bits"); emit_long( (low(code, 20)<<6) | break_op ); }
++  void brk (int code1, int code2) { assert(is_uimm(code1, 10) && is_uimm(code2, 10), "code is 20 bits"); emit_long( (low(code1, 10)<<16) | (low(code2, 10)<<6) | break_op ); }
++
++  void beq    (Register rs, Register rt, address entry) { beq(rs, rt,
offset(entry)); } ++ void beql (Register rs, Register rt, address entry) { beql(rs, rt, offset(entry));} ++ void bgez (Register rs, address entry) { bgez (rs, offset(entry)); } ++ void bgezal (Register rs, address entry) { bgezal (rs, offset(entry)); } ++ void bgezall(Register rs, address entry) { bgezall(rs, offset(entry)); } ++ void bgezl (Register rs, address entry) { bgezl (rs, offset(entry)); } ++ void bgtz (Register rs, address entry) { bgtz (rs, offset(entry)); } ++ void bgtzl (Register rs, address entry) { bgtzl (rs, offset(entry)); } ++ void blez (Register rs, address entry) { blez (rs, offset(entry)); } ++ void blezl (Register rs, address entry) { blezl (rs, offset(entry)); } ++ void bltz (Register rs, address entry) { bltz (rs, offset(entry)); } ++ void bltzal (Register rs, address entry) { bltzal (rs, offset(entry)); } ++ void bltzall(Register rs, address entry) { bltzall(rs, offset(entry)); } ++ void bltzl (Register rs, address entry) { bltzl (rs, offset(entry)); } ++ void bne (Register rs, Register rt, address entry) { bne(rs, rt, offset(entry)); } ++ void bnel (Register rs, Register rt, address entry) { bnel(rs, rt, offset(entry)); } ++ ++ void beq (Register rs, Register rt, Label& L) { beq(rs, rt, target(L)); } ++ void beql (Register rs, Register rt, Label& L) { beql(rs, rt, target(L)); } ++ void bgez (Register rs, Label& L){ bgez (rs, target(L)); } ++ void bgezal (Register rs, Label& L){ bgezal (rs, target(L)); } ++ void bgezall(Register rs, Label& L){ bgezall(rs, target(L)); } ++ void bgezl (Register rs, Label& L){ bgezl (rs, target(L)); } ++ void bgtz (Register rs, Label& L){ bgtz (rs, target(L)); } ++ void bgtzl (Register rs, Label& L){ bgtzl (rs, target(L)); } ++ void blez (Register rs, Label& L){ blez (rs, target(L)); } ++ void blezl (Register rs, Label& L){ blezl (rs, target(L)); } ++ void bltz (Register rs, Label& L){ bltz (rs, target(L)); } ++ void bltzal (Register rs, Label& L){ bltzal (rs, target(L)); } ++ void bltzall(Register rs, Label& L){ bltzall(rs, target(L)); } ++ void bltzl (Register rs, Label& L){ bltzl (rs, target(L)); } ++ void bne (Register rs, Register rt, Label& L){ bne(rs, rt, target(L)); } ++ void bnel (Register rs, Register rt, Label& L){ bnel(rs, rt, target(L)); } ++ ++ void daddiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(daddiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void daddu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), daddu_op)); } ++ void ddiv (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddiv_op)); } ++ void ddivu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddivu_op)); } ++ ++ void movz (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movz_op)); } ++ void movn (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movn_op)); } ++ ++ void movt (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | (1 << 16) | ((int)rd->encoding() << 11) | movci_op); } ++ void movf (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | ((int)rd->encoding() << 11) | movci_op); } ++ ++ enum bshfl_ops { ++ seb_op = 0x10, ++ seh_op = 0x18 ++ }; ++ void seb (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seb_op 
<< 6) | bshfl_op); } ++ void seh (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seh_op << 6) | bshfl_op); } ++ ++ void ext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | ext_op); ++ } ++ ++ void dext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 63), "pos + size must be in (0, 63]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dext_op); ++ } ++ ++ void dextm (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((32 < size) && (size <= 64), "size must be in (32, 64]"); ++ guarantee((32 < pos + size) && (pos + size <= 64), "pos + size must be in (32, 64]"); ++ ++ int lsb = pos; ++ int msbd = size - 1 - 32; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dextm_op); ++ } ++ ++ void rotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | srl_op); ++ } ++ ++ void drotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl_op); ++ } ++ ++ void drotr32 (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl32_op); ++ } ++ ++ void rotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | srlv_op); ++ } ++ ++ void drotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | dsrlv_op); ++ } ++ ++ void div (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, div_op)); } ++ void divu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, divu_op)); } ++ void dmult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmult_op)); } ++ void dmultu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmultu_op)); } ++ void dsll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll_op)); } ++ void dsllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsllv_op)); } ++ void dsll32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll32_op)); } ++ void dsra 
(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra_op)); } ++ void dsrav (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrav_op)); } ++ void dsra32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra32_op)); } ++ void dsrl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl_op)); } ++ void dsrlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrlv_op)); } ++ void dsrl32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl32_op)); } ++ void dsubu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsubu_op)); } ++ ++ void b(int off) { beq(R0, R0, off); } ++ void b(address entry) { b(offset(entry)); } ++ void b(Label& L) { b(target(L)); } ++ ++ void j(address entry); ++ void jal(address entry); ++ ++ void jalr(Register rd, Register rs) { emit_long( ((int)rs->encoding()<<21) | ((int)rd->encoding()<<11) | jalr_op); has_delay_slot(); } ++ void jalr(Register rs) { jalr(RA, rs); } ++ void jalr() { jalr(RT9); } ++ ++ void jr(Register rs) { emit_long(((int)rs->encoding()<<21) | jr_op); has_delay_slot(); } ++ void jr_hb(Register rs) { emit_long(((int)rs->encoding()<<21) | (1 << 10) | jr_op); has_delay_slot(); } ++ ++ void lb (Register rt, Register base, int off) { emit_long(insn_ORRI(lb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lbu(Register rt, Register base, int off) { emit_long(insn_ORRI(lbu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ld (Register rt, Register base, int off) { emit_long(insn_ORRI(ld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldl(Register rt, Register base, int off) { emit_long(insn_ORRI(ldl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldr(Register rt, Register base, int off) { emit_long(insn_ORRI(ldr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lh (Register rt, Register base, int off) { emit_long(insn_ORRI(lh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lhu(Register rt, Register base, int off) { emit_long(insn_ORRI(lhu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ll (Register rt, Register base, int off) { emit_long(insn_ORRI(ll_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lld(Register rt, Register base, int off) { emit_long(insn_ORRI(lld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lui(Register rt, int imm) { emit_long(insn_ORRI(lui_op, 0, (int)rt->encoding(), simm16(imm))); } ++ void lw (Register rt, Register base, int off) { emit_long(insn_ORRI(lw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwl(Register rt, Register base, int off) { emit_long(insn_ORRI(lwl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwr(Register rt, Register base, int off) { emit_long(insn_ORRI(lwr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwu(Register rt, Register base, int off) { emit_long(insn_ORRI(lwu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ ++ void lb (Register rt, Address src); ++ void lbu(Register rt, Address src); ++ void ld (Register rt, Address src); ++ void 
ldl(Register rt, Address src); ++ void ldr(Register rt, Address src); ++ void lh (Register rt, Address src); ++ void lhu(Register rt, Address src); ++ void ll (Register rt, Address src); ++ void lld(Register rt, Address src); ++ void lw (Register rt, Address src); ++ void lwl(Register rt, Address src); ++ void lwr(Register rt, Address src); ++ void lwu(Register rt, Address src); ++ void lea(Register rt, Address src); ++ void pref(int hint, Register base, int off) { emit_long(insn_ORRI(pref_op, (int)base->encoding(), low(hint, 5), low(off, 16))); } ++ ++ void mfhi (Register rd) { emit_long( ((int)rd->encoding()<<11) | mfhi_op ); } ++ void mflo (Register rd) { emit_long( ((int)rd->encoding()<<11) | mflo_op ); } ++ void mthi (Register rs) { emit_long( ((int)rs->encoding()<<21) | mthi_op ); } ++ void mtlo (Register rs) { emit_long( ((int)rs->encoding()<<21) | mtlo_op ); } ++ ++ void mult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, mult_op)); } ++ void multu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, multu_op)); } ++ ++ void nor(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), nor_op)); } ++ ++ void orr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), or_op)); } ++ void ori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(ori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void sb (Register rt, Register base, int off) { emit_long(insn_ORRI(sb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sc (Register rt, Register base, int off) { emit_long(insn_ORRI(sc_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void scd (Register rt, Register base, int off) { emit_long(insn_ORRI(scd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sd (Register rt, Register base, int off) { emit_long(insn_ORRI(sd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdl (Register rt, Register base, int off) { emit_long(insn_ORRI(sdl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdr (Register rt, Register base, int off) { emit_long(insn_ORRI(sdr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sh (Register rt, Register base, int off) { emit_long(insn_ORRI(sh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sll_op)); } ++ void sllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sllv_op)); } ++ void slt (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), slt_op)); } ++ void slti (Register rt, Register rs, int imm) { emit_long(insn_ORRI(slti_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(sltiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sltu_op)); } ++ void sra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sra_op)); } ++ void srav (Register rd, Register rt, Register rs) { 
emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srav_op)); } ++ void srl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), srl_op)); } ++ void srlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srlv_op)); } ++ ++ void subu (Register rd, Register rs, Register rt) { dsubu (rd, rs, rt); } ++ void subu32 (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), subu_op)); } ++ void sw (Register rt, Register base, int off) { emit_long(insn_ORRI(sw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swl (Register rt, Register base, int off) { emit_long(insn_ORRI(swl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swr (Register rt, Register base, int off) { emit_long(insn_ORRI(swr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void synci(Register base, int off) { emit_long(insn_ORRI(regimm_op, (int)base->encoding(), synci_op, off)); } ++ void sync () { ++ if (os::is_ActiveCoresMP()) ++ emit_long(0); ++ else ++ emit_long(sync_op); ++ } ++ void syscall(int code) { emit_long( (code<<6) | syscall_op ); } ++ ++ void sb(Register rt, Address dst); ++ void sc(Register rt, Address dst); ++ void scd(Register rt, Address dst); ++ void sd(Register rt, Address dst); ++ void sdl(Register rt, Address dst); ++ void sdr(Register rt, Address dst); ++ void sh(Register rt, Address dst); ++ void sw(Register rt, Address dst); ++ void swl(Register rt, Address dst); ++ void swr(Register rt, Address dst); ++ ++ void teq (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, teq_op)); } ++ void teqi (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), teqi_op, imm)); } ++ void tge (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tge_op)); } ++ void tgei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgei_op, imm)); } ++ void tgeiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgeiu_op, imm)); } ++ void tgeu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tgeu_op)); } ++ void tlt (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tlt_op)); } ++ void tlti (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tlti_op, imm)); } ++ void tltiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tltiu_op, imm)); } ++ void tltu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tltu_op)); } ++ void tne (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tne_op)); } ++ void tnei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tnei_op, imm)); } ++ ++ void xorr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), xor_op)); } ++ void xori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(xori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void nop() { emit_long(0); } ++ ++ ++ ++ void ldc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(ldc1_op, 
(int)base->encoding(), (int)ft->encoding(), off)); } ++ void lwc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(lwc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void ldc1(FloatRegister ft, Address src); ++ void lwc1(FloatRegister ft, Address src); ++ ++ //COP0 ++ void mfc0 (Register rt, Register rd) { emit_long(insn_COP0( mfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmfc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ // MFGC0, DMFGC0, MTGC0, DMTGC0 not implemented yet ++ void mtc0 (Register rt, Register rd) { emit_long(insn_COP0( mtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmtc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ //COP0 end ++ ++ ++ //COP1 ++ void mfc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1 (mfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmfc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void cfc1 (Register rt, int fs) { emit_long(insn_COP1( cfc1_op, (int)rt->encoding(), fs)); } ++ void mfhc1(Register rt, int fs) { emit_long(insn_COP1(mfhc1_op, (int)rt->encoding(), fs)); } ++ void mtc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( mtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmtc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( ctc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, int fs) { emit_long(insn_COP1(ctc1_op, (int)rt->encoding(), fs)); } ++ void mthc1(Register rt, int fs) { emit_long(insn_COP1(mthc1_op, (int)rt->encoding(), fs)); } ++ ++ void bc1f (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcf_op, off)); has_delay_slot(); } ++ void bc1fl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcfl_op, off)); has_delay_slot(); } ++ void bc1t (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bct_op, off)); has_delay_slot(); } ++ void bc1tl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bctl_op, off)); has_delay_slot(); } ++ ++ void bc1f (address entry) { bc1f(offset(entry)); } ++ void bc1fl(address entry) { bc1fl(offset(entry)); } ++ void bc1t (address entry) { bc1t(offset(entry)); } ++ void bc1tl(address entry) { bc1tl(offset(entry)); } ++ ++ void bc1f (Label& L) { bc1f(target(L)); } ++ void bc1fl(Label& L) { bc1fl(target(L)); } ++ void bc1t (Label& L) { bc1t(target(L)); } ++ void bc1tl(Label& L) { bc1tl(target(L)); } ++ ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
++#define INSN_SINGLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(single_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void add_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fadd_op)} ++ void sub_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fsub_op)} ++ void mul_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fmul_op)} ++ void div_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fdiv_op)} ++ void sqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fsqrt_op)} ++ void abs_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fabs_op)} ++ void mov_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fmov_op)} ++ void neg_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fneg_op)} ++ void round_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundl_op)} ++ void trunc_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceill_op)} ++ void floor_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorl_op)} ++ void round_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundw_op)} ++ void trunc_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceilw_op)} ++ void floor_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movz_f_op)} ++ void movn_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frecip_op)} ++ void rsqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_d_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtd_op)} ++ //null ++ void cvt_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtw_op)} ++ void cvt_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtl_op)} ++ void cvt_ps_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fcvtps_op)} ++ //null ++ void c_f_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, f_cond)} ++ void c_un_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, un_cond)} ++ void c_eq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, eq_cond)} ++ void c_ueq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ueq_cond)} ++ void c_olt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, olt_cond)} ++ void c_ult_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ult_cond)} ++ void c_ole_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ole_cond)} 
++ void c_ule_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ule_cond)} ++ void c_sf_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, sf_cond)} ++ void c_ngle_s(FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngle_cond)} ++ void c_seq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, seq_cond)} ++ void c_ngl_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngl_cond)} ++ void c_lt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, lt_cond)} ++ void c_nge_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, nge_cond)} ++ void c_le_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, le_cond)} ++ void c_ngt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_SINGLE ++ ++ ++//R0->encoding() is 0; INSN_DOUBLE is enclosed by {} for ctags. ++#define INSN_DOUBLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(double_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fadd_op)} ++ void sub_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fsub_op)} ++ void mul_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fmul_op)} ++ void div_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fdiv_op)} ++ void sqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fsqrt_op)} ++ void abs_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fabs_op)} ++ void mov_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fmov_op)} ++ void neg_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fneg_op)} ++ void round_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundl_op)} ++ void trunc_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceill_op)} ++ void floor_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorl_op)} ++ void round_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundw_op)} ++ void trunc_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceilw_op)} ++ void floor_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movz_f_op)} ++ void movn_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frecip_op)} ++ void rsqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_s_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvts_op)} ++ void cvt_l_d 
(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtl_op)} ++ //null ++ void cvt_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtw_op)} ++ //null ++ void c_f_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, f_cond)} ++ void c_un_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, un_cond)} ++ void c_eq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, eq_cond)} ++ void c_ueq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ueq_cond)} ++ void c_olt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, olt_cond)} ++ void c_ult_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ult_cond)} ++ void c_ole_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ole_cond)} ++ void c_ule_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ule_cond)} ++ void c_sf_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, sf_cond)} ++ void c_ngle_d(FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngle_cond)} ++ void c_seq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, seq_cond)} ++ void c_ngl_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngl_cond)} ++ void c_lt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, lt_cond)} ++ void c_nge_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, nge_cond)} ++ void c_le_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, le_cond)} ++ void c_ngt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_DOUBLE ++ ++ ++ //null ++ void cvt_s_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ void cvt_s_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ ++ ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_PS(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(ps_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fadd_op)} ++ void sub_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fsub_op)} ++ void mul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fmul_op)} ++ //null ++ void abs_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fabs_op)} ++ void mov_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fmov_op)} ++ void neg_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fneg_op)} ++ //null ++ //void movf_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movf_ps")} ++ //void movt_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movt_ps") } ++ void movz_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movz_f_op)} ++ void movn_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movn_f_op)} ++ //null ++ void cvt_s_pu (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvts_op)} ++ //null ++ void cvt_s_pl (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvtspl_op)} ++ //null ++ void pll_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpll_op)} ++ void plu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fplu_op)} ++ void pul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpul_op)} ++ void puu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpuu_op)} ++ void c_f_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, f_cond)} ++ void c_un_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, un_cond)} ++ void c_eq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, eq_cond)} ++ void c_ueq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ueq_cond)} ++ void c_olt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, olt_cond)} ++ void c_ult_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ult_cond)} ++ void c_ole_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ole_cond)} ++ void c_ule_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ule_cond)} ++ void c_sf_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, sf_cond)} ++ void c_ngle_ps(FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngle_cond)} ++ void c_seq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, seq_cond)} ++ void c_ngl_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngl_cond)} ++ void c_lt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, lt_cond)} ++ void c_nge_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, nge_cond)} ++ void c_le_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, le_cond)} ++ void c_ngt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngt_cond)} ++ //null ++#undef INSN_PS ++ //COP1 end ++ ++ ++ //COP1X ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
++#define INSN_COP1X(r0, r1, r2, r3, op) \ ++ { emit_long(insn_F3ROX((int)r0->encoding(), (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void madd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_s_op) } ++ void madd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_d_op) } ++ void madd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, madd_ps_op) } ++ void msub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_s_op) } ++ void msub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_d_op) } ++ void msub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, msub_ps_op) } ++ void nmadd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_s_op) } ++ void nmadd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_d_op) } ++ void nmadd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmadd_ps_op) } ++ void nmsub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_s_op) } ++ void nmsub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_d_op) } ++ void nmsub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmsub_ps_op) } ++#undef INSN_COP1X ++ //COP1X end ++ ++ //SPECIAL2 ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_S2(op) \ ++ { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | op);} ++ ++ void madd (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | madd_op); } ++ void maddu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | maddu_op); } ++ void mul (Register rd, Register rs, Register rt) { INSN_S2(mul_op) } ++ void gsandn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x03_op) } ++ void msub (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msub_op); } ++ void msubu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msubu_op); } ++ void gsorn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x06_op) } ++ ++ void gsmult (Register rd, Register rs, Register rt) { INSN_S2(gsmult_op) } ++ void gsdmult (Register rd, Register rs, Register rt) { INSN_S2(gsdmult_op) } ++ void gsmultu (Register rd, Register rs, Register rt) { INSN_S2(gsmultu_op) } ++ void gsdmultu(Register rd, Register rs, Register rt) { INSN_S2(gsdmultu_op)} ++ void gsdiv (Register rd, Register rs, Register rt) { INSN_S2(gsdiv_op) } ++ void gsddiv (Register rd, Register rs, Register rt) { INSN_S2(gsddiv_op) } ++ void gsdivu (Register rd, Register rs, Register rt) { INSN_S2(gsdivu_op) } ++ void gsddivu (Register rd, Register rs, Register rt) { INSN_S2(gsddivu_op) } ++ void gsmod (Register rd, Register rs, Register rt) { INSN_S2(gsmod_op) } ++ void gsdmod (Register rd, Register rs, Register rt) { INSN_S2(gsdmod_op) } ++ void gsmodu (Register rd, Register rs, Register rt) { INSN_S2(gsmodu_op) } ++ void gsdmodu (Register rd, Register rs, Register rt) { INSN_S2(gsdmodu_op) } ++ void clz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clz_op); } ++ void clo (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clo_op); } ++ void ctz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 0 << 6| xctx_op); } ++ void cto (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 1 << 6| xctx_op); } ++ void dctz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 2 << 6| xctx_op); } ++ void dcto(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 3 << 6| xctx_op); } ++ void dclz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclz_op); } ++ void dclo(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclo_op); } ++ ++#undef INSN_S2 ++ ++ //SPECIAL3 ++/* ++// FIXME ++#define is_0_to_32(a, b) \ ++ assert (a >= 0, " just a check"); \ ++ assert (a <= 0, " just a check"); \ ++ assert (b >= 
0, " just a check"); \ ++ assert (b <= 0, " just a check"); \ ++ assert (a+b >= 0, " just a check"); \ ++ assert (a+b <= 0, " just a check"); ++ */ ++#define is_0_to_32(a, b) ++ ++ void ins (Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | ins_op); } ++ void dinsm(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos, 5) << 6) | dinsm_op); } ++ void dinsu(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos-32, 5) << 6) | dinsu_op); } ++ void dins (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | dins_op); ++ } ++ ++ void repl_qb (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_qb_op << 6 | re1_op); } ++ void replv_qb(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qb_op << 6 | re1_op ); } ++ void repl_ph (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_ph_op << 6 | re1_op); } ++ void replv_ph(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ph_op << 6 | re1_op ); } ++ ++ void repl_ob (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_ob_op << 6 | re2_op); } ++ void replv_ob(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ob_op << 6 | re2_op ); } ++ void repl_qh (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_qh_op << 6 | re2_op); } ++ void replv_qh(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qh_op << 6 | re2_op ); } ++ void repl_pw (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_pw_op << 6 | re2_op); } ++ void replv_pw(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_pw_op << 6 | re2_op ); } ++ ++ void sdc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(sdc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void 
sdc1(FloatRegister ft, Address dst); ++ void swc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(swc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void swc1(FloatRegister ft, Address dst); ++ ++ ++ static void print_instruction(int); ++ int patched_branch(int dest_pos, int inst, int inst_pos); ++ int branch_destination(int inst, int pos); ++ ++ // Loongson extension ++ ++ // gssq/gslq/gssqc1/gslqc1: vAddr = sign_extend(offset << 4 ) + GPR[base]. Therefore, the off should be ">> 4". ++ void gslble(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslble_op); ++ } ++ ++ void gslbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslbgt_op); ++ } ++ ++ void gslhle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhle_op); ++ } ++ ++ void gslhgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhgt_op); ++ } ++ ++ void gslwle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwle_op); ++ } ++ ++ void gslwgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgt_op); ++ } ++ ++ void gsldle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldle_op); ++ } ++ ++ void gsldgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgt_op); ++ } ++ ++ void gslwlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwlec1_op); ++ } ++ ++ void gslwgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgtc1_op); ++ } ++ ++ void gsldlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldlec1_op); ++ } ++ ++ void gsldgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgtc1_op); ++ } ++ ++ void gslq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gslq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslq: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } 
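[Editor's note — illustrative sketch, not part of the patch.] The comment above gslq/gssq states that the hardware computes vAddr = sign_extend(offset << 4) + GPR[base], so the assembler divides the byte offset by 16 and then requires the scaled value to fit in a signed 9-bit immediate (is_simm(off, 9)). In other words, a valid byte offset must be 16-byte aligned and lie in [-4096, 4080]. The small standalone C++ sketch below mirrors that check; fits_simm and gslq_offset_encodable are hypothetical helper names introduced only for illustration, not HotSpot APIs.

// Illustrative sketch of the gslq/gssq offset encoding constraint.
#include <cstdint>
#include <cstdio>

// True if 'off' fits in a signed immediate of 'nbits' bits,
// mirroring the is_simm(off, 9) check used by gslq/gssq above.
static bool fits_simm(int64_t off, int nbits) {
  int64_t min = -(int64_t(1) << (nbits - 1));
  int64_t max =  (int64_t(1) << (nbits - 1)) - 1;
  return off >= min && off <= max;
}

// A gslq/gssq byte offset is encodable when its low 4 bits are zero
// (16-byte aligned) and the scaled value (off >> 4) is a simm9,
// i.e. the byte offset lies in [-4096, 4080].
static bool gslq_offset_encodable(int64_t byte_off) {
  if (byte_off & 0xF) return false;     // low 4 bits must be zero
  return fits_simm(byte_off >> 4, 9);   // scaled offset must fit in 9 signed bits
}

int main() {
  const int64_t samples[] = {0, 32, 4080, 4096, -4096, -4112, 24};
  for (int64_t off : samples) {
    std::printf("offset %6lld -> %s\n", (long long)off,
                gslq_offset_encodable(off) ? "encodable" : "not encodable");
  }
  return 0;
}

For example, 4080 and -4096 are encodable, while 4096 (scaled value 256 exceeds simm9) and 24 (not 16-byte aligned) are rejected, which is exactly why the gslq/gssq asserts fire for such offsets.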
++ ++ void gslqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gslqc1: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslqc1: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } ++ ++ void gssble(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssble_op); ++ } ++ ++ void gssbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssbgt_op); ++ } ++ ++ void gsshle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshle_op); ++ } ++ ++ void gsshgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshgt_op); ++ } ++ ++ void gsswle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswle_op); ++ } ++ ++ void gsswgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgt_op); ++ } ++ ++ void gssdle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdle_op); ++ } ++ ++ void gssdgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgt_op); ++ } ++ ++ void gsswlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswlec1_op); ++ } ++ ++ void gsswgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgtc1_op); ++ } ++ ++ void gssdlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdlec1_op); ++ } ++ ++ void gssdgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgtc1_op); ++ } ++ ++ void gssq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gssq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssq: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ void gssqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gssqc1: the low 4 bits 
of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssqc1: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ //LDC2 & SDC2 ++#define INSN(OPS, OP) \ ++ assert(is_simm(off, 8), "NAME: off exceeds 8 bits"); \ ++ assert(UseLEXT1, "check UseLEXT1"); \ ++ emit_long( (OPS << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | \ ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | OP); ++ ++#define INSN_LDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_LDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_SDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++#define INSN_SDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++/* ++ void gslbx(Register rt, Register base, Register index, int off) { ++ assert(is_simm(off, 8), "gslbx: off exceeds 8 bits"); ++ assert(UseLEXT1, "check UseLEXT1"); ++ emit_long( (gs_ldc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | gslbx_op); ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op);} ++ ++ INSN_LDC2(gslbx, gslbx_op) ++ INSN_LDC2(gslhx, gslhx_op) ++ INSN_LDC2(gslwx, gslwx_op) ++ INSN_LDC2(gsldx, gsldx_op) ++ INSN_LDC2_F(gslwxc1, gslwxc1_op) ++ INSN_LDC2_F(gsldxc1, gsldxc1_op) ++ ++ INSN_SDC2(gssbx, gssbx_op) ++ INSN_SDC2(gsshx, gsshx_op) ++ INSN_SDC2(gsswx, gsswx_op) ++ INSN_SDC2(gssdx, gssdx_op) ++ INSN_SDC2_F(gsswxc1, gsswxc1_op) ++ INSN_SDC2_F(gssdxc1, gssdxc1_op) ++*/ ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op) } ++ void gslhx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslhx_op) } ++ void gslwx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwx_op) } ++ void gsldx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldx_op) } ++ void gslwxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwxc1_op) } ++ void gsldxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldxc1_op) } ++ ++ void gssbx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssbx_op) } ++ void gsshx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsshx_op) } ++ void gsswx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswx_op) } ++ void gssdx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdx_op) } ++ void gsswxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswxc1_op) } ++ void gssdxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdxc1_op) } ++ ++#undef INSN ++#undef INSN_LDC2 ++#undef INSN_LDC2_F ++#undef INSN_SDC2 ++#undef INSN_SDC2_F ++ ++ // cpucfg on Loongson CPUs above 3A4000 ++ void cpucfg(Register rd, Register rs) { emit_long((gs_lwc2_op << 26) | ((int)rs->encoding() << 21) | (0b01000 << 16) | ((int)rd->encoding() << 11) | ( 0b00100 << 6) | 0b011000);} ++ ++ enum Membar_mask_bits { ++ StoreStore = 1 << 3, ++ LoadStore = 1 << 2, 
++ StoreLoad = 1 << 1, ++ LoadLoad = 1 << 0 ++ }; ++ ++ // Serializes memory and blows flags ++ void membar(Membar_mask_bits order_constraint) { ++ sync(); ++ } ++ ++public: ++ // Creation ++ Assembler(CodeBuffer* code) : AbstractAssembler(code) { ++#ifdef CHECK_DELAY ++ delay_state = no_delay; ++#endif ++ } ++ ++ // Decoding ++ static address locate_operand(address inst, WhichOperand which); ++ static address locate_next_instruction(address inst); ++}; ++ ++ ++#include "assembler_mips.inline.hpp" ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/assembler_mips.inline.hpp b/src/hotspot/cpu/mips/assembler_mips.inline.hpp +new file mode 100644 +index 0000000000..21c8a76156 +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.inline.hpp +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++#ifndef PRODUCT ++#include "compiler/disassembler.hpp" ++#endif ++ ++ ++ ++inline void Assembler::check_delay() { ++# ifdef CHECK_DELAY ++ guarantee(delay_state != at_delay_slot, "must say delayed() when filling delay slot"); ++ delay_state = no_delay; ++# endif ++} ++ ++inline void Assembler::emit_long(int x) { ++ check_delay(); ++ AbstractAssembler::emit_int32(x); ++} ++ ++inline void Assembler::emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_long(x); ++} ++ ++inline void Assembler::emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_long(x); ++} ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/bytes_mips.hpp b/src/hotspot/cpu/mips/bytes_mips.hpp +new file mode 100644 +index 0000000000..4172db219b +--- /dev/null ++++ b/src/hotspot/cpu/mips/bytes_mips.hpp +@@ -0,0 +1,181 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_BYTES_MIPS_HPP ++#define CPU_MIPS_VM_BYTES_MIPS_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Returns true if the byte ordering used by Java is different from the native byte ordering ++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris ++ // on Sparc. ++ // we use mipsel, so return true ++ static inline bool is_Java_byte_ordering_different(){ return true; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since x86 CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { ++ if ((intptr_t)p & 0x1) { ++ return ((u2)p[1] << 8) | (u2)p[0]; ++ } else { ++ return *(u2*)p; ++ } ++ } ++ ++ static inline u4 get_native_u4(address p) { ++ if ((intptr_t)p & 3) { ++ u4 res; ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ ++ " lwr %[res], 0(%[addr]) \n" ++ " lwl %[res], 3(%[addr]) \n" ++ ++ " .set pop" ++ : [res] "=&r" (res) ++ : [addr] "r" (p) ++ : "memory" ++ ); ++ return res; ++ } else { ++ return *(u4*)p; ++ } ++ } ++ ++ static inline u8 get_native_u8(address p) { ++ u8 res; ++ u8 temp = 0; ++ // u4 tp;//tmp register ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ " .set noat\n" ++ " andi $1,%[addr],0x7 \n" ++ " beqz $1,1f \n" ++ " nop \n" ++ " ldr %[temp], 0(%[addr]) \n" ++ " ldl %[temp], 7(%[addr]) \n" ++ " b 2f \n" ++ " nop \n" ++ " 1:\t ld %[temp],0(%[addr]) \n" ++ " 2:\t sd %[temp], %[res] \n" ++ ++ " .set at\n" ++ " .set pop\n" ++ : [addr]"=r"(p), [temp]"=r" (temp) ++ : "[addr]"(p), "[temp]" (temp), [res]"m" (*(volatile jint*)&res) ++ : "memory" ++ ); ++ ++ return res; ++ } ++ ++ //use mips unaligned load instructions ++ static inline void put_native_u2(address p, u2 x) { ++ if((intptr_t)p & 0x1) { ++ p[0] = (u_char)(x); ++ p[1] = (u_char)(x>>8); ++ } else { ++ *(u2*)p = x; ++ } ++ } ++ ++ static inline void put_native_u4(address p, u4 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 3 ) { ++ case 0: *(u4*)p = x; ++ break; ++ ++ case 2: ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ break; ++ } ++ } ++ ++ static inline void put_native_u8(address p, u8 x) { ++ // refer to sparc implementation. 
++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 7 ) { ++ case 0: *(u8*)p = x; ++ break; ++ ++ case 4: ((u4*)p)[1] = x >> 32; ++ ((u4*)p)[0] = x; ++ break; ++ ++ case 2: ((u2*)p)[3] = x >> 48; ++ ((u2*)p)[2] = x >> 32; ++ ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[7] = x >> 56; ++ ((u1*)p)[6] = x >> 48; ++ ((u1*)p)[5] = x >> 40; ++ ((u1*)p)[4] = x >> 32; ++ ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ } ++ } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since MIPS64EL CPUs use little-endian format. ++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#include OS_CPU_HEADER_INLINE(bytes) ++ ++#endif // CPU_MIPS_VM_BYTES_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/c2_globals_mips.hpp b/src/hotspot/cpu/mips/c2_globals_mips.hpp +new file mode 100644 +index 0000000000..ef11827abf +--- /dev/null ++++ b/src/hotspot/cpu/mips/c2_globals_mips.hpp +@@ -0,0 +1,95 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. 
++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++#ifdef CC_INTERP ++define_pd_global(bool, ProfileInterpreter, false); ++#else ++define_pd_global(bool, ProfileInterpreter, true); ++#endif // CC_INTERP ++// Disable C1 in server JIT ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 6); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 13); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. ++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 120*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 57*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 58*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/c2_init_mips.cpp b/src/hotspot/cpu/mips/c2_init_mips.cpp +new file mode 100644 +index 0000000000..e6d5815f42 +--- /dev/null ++++ b/src/hotspot/cpu/mips/c2_init_mips.cpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for mips ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++} +diff --git a/src/hotspot/cpu/mips/codeBuffer_mips.hpp b/src/hotspot/cpu/mips/codeBuffer_mips.hpp +new file mode 100644 +index 0000000000..3cc191006d +--- /dev/null ++++ b/src/hotspot/cpu/mips/codeBuffer_mips.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++#define CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_MIPS_VM_CODEBUFFER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/compiledIC_mips.cpp b/src/hotspot/cpu/mips/compiledIC_mips.cpp +new file mode 100644 +index 0000000000..068ca4799d +--- /dev/null ++++ b/src/hotspot/cpu/mips/compiledIC_mips.cpp +@@ -0,0 +1,151 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // get mark within main instrs section ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); ++ if (base == NULL) return NULL; // CodeBuffer::expand failed ++ // static stub relocation stores the instruction address of the call ++ ++ __ relocate(static_stub_Relocation::spec(mark), 0); ++ ++ // Code stream for loading method may be changed. ++ __ synci(R0, 0); ++ ++ // Rmethod contains methodOop, it should be relocated for GC ++ // static stub relocation also tags the methodOop in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ ++ __ relocate(relocInfo::runtime_call_type); ++ ++ cbuf.set_insts_mark(); ++ address call_pc = (address)-1; ++ __ patchable_jump(call_pc); ++ __ align(16); ++ // Update current stubs pointer and restore code_end. ++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ int size = NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++ return round_to(size, 16); ++} ++ ++int CompiledStaticCall::to_trampoline_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeCallTrampolineStub::instruction_size; ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 16; ++} ++ ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(false /* is_aot */); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++ ++ // Update stub. 
++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ method_holder->set_data(0); ++ jump->set_jump_destination((address)-1); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ if (os::is_MP()) { ++ _call->verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(false /* is_aot */); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/mips/copy_mips.hpp b/src/hotspot/cpu/mips/copy_mips.hpp +new file mode 100644 +index 0000000000..dcc77adfec +--- /dev/null ++++ b/src/hotspot/cpu/mips/copy_mips.hpp +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_COPY_MIPS_HPP ++#define CPU_MIPS_VM_COPY_MIPS_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) ++ ++// Template for atomic, element-wise copy. 
++template ++static void copy_conjoint_atomic(const T* from, T* to, size_t count) { ++ if (from > to) { ++ while (count-- > 0) { ++ // Copy forwards ++ *to++ = *from++; ++ } ++ } else { ++ from += count - 1; ++ to += count - 1; ++ while (count-- > 0) { ++ // Copy backwards ++ *to-- = *from--; ++ } ++ } ++} ++ ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif //CPU_MIPS_VM_COPY_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/depChecker_mips.cpp b/src/hotspot/cpu/mips/depChecker_mips.cpp +new file mode 100644 +index 0000000000..756ccb68f9 +--- /dev/null ++++ b/src/hotspot/cpu/mips/depChecker_mips.cpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/disassembler.hpp" ++#include "depChecker_mips.hpp" ++ ++// Nothing to do on mips +diff --git a/src/hotspot/cpu/mips/depChecker_mips.hpp b/src/hotspot/cpu/mips/depChecker_mips.hpp +new file mode 100644 +index 0000000000..11e52b4e8f +--- /dev/null ++++ b/src/hotspot/cpu/mips/depChecker_mips.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++#define CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++ ++// Nothing to do on MIPS ++ ++#endif // CPU_MIPS_VM_DEPCHECKER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/disassembler_mips.hpp b/src/hotspot/cpu/mips/disassembler_mips.hpp +new file mode 100644 +index 0000000000..c5f3a8888d +--- /dev/null ++++ b/src/hotspot/cpu/mips/disassembler_mips.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++#endif // CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/frame_mips.cpp b/src/hotspot/cpu/mips/frame_mips.cpp +new file mode 100644 +index 0000000000..d49bd6290d +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.cpp +@@ -0,0 +1,690 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_mips.inline.hpp" ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++// for Profiling - acting on another frame. walks sender frames ++// if valid. ++// frame profile_find_Java_sender_frame(JavaThread *thread); ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? ++ JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size() : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && ++ (unextended_sp >= sp); ++ ++ if (!unextended_sp_safe) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. 
++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? ++ if ((address)sender_sp >= thread->stack_base()) { ++ return false; ++ } ++ sender_unextended_sp = sender_sp; ++ // On MIPS the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. 
++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp()); ++ ++ return jcw_safe; ++ } ++ ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_compiled()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... 
++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++#ifdef CC_INTERP ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ // QQQ why does this specialize method exist if frame::sender_sp() does same thing? ++ // seems odd and if we always know interpreted vs. non then sender_sp() is really ++ // doing too much work. ++ return get_interpreterState()->sender_sp(); ++} ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return get_interpreterState()->monitor_base(); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ return (BasicObjectLock*) get_interpreterState()->stack_base(); ++} ++ ++#else // CC_INTERP ++ ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++#endif // CC_INTERP ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, 
"map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. ++#ifdef COMPILER2 ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif /* COMPILER2 */ ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On MIPS, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. 
We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ map->set_location(FP->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. ++ // XXXX make this go away ++ if (true) { ++ map->set_location(FP->as_VMReg()->next(), (address) link_addr); ++ } ++} ++ ++//------------------------------sender_for_compiled_frame----------------------- ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ ++ // frame owned by optimizing compiler ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ ++ intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = sender_sp; ++ ++ // On Loongson the return_address is always the word on the stack ++ // the fp in compiler points to sender fp, but in interpreter, fp points to return address, ++ // so getting sender for compiled frame is not same as interpreter frame. ++ // we hard code here temporarily ++ // spark ++ address sender_pc = (address) *(sender_sp-1); ++ ++ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); ++ ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } ++ ++ // Since the prolog does the save and restore of epb there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } ++ assert(sender_sp != sp(), "must have changed"); ++ return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} ++ ++frame frame::sender(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map->set_include_argument_oops(false); ++ ++ if (is_entry_frame()) return sender_for_entry_frame(map); ++ if (is_interpreted_frame()) return sender_for_interpreter_frame(map); ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++// QQQ ++#ifdef CC_INTERP ++#else ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. 
++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = *interpreter_frame_method_addr(); ++ ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcp ++ ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ ++ if (MetaspaceObj::is_valid(cp) == false) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ ++#endif // CC_INTERP ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++#ifdef CC_INTERP ++ // Needed for JVMTI. The result should always be in the interpreterState object ++ assert(false, "NYI"); ++ interpreterState istate = get_interpreterState(); ++#endif // CC_INTERP ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++#ifdef CC_INTERP ++ obj = istate->_oop_temp; ++#else ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++#endif // CC_INTERP ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++ ++void frame::pd_ps() {} ++#endif +diff --git a/src/hotspot/cpu/mips/frame_mips.hpp b/src/hotspot/cpu/mips/frame_mips.hpp +new file mode 100644 +index 0000000000..bdbfa8aaa2 +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.hpp +@@ -0,0 +1,215 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_HPP ++ ++#include "runtime/synchronizer.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... | monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code index/pointr] = bcx() bcx_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [methodOop ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] <- fp = link() ++// [return pc ] ++// [oop temp ] (only for native calls) ++// [locals and parameters ] ++// <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++ ++// ------------------------------ C++ interpreter ---------------------------------------- ++// ++// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run) ++// ++// <- SP (current sp) ++// [local variables ] BytecodeInterpreter::run local variables ++// ... BytecodeInterpreter::run local variables ++// [local variables ] BytecodeInterpreter::run local variables ++// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ] ++// [return pc ] (return to frame manager) ++// [interpreter_state* ] (arg to BytecodeInterpreter::run) -------------- ++// [expression stack ] <- last_Java_sp | ++// [... ] * <- interpreter_state.stack | ++// [expression stack ] * <- interpreter_state.stack_base | ++// [monitors ] \ | ++// ... | monitor block size | ++// [monitors ] / <- interpreter_state.monitor_base | ++// [struct interpretState ] <-----------------------------------------| ++// [return pc ] (return to callee of frame manager [1] ++// [locals and parameters ] ++// <- sender sp ++ ++// [1] When the c++ interpreter calls a new method it returns to the frame ++// manager which allocates a new frame on the stack. In that case there ++// is no real callee of this newly allocated frame. The frame manager is ++// aware of the additional frame(s) and will pop them as nested calls ++// complete. Howevers tTo make it look good in the debugger the frame ++// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation ++// with a fake interpreter_state* parameter to make it easy to debug ++// nested calls. ++ ++// Note that contrary to the layout for the assembly interpreter the ++// expression stack allocated for the C++ interpreter is full sized. ++// However this is not as bad as it seems as the interpreter frame_manager ++// will truncate the unused space on succesive method calls. ++// ++// ------------------------------ C++ interpreter ---------------------------------------- ++ ++// Layout of interpreter frame: ++// ++// [ monitor entry ] <--- sp ++// ... 
++// [ monitor entry ] ++// -9 [ monitor block top ] ( the top monitor entry ) ++// -8 [ byte code pointer ] (if native, bcp = 0) ++// -7 [ constant pool cache ] ++// -6 [ methodData ] mdx_offset(not core only) ++// -5 [ mirror ] ++// -4 [ methodOop ] ++// -3 [ locals offset ] ++// -2 [ last_sp ] ++// -1 [ sender's sp ] ++// 0 [ sender's fp ] <--- fp ++// 1 [ return address ] ++// 2 [ oop temp offset ] (only for native calls) ++// 3 [ result handler offset ] (only for native calls) ++// 4 [ result type info ] (only for native calls) ++// [ local var m-1 ] ++// ... ++// [ local var 0 ] ++// [ argumnet word n-1 ] <--- ( sender's sp ) ++// ... ++// [ argument word 0 ] <--- S7 ++ ++ public: ++ enum { ++ pc_return_offset = 0, ++ // All frames ++ link_offset = 0, ++ return_addr_offset = 1, ++ // non-interpreter frames ++ sender_sp_offset = 2, ++ ++ // Interpreter frames ++ interpreter_frame_return_addr_offset = 1, ++ interpreter_frame_result_handler_offset = 3, // for native calls only ++ interpreter_frame_oop_temp_offset = 2, // for native calls only ++ ++ interpreter_frame_sender_fp_offset = 0, ++ interpreter_frame_sender_sp_offset = -1, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ ++ // Entry frames ++ entry_frame_call_wrapper_offset = -9, ++ ++ // Native frames ++ ++ native_frame_initial_param_offset = 2 ++ ++ }; ++ ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } ++ ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } ++ ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. 
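To make the distinction above concrete: when an adapter or the interpreter extends the caller's frame, the raw sp moves down while the caller's original sp is what the oopMaps were generated against, which is why both values are kept. A stand-alone sketch of that bookkeeping follows (names and the downward-growing-stack assumption are illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

struct FrameView {
  intptr_t* sp;             // raw sp after any extension
  intptr_t* unextended_sp;  // caller's sp before extension; used as the frame id
};

// Extending a frame by `words` slots moves the raw sp down but leaves the
// unextended sp (and therefore the frame id / oopMap anchor) unchanged.
static FrameView extend(FrameView f, int words) {
  FrameView out = f;
  out.sp = f.sp - words;    // stack grows toward lower addresses
  return out;
}

int main() {
  intptr_t stack[32];
  FrameView f = { &stack[16], &stack[16] };
  FrameView g = extend(f, 4);
  assert(g.sp == &stack[12]);
  assert(g.unextended_sp == f.unextended_sp);
  return 0;
}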
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++ ++ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ ++#endif // CPU_MIPS_VM_FRAME_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/frame_mips.inline.hpp b/src/hotspot/cpu/mips/frame_mips.inline.hpp +new file mode 100644 +index 0000000000..c408f01d69 +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.inline.hpp +@@ -0,0 +1,238 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
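Since the id is just the unextended sp and the stack grows toward lower addresses, the younger/older tests below reduce to pointer comparisons. A minimal stand-alone illustration of that ordering (not part of the patch):

#include <cassert>
#include <cstdint>

// A frame id is its unextended sp; comparing ids compares stack depth.
static bool is_younger(intptr_t* id, intptr_t* than) { return id < than; }
static bool is_older  (intptr_t* id, intptr_t* than) { return id > than; }

int main() {
  intptr_t stack[8];
  intptr_t* callee_sp = &stack[2];   // pushed later, lower address
  intptr_t* caller_sp = &stack[6];   // pushed earlier, higher address
  assert(is_younger(callee_sp, caller_sp));
  assert(is_older(caller_sp, callee_sp));
  return 0;
}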
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++inline intptr_t* frame::link() const { ++ return (intptr_t*) *(intptr_t **)addr_at(link_offset); ++} ++ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} ++ ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ 
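For reference, the interpreter_frame_tos_address() accessor above boils down to a two-way choice: use the saved last_sp when a call is in progress, otherwise fall back to the raw sp. A stand-alone sketch of that selection (illustrative only, not part of the patch):

#include <cstddef>
#include <cstdint>

// Pick the top-of-stack address the same way interpreter_frame_tos_address() does:
// prefer the recorded last_sp (set around calls), otherwise use the raw sp.
static intptr_t* tos_address(intptr_t* sp, intptr_t* last_sp) {
  return (last_sp == NULL) ? sp : last_sp;
}

int main() {
  intptr_t stack[4];
  // No nested call: tos is the raw sp.
  bool a = tos_address(&stack[0], NULL) == &stack[0];
  // Nested call in progress: tos is the recorded last_sp.
  bool b = tos_address(&stack[0], &stack[2]) == &stack[2];
  return (a && b) ? 0 : 1;
}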
++#endif // CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp +new file mode 100644 +index 0000000000..179f7703c8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp +@@ -0,0 +1,364 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++ ++#define __ masm-> ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ ++ if (!dest_uninitialized) { ++#ifndef OPT_THREAD ++ Register thread = T9; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ Label filtered; ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lb(AT, in_progress); ++ } ++ ++ __ beq(AT, R0, filtered); ++ __ delayed()->nop(); ++ ++ __ pushad(); // push registers ++ if (count == A0) { ++ if (addr == A1) { ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ popad(); ++ ++ __ bind(filtered); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ __ pushad(); // push registers (overkill) ++ if (count == A0) { ++ assert_different_registers(A1, addr); ++ __ move(A1, count); ++ __ move(A0, addr); ++ } else { ++ assert_different_registers(A0, count); ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ popad(); ++} ++ ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ thread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ } ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lb(AT, in_progress); ++ } ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // Do we need to load the previous value? 
++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0)); ++ } ++ ++ // Is the previous value null? ++ __ beq(pre_val, R0, done); ++ __ delayed()->nop(); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld(tmp, index); ++ __ beq(tmp, R0, runtime); ++ __ delayed()->nop(); ++ ++ __ daddiu(tmp, tmp, -1 * wordSize); ++ __ sd(tmp, index); ++ __ ld(AT, buffer); ++ __ daddu(tmp, tmp, AT); ++ ++ // Record the previous value ++ __ sd(pre_val, tmp, 0); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ ++ __ bind(runtime); ++ // save the live input values ++ if (tosca_live) __ push(V0); ++ ++ if (obj != noreg && obj != V0) __ push(obj); ++ ++ if (pre_val != V0) __ push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) __ move(A1, thread); ++ if (pre_val != A0) __ move(A0, pre_val); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ __ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ __ pop(obj); ++ ++ if (tosca_live) __ pop(V0); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_different_registers(tmp, tmp2, AT); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ __ xorr(AT, store_addr, new_val); ++ __ dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // crosses regions, storing NULL? ++ __ beq(new_val, R0, done); ++ __ delayed()->nop(); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ __ move(card_addr, store_addr); ++ __ dsrl(card_addr, card_addr, CardTable::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. 
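Taken together, this post-barrier filters stores before touching the dirty-card queue: same-region stores, NULL stores, and cards that are already young or dirty are skipped, and only the remaining stores dirty the card and go to the slow path. A plain C++ sketch of that filtering order, with assumed example constants (illustrative only, not part of the patch):

#include <cstdint>
#include <cstring>

// Example constants only; the real values come from HeapRegion and G1CardTable.
static const int     kLogOfHRGrainBytes = 21;   // pretend 2 MB heap regions
static const int     kCardShift         = 9;    // pretend 512-byte cards
static const uint8_t kCleanCard         = 0xff;
static const uint8_t kYoungCard         = 1;
static const uint8_t kDirtyCard         = 0;

// Mirrors the filtering order used above: same-region stores, NULL stores and
// already-young/dirty cards are filtered out; everything else dirties the card
// and must be enqueued for refinement.
static bool post_barrier_needs_enqueue(uintptr_t store_addr, uintptr_t new_val,
                                       uint8_t* byte_map_base) {
  if (((store_addr ^ new_val) >> kLogOfHRGrainBytes) == 0) return false;
  if (new_val == 0) return false;
  uint8_t* card = byte_map_base + (store_addr >> kCardShift);
  if (*card == kYoungCard) return false;
  if (*card == kDirtyCard) return false;
  *card = kDirtyCard;
  return true;
}

int main() {
  static uint8_t cards[1 << 14];
  memset(cards, kCleanCard, sizeof(cards));
  // A cross-region, non-NULL store into a clean card needs the slow path.
  return post_barrier_needs_enqueue(0x300000, 0x500000, cards) ? 0 : 1;
}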
++ __ set64(cardtable, (intptr_t)ct->card_table()->byte_map_base()); ++ __ daddu(card_addr, card_addr, cardtable); ++ ++ __ lb(AT, card_addr, 0); ++ __ daddiu(AT, AT, -1 * (int)G1CardTable::g1_young_card_val()); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ __ sync(); ++ __ lb(AT, card_addr, 0); ++ __ daddiu(AT, AT, -1 * (int)G1CardTable::dirty_card_val()); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ __ move(AT, (int)G1CardTable::dirty_card_val()); ++ __ sb(AT, card_addr, 0); ++ ++ __ lw(AT, queue_index); ++ __ beq(AT, R0, runtime); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, -1 * wordSize); ++ __ sw(AT, queue_index); ++ __ ld(tmp2, buffer); ++ __ ld(AT, queue_index); ++ __ daddu(tmp2, tmp2, AT); ++ __ sd(card_addr, tmp2, 0); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ ++ __ bind(runtime); ++ // save the live input values ++ __ push(store_addr); ++ __ push(new_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, TREG); ++ __ pop(new_val); ++ __ pop(store_addr); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool as_normal = (decorators & AS_NORMAL) != 0; ++ assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); ++ ++ bool needs_pre_barrier = as_normal; ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ Register tmp3 = RT3; ++ Register rthread = TREG; ++ // flatten object address if needed ++ // We do it regardless of precise because we need the registers ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != tmp3) { ++ __ move(tmp3, dst.base()); ++ } ++ } else { ++ __ lea(tmp3, dst); ++ } ++ ++ if (needs_pre_barrier) { ++ g1_write_barrier_pre(masm /*masm*/, ++ tmp3 /* obj */, ++ tmp2 /* pre_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ } ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ } else { ++ Register new_val = val; ++ if (needs_post_barrier) { ++ // G1 barrier needs uncompressed oop for region cross check. ++ if (UseCompressedOops) { ++ new_val = tmp2; ++ __ move(new_val, val); ++ } ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ if (needs_post_barrier) { ++ g1_write_barrier_post(masm /*masm*/, ++ tmp3 /* store_adr */, ++ new_val /* new_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp +new file mode 100644 +index 0000000000..ec5c243c3f +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class LIR_Assembler; ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; ++ ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++ protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count); ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ public: ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp +new file mode 100644 +index 0000000000..071debdc3a +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp +@@ -0,0 +1,194 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/thread.hpp" ++ ++#define __ masm-> ++ ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ lwu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else ++ { ++ __ ld_ptr(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld_ptr(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ lbu (dst, src); break; ++ case T_BYTE: __ lb (dst, src); break; ++ case T_CHAR: __ lhu (dst, src); break; ++ case T_SHORT: __ lh (dst, src); break; ++ case T_INT: __ lw (dst, src); break; ++ case T_LONG: __ ld (dst, src); break; ++ case T_ADDRESS: __ ld_ptr(dst, src); break; ++ case T_FLOAT: ++ assert(dst == noreg, "only to ftos"); ++ __ lwc1(FSF, src); ++ break; ++ case T_DOUBLE: ++ assert(dst == noreg, "only to dtos"); ++ __ ldc1(FSF, src); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (val == noreg) { ++ assert(!is_not_null, "inconsistent access"); ++ if (UseCompressedOops) { ++ __ sw(R0, dst); ++ } else { ++ __ sd(R0, dst); ++ } ++ } else { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (is_not_null) { ++ __ encode_heap_oop_not_null(val); ++ } else { ++ __ encode_heap_oop(val); ++ } ++ __ sw(val, dst); ++ } else ++ { ++ __ st_ptr(val, dst); ++ } ++ } ++ } else { ++ assert(in_native, "why else?"); ++ assert(val != noreg, "not supported"); ++ __ st_ptr(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ sb(val, dst); ++ break; ++ case T_BYTE: ++ __ sb(val, dst); ++ break; ++ case T_SHORT: ++ __ sh(val, dst); ++ break; ++ case T_CHAR: ++ __ sh(val, dst); ++ break; ++ case T_INT: ++ __ sw(val, dst); ++ break; ++ case T_LONG: ++ __ sd(val, dst); ++ break; ++ case T_FLOAT: ++ assert(val == noreg, "only tos"); ++ __ swc1(FSF, dst); ++ break; ++ case T_DOUBLE: ++ assert(val == noreg, "only tos"); ++ __ sdc1(FSF, dst); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(val, dst); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2) { ++ Unimplemented(); ++} ++ ++void 
BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ __ clear_jweak_tag(obj); ++ __ ld_ptr(obj, Address(obj, 0)); ++} ++ ++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ Unimplemented(); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ Unimplemented(); ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp +new file mode 100644 +index 0000000000..b97ecbcca5 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "memory/allocation.hpp" ++#include "oops/access.hpp" ++ ++class InterpreterMacroAssembler; ++ ++class BarrierSetAssembler: public CHeapObj { ++private: ++ void incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1); ++ ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG) {} ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG) {} ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2); ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2); ++ ++ virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) { ++ // Default implementation does not need to do anything. ++ } ++ ++ // Support for jniFastGetField to try resolving a jobject/jweak in native ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, Register t2, ++ Label& slow_case); ++ virtual void eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case); ++ ++ virtual void barrier_stubs_init() {} ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp +new file mode 100644 +index 0000000000..cb1d53db0a +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp +@@ -0,0 +1,149 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) ++ ++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ BarrierSet *bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ intptr_t disp = (intptr_t) ct->byte_map_base(); ++ ++ Label L_loop, L_done; ++ const Register end = count; ++ assert_different_registers(addr, end); ++ ++ __ beq(count, R0, L_done); // zero count - nothing to do ++ __ delayed()->nop(); ++ ++ if (UseConcMarkSweepGC) __ sync(); ++ ++ __ set64(tmp, disp); ++ ++ __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size ++ __ daddiu(end, end, -BytesPerHeapOop); // end - 1 to make inclusive ++ __ shr(addr, CardTable::card_shift); ++ __ shr(end, CardTable::card_shift); ++ __ dsubu(end, end, addr); // end --> cards count ++ ++ __ daddu(addr, addr, tmp); ++ ++ __ BIND(L_loop); ++ if (UseLEXT1) { ++ __ gssbx(R0, addr, count, 0); ++ } else { ++ __ daddu(AT, addr, count); ++ __ sb(R0, AT, 0); ++ } ++ __ daddiu(count, count, -1); ++ __ bgez(count, L_loop); ++ __ delayed()->nop(); ++ ++ __ BIND(L_done); ++} ++ ++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) { ++ // Does a store check for the oop in register obj. The content of ++ // register obj is destroyed afterwards. 
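In plain terms, the store check that follows scales the written-to address down to a card index and marks the matching card-table entry dirty (the dirty value is asserted to be zero below, which is why storing R0 suffices). A stand-alone sketch with assumed constants (illustrative only, not part of the patch):

#include <cstdint>
#include <cstring>

static const int     kCardShift = 9;   // assumed 512-byte cards
static const uint8_t kDirty     = 0;   // matches the dirty_card_val() == 0 assert below

// byte_map_base is biased so that adding the shifted address lands on the card entry.
static void store_check(uintptr_t store_addr, uint8_t* byte_map_base) {
  byte_map_base[store_addr >> kCardShift] = kDirty;
}

int main() {
  static uint8_t cards[1 << 12];
  memset(cards, 0xff, sizeof(cards));   // pretend all cards start clean
  store_check(0x12345, cards);          // mark the card covering this address
  return cards[0x12345 >> kCardShift] == kDirty ? 0 : 1;
}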
++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ __ shr(obj, CardTable::card_shift); ++ ++ Address card_addr; ++ ++ intptr_t byte_map_base = (intptr_t)ct->byte_map_base(); ++ Register tmp = T9; ++ assert_different_registers(tmp, obj); ++ __ li(tmp, byte_map_base); ++ __ addu(tmp, tmp, obj); ++ ++ assert(CardTable::dirty_card_val() == 0, "must be"); ++ ++ jbyte dirty = CardTable::dirty_card_val(); ++ if (UseCondCardMark) { ++ Untested("Untested"); ++ __ warn("store_check Untested"); ++ Label L_already_dirty; ++ __ membar(Assembler::StoreLoad); ++ __ lb(AT, tmp, 0); ++ __ addiu(AT, AT, -1 * dirty); ++ __ beq(AT, R0, L_already_dirty); ++ __ delayed()->nop(); ++ __ sb(R0, tmp, 0); ++ __ bind(L_already_dirty); ++ } else { ++ if (ct->scanned_concurrently()) { ++ __ membar(Assembler::StoreLoad); ++ } ++ __ sb(R0, tmp, 0); ++ } ++} ++ ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; ++ ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || (dst.index() == noreg && dst.disp() == 0)) { ++ store_check(masm, dst.base(), dst); ++ } else { ++ __ lea(tmp1, dst); ++ store_check(masm, tmp1, dst); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp +new file mode 100644 +index 0000000000..49c2a0ea80 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Address dst); ++ ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp +new file mode 100644 +index 0000000000..765259e626 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch) { ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count); ++ } ++} ++ ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, dst, count, scratch); ++ } ++} ++ ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp +new file mode 100644 +index 0000000000..5320a4c0ad +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++ ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. 
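The split described above is a small template-method pattern: the ModRef layer owns the type dispatch, and concrete barriers (card table, G1) only override the oop path. A stripped-down sketch of that shape (class and function names simplified for illustration; the real methods take MacroAssembler/DecoratorSet arguments and this is not part of the patch):

#include <cstdio>

enum BasicType { T_INT, T_OBJECT, T_ARRAY };

struct BarrierSetAssembler {
  virtual ~BarrierSetAssembler() {}
  virtual void store_at(BasicType type) { printf("plain store\n"); }
};

// ModRef layer: filters non-oop stores, defers oop stores to the concrete barrier.
struct ModRefBarrierSetAssembler : BarrierSetAssembler {
  virtual void oop_store_at(BasicType type) = 0;
  virtual void store_at(BasicType type) {
    if (type == T_OBJECT || type == T_ARRAY) oop_store_at(type);
    else BarrierSetAssembler::store_at(type);
  }
};

// A concrete barrier (e.g. the card-table one) only has to supply the oop path.
struct CardTableLikeAssembler : ModRefBarrierSetAssembler {
  virtual void oop_store_at(BasicType) { printf("store + card mark\n"); }
};

int main() {
  CardTableLikeAssembler bs;
  bs.store_at(T_INT);      // goes to the plain store
  bs.store_at(T_OBJECT);   // goes through the oop/store-check path
  return 0;
}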
++ ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) {} ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG); ++ ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/globalDefinitions_mips.hpp b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp +new file mode 100644 +index 0000000000..abf8141e8b +--- /dev/null ++++ b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++// Size of MIPS Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#define THREAD_LOCAL_POLL ++ ++#endif // CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/globals_mips.hpp b/src/hotspot/cpu/mips/globals_mips.hpp +new file mode 100644 +index 0000000000..3bcad005d1 +--- /dev/null ++++ b/src/hotspot/cpu/mips/globals_mips.hpp +@@ -0,0 +1,137 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++ ++define_pd_global(uintx, CodeCacheSegmentSize, 64); ++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++// MIPS generates 3x instructions than X86 ++define_pd_global(intx, InlineSmallCode, 4000); ++ ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+4)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) ++define_pd_global(uintx, TLABSize, 0); ++define_pd_global(uintx, NewSize, 1024 * K); ++define_pd_global(intx, PreInflateSpin, 10); ++ ++define_pd_global(intx, PrefetchCopyIntervalInBytes, -1); ++define_pd_global(intx, PrefetchScanIntervalInBytes, -1); ++define_pd_global(intx, PrefetchFieldsAhead, -1); ++ ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++define_pd_global(bool, UseMembar, true); ++// GC Ergo Flags ++define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, CompactStrings, true); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++ ++define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); ++ ++define_pd_global(bool, ThreadLocalHandshakes, true); ++// Only c2 cares about this at 
the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ diagnostic, \ ++ experimental, \ ++ notproduct, \ ++ range, \ ++ constraint, \ ++ writeable) \ ++ \ ++ product(bool, UseLEXT1, false, \ ++ "Use LoongISA general EXTensions 1") \ ++ \ ++ product(bool, UseLEXT2, false, \ ++ "Use LoongISA general EXTensions 2") \ ++ \ ++ product(bool, UseLEXT3, false, \ ++ "Use LoongISA general EXTensions 3") \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(intx, UseSyncLevel, 10000, \ ++ "The sync level on Loongson CPUs" \ ++ "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ ++ "UseSyncLevel == 4000, 101, maybe for GS464V" \ ++ "UseSyncLevel == 3000, 001, maybe for GS464V" \ ++ "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ ++ "UseSyncLevel == 1000, 110, maybe for GS464") \ ++ \ ++ develop(bool, UseBoundCheckInstruction, false, \ ++ "Use bound check instruction") \ ++ \ ++ product(intx, SetFSFOFN, 999, \ ++ "Set the FS/FO/FN bits in FCSR" \ ++ "999 means FS/FO/FN will not be changed" \ ++ "=XYZ, with X:FS, Y:FO, Z:FN, X, Y and Z in 0=off, 1=on") \ ++ \ ++ /* assembler */ \ ++ product(bool, UseCountLeadingZerosInstructionMIPS64, true, \ ++ "Use count leading zeros instruction") \ ++ \ ++ product(bool, UseCountTrailingZerosInstructionMIPS64, false, \ ++ "Use count trailing zeros instruction") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") ++ ++#endif // CPU_MIPS_VM_GLOBALS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/icBuffer_mips.cpp b/src/hotspot/cpu/mips/icBuffer_mips.cpp +new file mode 100644 +index 0000000000..6586c63965 +--- /dev/null ++++ b/src/hotspot/cpu/mips/icBuffer_mips.cpp +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "code/icBuffer.hpp"
++#include "gc/shared/collectedHeap.inline.hpp"
++#include "interpreter/bytecodes.hpp"
++#include "memory/resourceArea.hpp"
++#include "nativeInst_mips.hpp"
++#include "oops/oop.inline.hpp"
++
++#define T0 RT0
++#define T1 RT1
++#define T2 RT2
++#define T3 RT3
++#define T8 RT8
++#define T9 RT9
++
++int InlineCacheBuffer::ic_stub_code_size() {
++  return NativeMovConstReg::instruction_size +
++         NativeGeneralJump::instruction_size +
++         1;
++  // so that code_end can be set in CodeBuffer
++  // 64bit 15 = 6 + 8 bytes + 1 byte
++  // 32bit 7 = 2 + 4 bytes + 1 byte
++}
++
++
++// We use T1 as the cached oop (klass) now; it is the target of the virtual call.
++// When we reach here, the receiver is in T0.
++// Refer to sharedRuntime_mips.cpp, gen_i2c2i_adapters.
++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) {
++  ResourceMark rm;
++  CodeBuffer      code(code_begin, ic_stub_code_size());
++  MacroAssembler* masm = new MacroAssembler(&code);
++  // note: even though the code contains an embedded oop, we do not need reloc info
++  // because
++  // (1) the oop is old (i.e., doesn't matter for scavenges)
++  // (2) these ICStubs are removed *before* a GC happens, so the roots disappear
++//  assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop");
++#define __ masm->
++  __ patchable_set48(T1, (long)cached_value);
++
++  __ patchable_jump(entry_point);
++  __ flush();
++#undef __
++}
++
++
++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
++  NativeMovConstReg* move = nativeMovConstReg_at(code_begin);  // creation also verifies the object
++  NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address());
++  return jump->jump_destination();
++}
++
++
++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
++  // creation also verifies the object
++  NativeMovConstReg* move = nativeMovConstReg_at(code_begin);
++  // Verifies the jump
++  NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address());
++  void* o = (void*)move->data();
++  return o;
++}
+diff --git a/src/hotspot/cpu/mips/icache_mips.cpp b/src/hotspot/cpu/mips/icache_mips.cpp
+new file mode 100644
+index 0000000000..e84e37358b
+--- /dev/null
++++ b/src/hotspot/cpu/mips/icache_mips.cpp
+@@ -0,0 +1,41 @@
++/*
++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ jr_hb(RA); ++ __ delayed()->ori(V0, A2, 0); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff --git a/src/hotspot/cpu/mips/icache_mips.hpp b/src/hotspot/cpu/mips/icache_mips.hpp +new file mode 100644 +index 0000000000..f90dee6eef +--- /dev/null ++++ b/src/hotspot/cpu/mips/icache_mips.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ICACHE_MIPS_HPP ++#define CPU_MIPS_VM_ICACHE_MIPS_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 2 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_MIPS_VM_ICACHE_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/interp_masm_mips.hpp b/src/hotspot/cpu/mips/interp_masm_mips.hpp +new file mode 100644 +index 0000000000..e526e39d53 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interp_masm_mips.hpp +@@ -0,0 +1,276 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++#define CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++#ifndef CC_INTERP ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); ++#endif // CC_INTERP ++ ++ public: ++ void jump_to_entry(address entry); ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void load_earlyret_value(TosState state); ++ ++#ifdef CC_INTERP ++ void save_bcp() { /* not needed in c++ interpreter and harmless */ } ++ void restore_bcp() { /* not needed in c++ interpreter and harmless */ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg); ++ ++#else ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ sd(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld(reg, FP, frame::interpreter_frame_method_offset * wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void 
get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp); ++ ++ // load cpool->resolved_klass_at(index) ++ void load_resolved_klass_at_index(Register cpool, // the constant pool (corrupted on return) ++ Register index, // the constant pool index (corrupted on return) ++ Register klass); // contains the Klass on return ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state, bool generate_poll = false); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. 
++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++#endif // CC_INTERP ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++#ifndef CC_INTERP ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++#endif // !CC_INTERP ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++}; ++ ++#endif // CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/interp_masm_mips_64.cpp b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp +new file mode 100644 +index 0000000000..eb35bb0633 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp +@@ 
-0,0 +1,2126 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_mips.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of InterpreterMacroAssembler ++ ++#ifdef CC_INTERP ++void InterpreterMacroAssembler::get_method(Register reg) { ++} ++#endif // CC_INTERP ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ // The runtime address of BCP may be unaligned. ++ // Refer to the SPARC implementation. ++ lbu(reg, BCP, offset+1); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ assert(reg != tmp, "need separate temp register"); ++ if (offset & 3) { // Offset unaligned? ++ lbu(reg, BCP, offset+3); ++ lbu(tmp, BCP, offset+2); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset+1); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ } else { ++ lwu(reg, BCP, offset); ++ } ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry, "Entry must have been generated by now"); ++ jmp(entry); ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals (r13 & r14) pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. 
If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. ++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. ++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. 
++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_ptr(T8, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (T8, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (T8, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ lw(V0, val_addr); ++ break; ++ case ftos: ++ lwc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ ldc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ move(AT, (int)ilgl); ++ sw(AT, tos_addr); ++ sw(R0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = T9; ++ ++ assert(java_thread != AT, "check"); ++ assert(java_thread != tmp, "check"); ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ lw(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ move(tmp, JvmtiThreadState::earlyret_pending); ++ bne(tmp, AT, L); ++ delayed()->nop(); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. 
++ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ lw(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ move(A0, AT); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ lbu(AT, BCP, bcp_offset); ++ lbu(reg, BCP, bcp_offset + 1); ++ ins(reg, AT, 8, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ get_4_byte_integer_at_bcp(index, AT, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ sll(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ lbu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
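As a worked example of the extraction that follows, this standalone snippet shows how the resolved bytecode comes out of the cache entry's indices word with a shift of (1 + byte_no) * 8 bits and a one-byte mask. The exact packing shown here (constant pool index in the low 16 bits, bytecode_1 at bit 16, bytecode_2 at bit 24) is an assumption consistent with the shift counts asserted in the code:

#include <cstdint>
#include <cstdio>

// byte_no is 1 (f1_byte) or 2 (f2_byte); BitsPerByte == 8.
static unsigned resolved_bytecode(uint32_t indices, int byte_no) {
  const int shift = (1 + byte_no) * 8;   // bytecode_1_shift / bytecode_2_shift
  return (indices >> shift) & 0xff;      // bytecode_1_mask == bytecode_2_mask
}

int main() {
  // Hypothetical packed word: cp index 0x0042, bytecode_1 0xb6, bytecode_2 0xb9.
  uint32_t indices = 0x0042u | (0xb6u << 16) | (0xb9u << 24);
  std::printf("bytecode_1 = 0x%x\n", resolved_bytecode(indices, 1));  // 0xb6
  std::printf("bytecode_2 = 0x%x\n", resolved_bytecode(indices, 2));  // 0xb9
  return 0;
}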
++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, cache, AT); ++ lw(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); ++ if(os::is_MP()) { ++ sync(); // load acquire ++ } ++ ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || ++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), ++ "correct shift count"); ++ dsrl(bytecode, bytecode, shift_count); ++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); ++ move(AT, ConstantPoolCacheEntry::bytecode_1_mask); ++ andr(bytecode, bytecode, AT); ++} ++ ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ shl(tmp, 2 + LogBytesPerWord); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ // skip past the header ++ daddiu(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ daddu(cache, cache, tmp); ++} ++ ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ bne(mcs, R0, has_counters); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory ++ delayed()->nop(); ++ bind(has_counters); ++} ++ ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); ++ // convert from field index to resolved_references() index and from ++ // word index to byte offset. 
Since this is a java object, it can be compressed ++ shl(index, LogBytesPerHeapOop); ++ ++ get_constant_pool(result); ++ // load pointer for resolved_references[] objArray ++ ld(result, result, ConstantPool::cache_offset_in_bytes()); ++ ld(result, result, ConstantPoolCache::resolved_references_offset_in_bytes()); ++ resolve_oop_handle(result, tmp); ++ // Add in the index ++ daddu(result, result, index); ++ load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp); ++} ++ ++// load cpool->resolved_klass_at(index) ++void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool, ++ Register index, Register klass) { ++ dsll(AT, index, Address::times_ptr); ++ if (UseLEXT1 && Assembler::is_simm(sizeof(ConstantPool), 8)) { ++ gslhx(index, cpool, AT, sizeof(ConstantPool)); ++ } else { ++ daddu(AT, cpool, AT); ++ lh(index, AT, sizeof(ConstantPool)); ++ } ++ Register resolved_klasses = cpool; ++ ld_ptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); ++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, resolved_klasses, AT); ++ ld(klass, AT, Array::base_offset_in_bytes()); ++} ++ ++// Resets LVP to locals. Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T9 and T1 are used as temporary registers. ++ profile_typecheck(T9, Rsub_klass, T1); // blows T9, reloads T1 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T9); // blows T9 ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ lw(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ lwc1(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ ldc1(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. 
++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sd(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ swc1(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sdc1(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++ ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ sd(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ sd(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++#ifndef OPT_THREAD ++ Register thread = temp; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ lw(AT, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ delayed()->nop(); ++ ld(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ delayed()->nop(); ++ bind(run_compiled_code); ++ } ++ ++ ld(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++ delayed()->nop(); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. mips64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing mips64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. 
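dispatch_base() below is, at bottom, an indirect jump through a per-state table indexed by the next bytecode: the emitted code forms table + (bytecode << LogBytesPerWord), loads the target and jumps to it. A simplified standalone sketch of that lookup (a toy table with invented handler names, not the real 256-entry interpreter tables):

#include <cstdio>

typedef void (*entry_t)();

static void do_iconst_0() { std::printf("iconst_0\n"); }
static void do_iadd()     { std::printf("iadd\n"); }

int main() {
  entry_t table[256] = {};      // one code address per bytecode, per TosState
  table[0x03] = do_iconst_0;
  table[0x60] = do_iadd;

  unsigned char bytecodes[] = { 0x03, 0x60 };
  for (int i = 0; i < 2; i++) {
    entry_t target = table[bytecodes[i]];  // load table[bc << LogBytesPerWord]
    target();                              // jr T3
  }
  return 0;
}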
++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ dsubu(T2, FP, SP); ++ int min_frame_size = (frame::link_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ daddiu(T2, T2, -min_frame_size); ++ bgez(T2, L); ++ delayed()->nop(); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ dsll(T2, Rnext, LogBytesPerWord); ++ ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && ++ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; ++ ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld(T3, thread, in_bytes(Thread::polling_page_offset())); ++ andi(T3, T3, SafepointMechanism::poll_bit()); ++ bne(T3, R0, safepoint); ++ delayed()->nop(); ++ } ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos) ++ ) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // GP points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of GP. ++ if(table_offset != 0) { ++ daddiu(T3, GP, table_offset); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } else { ++ if (UseLEXT1) { ++ gsldx(T3, T2, GP, 0); ++ } else { ++ daddu(T3, T2, GP); ++ ld(T3, T3, 0); ++ } ++ } ++ } else { ++ li(T3, (long)table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } ++ jr(T3); ++ delayed()->nop(); ++ ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ li(T3, (long)safepoint_table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ jr(T3); ++ delayed()->nop(); ++ } ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode (load before advancing r13 to prevent AGI) ++ lbu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ lbu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. 
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ lb(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ lw(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ delayed()->nop(); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ delayed()->nop(); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ delayed()->nop(); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ // I think mips do not need empty_FPU_stack ++ // remove possible return value from FPU-stack, otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ ++ } ++ ++ b(unlocked); ++ delayed()->nop(); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ delayed()->nop(); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ delayed()->nop(); ++ } ++ ++ bind(loop); ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ delayed()->nop(); ++ ++ daddiu(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ delayed()->nop(); // if not at bottom then check this entry ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld(TSR, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ dsubu(AT, TSR, AT); ++ blez(AT, no_reserved_zone_enabling); ++ delayed()->nop(); ++ ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++ } ++ ld(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); ++ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); ++ move(SP, TSR); // set sp to sender sp ++} ++ ++#endif // CC_INTERP ++ ++// Lock object ++// 
++// Args: ++// c_rarg0: BasicObjectLock to be used for locking ++// ++// Kills: ++// T1 ++// T2 ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ } else { ++ Label done, slow_case; ++ const Register tmp_reg = T2; ++ const Register scr_reg = T1; ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Load object pointer into scr_reg ++ ld(scr_reg, lock_reg, obj_offset); ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into tmp_reg ++ ld(AT, scr_reg, 0); ++ ori(tmp_reg, AT, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ sd(tmp_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label succ, fail; ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); ++ bind(succ); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ b(done); ++ delayed()->nop(); ++ bind(fail); ++ } else { ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in tmp_reg as the result of cmpxchg ++ ++ dsubu(tmp_reg, tmp_reg, SP); ++ move(AT, 7 - os::vm_page_size()); ++ andr(tmp_reg, tmp_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ sd(tmp_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ bne(tmp_reg, R0, slow_case); ++ delayed()->nop(); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ } ++ beq(tmp_reg, R0, done); ++ delayed()->nop(); ++ ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg0: BasicObjectLock for lock ++// ++// Kills: ++// T1 ++// T2 ++// T3 ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ ++ const Register tmp_reg = T1; ++ const Register scr_reg = T2; ++ const Register hdr_reg = T3; ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into %T2 ++ daddiu(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into scr_reg(%T1) ++ ld(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ // free entry ++ sd(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(scr_reg, hdr_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beq(hdr_reg, R0, done); ++ delayed()->nop(); ++ ++ // Atomic swap back the old header ++ cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); ++ ++ // Call the runtime routine for slow case. ++ sd(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++ delayed()->nop(); ++} ++ ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ delayed()->nop(); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld(T0, T0, in_bytes(Method::method_data_offset())); ++ daddiu(T0, T0, in_bytes(MethodData::data_offset())); ++ daddu(V0, T0, V0); ++ bind(set_mdp); ++ sd(V0, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ pop2(V0, T0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = V0; ++ Register mdp = V1; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
++ lhu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld(AT, method, in_bytes(Method::const_offset())); ++ daddu(tmp, tmp, AT); ++ daddiu(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ sd(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ ld(AT, data); ++ sltu(tmp, R0, AT); ++ dsubu(AT, AT, tmp); ++ sd(AT, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ ld(AT, data); ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ daddu(AT, AT, tmp); ++ sd(AT, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ daddu(tmp, mdp_in, reg); ++ ld(AT, tmp, constant); ++ sltu(tmp, R0, AT); ++ dsubu(AT, AT, tmp); ++ daddu(tmp, mdp_in, reg); ++ sd(AT, tmp, constant); ++ } else { ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ daddu(tmp, mdp_in, reg); ++ ld(AT, tmp, constant); ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ daddu(AT, AT, tmp); ++ daddu(tmp, mdp_in, reg); ++ sd(AT, tmp, constant); ++ } ++ pop(tmp); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ lw(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm16(header_bits)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
++ move(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ sw(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ delayed()->nop(); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ delayed()->nop(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, mdp_in, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ daddu(AT, reg, mdp_in); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, AT, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm16(constant)) { ++ daddiu(mdp_in, mdp_in, constant); ++ } else { ++ move(AT, constant); ++ daddu(mdp_in, mdp_in, AT); ++ } ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ daddiu(AT, bumped_count, DataLayout::counter_increment); ++ sltu(AT, R0, AT); ++ daddu(bumped_count, bumped_count, AT); ++ sd(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. 
Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bne(receiver, R0, not_null); ++ delayed()->nop(); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, skip_receiver_profile); ++ delayed()->nop(); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++#if INCLUDE_JVMCI ++void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { ++ assert_different_registers(method, mdp, reg2); ++ if (ProfileInterpreter && MethodProfileWidth > 0) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label done; ++ record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, ++ &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); ++ bind(done); ++ ++ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++#endif // INCLUDE_JVMCI ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. 
(An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++ return; ++ } ++ ++ int last_row = VirtualCallData::row_limit() - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the receiver and for null. ++ // Take any of three different outcomes: ++ // 1. found receiver => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is receiver[n]. ++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); ++ test_mdp_data_at(mdp, recvr_offset, receiver, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the receiver from the CallData.) ++ ++ // The receiver is receiver[n]. Increment count[n]. ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on receiver[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (is_virtual_call) { ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(found_null); ++ } else { ++ bne(reg2, R0, done); ++ delayed()->nop(); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ ++ // Put all the "Case 3" tests here. ++ record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); ++ ++ // Found a null. Keep searching for a matching receiver, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching receiver, but we ++ // observed the receiver[start_row] is NULL. ++ ++ // Fill in the receiver field and increment the count. 
++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); ++ set_mdp_data_at(mdp, recvr_offset, receiver); ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); ++ move(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ beq(R0, R0, done); ++ delayed()->nop(); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ beq(R0, R0, profile_continue); ++ delayed()->nop(); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ move(reg2, in_bytes(MultiBranchData::per_case_size())); ++ if (UseLEXT1) { ++ gsdmult(index, index, reg2); ++ } else { ++ dmult(index, reg2); ++ mflo(index); ++ } ++ daddiu(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ ++ // Get method->_constMethod->_result_type ++ ld(T9, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld(T9, T9, in_bytes(Method::const_offset())); ++ lbu(T9, T9, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addiu(AT, T9, -T_INT); ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ // mask integer result to narrower return type. 
++ addiu(AT, T9, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ delayed()->nop(); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notBool); ++ addiu(AT, T9, -T_BYTE); ++ bne(AT, R0, notByte); ++ delayed()->nop(); ++ seb(result, result); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notByte); ++ addiu(AT, T9, -T_CHAR); ++ bne(AT, R0, notChar); ++ delayed()->nop(); ++ andi(result, result, 0xFFFF); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notChar); ++ seh(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ dsll(T0, mdo_addr.index(), mdo_addr.scale()); ++ daddu(T0, T0, mdo_addr.base()); ++ } ++ ++ bne(obj, R0, update); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bne(AT, R0, next); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ daddiu(AT, AT, -(TypeEntries::null_seen)); ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ sd(obj, mdo_addr); ++ } else { ++ sd(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? 
in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ lb(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm16(off_to_args)) { ++ daddiu(mdp, mdp, off_to_args); ++ } else { ++ move(AT, off_to_args); ++ daddu(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm16(-1 * i * TypeStackSlotEntries::per_arg_count())) { ++ addiu32(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ subu32(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ slt(AT, tmp, AT); ++ bne(AT, R0, done); ++ delayed()->nop(); ++ } ++ ld(tmp, callee, in_bytes(Method::const_offset())); ++ ++ lhu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ subu(tmp, tmp, AT); ++ ++ addiu32(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm16(to_add)) { ++ daddiu(mdp, mdp, to_add); ++ } else { ++ move(AT, to_add); ++ daddu(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm16(-1 * tmp_arg_counts)) { ++ addiu32(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ move(AT, tmp_arg_counts); ++ subu32(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ sll(tmp, tmp, exact_log2(DataLayout::cell_size)); ++ daddu(mdp, mdp, tmp); ++ } ++ sd(mdp, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ lb(tmp, _bcp_register, 0); ++ daddiu(AT, tmp, -1 * Bytecodes::_invokedynamic); ++ beq(AT, R0, do_profile); ++ delayed()->daddiu(AT, tmp, -1 * Bytecodes::_invokehandle); ++ beq(AT, R0, do_profile); ++ delayed()->nop(); ++ ++ get_method(tmp); ++ lhu(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); ++ li(AT, vmIntrinsics::_compiledLambdaForm); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ daddu(tmp, ret, R0); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ guarantee(T9 == tmp1, "You are reqired to use T9 as the index register for MIPS !"); ++ ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ lw(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); ++ bltz(tmp1, profile_continue); ++ delayed()->nop(); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ daddu(mdp, mdp, tmp1); ++ ld(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ dsll(AT, tmp1, per_arg_scale); ++ daddu(AT, AT, mdp); ++ ld(tmp2, AT, off_base); ++ ++ subu(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ dsll(AT, tmp2, Interpreter::logStackElementSize); ++ daddu(AT, AT, _locals_register); ++ ld(tmp2, AT, 0); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ bgtz(tmp1, loop); ++ delayed()->nop(); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++#endif // !CC_INTERP ++ ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ thread, ++ //Rmethod); ++ S3); ++ } ++ ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // template interpreter will leave it on the top of the stack. 
++ push(state); ++ lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ delayed()->nop(); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ pop(state); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ push(state); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ thread, S3); ++ pop(state); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ lw(scratch, counter_addr); ++ } ++ addiu32(scratch, scratch, increment); ++ sw(scratch, counter_addr); ++ ++ move(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ delayed()->nop(); ++ } else { ++ unimplemented(); ++ } ++} +diff --git a/src/hotspot/cpu/mips/interpreterRT_mips.hpp b/src/hotspot/cpu/mips/interpreterRT_mips.hpp +new file mode 100644 +index 0000000000..054138ea42 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interpreterRT_mips.hpp +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++#define CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++ ++// This is included in the middle of class Interpreter. ++// Do not include files here. 
++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ ++ void move(int from_offset, int to_offset); ++ ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp +new file mode 100644 +index 0000000000..e655b2a1a8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp +@@ -0,0 +1,252 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of SignatureHandlerGenerator ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ sd(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addiu(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ lw(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ __ movz(temp(), R0, AT); ++ __ sw(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lw(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ld(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ Argument jni_arg(jni_offset()); ++ ++ // the handle for a receiver will never be null ++ bool do_NULL_check = offset() != 0 || is_static(); ++ if (do_NULL_check) { ++ __ ld(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ daddiu((jni_arg.is_Register() ? jni_arg.as_Register() : temp()), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ movz((jni_arg.is_Register() ? 
jni_arg.as_Register() : temp()), R0, AT); ++ } else { ++ __ daddiu(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } ++ ++ if (!jni_arg.is_Register()) ++ __ sd(temp(), jni_arg.as_caller_address()); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lwc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ldc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _reg_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? 
NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _reg_args = to - Argument::n_register_parameters + jni_offset() - 1; ++ _fp_identifiers = to - 1; ++ *(int*) _fp_identifiers = 0; ++ _num_args = jni_offset(); ++ } ++}; ++ ++ ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff --git a/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp +new file mode 100644 +index 0000000000..dccdf6a019 +--- /dev/null ++++ b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++#define CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? ++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable(JavaThread* thread) { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp +new file mode 100644 +index 0000000000..46c8889f99 +--- /dev/null ++++ b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp +@@ -0,0 +1,166 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing lfence for LoadLoad barrier, we create data dependency ++// between loads, which is more efficient than lfence. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ ++ Label slow; ++ ++ // return pc RA ++ // jni env A0 ++ // obj A1 ++ // jfieldID A2 ++ ++ address counter_addr = SafepointSynchronize::safepoint_counter_addr(); ++ __ set64(AT, (long)counter_addr); ++ __ lw(T1, AT, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(AT, T1, 1); ++ __ bne(AT, R0, slow); ++ __ delayed()->nop(); ++ ++ __ move(T0, A1); ++ // Both T0 and T9 are clobbered by try_resolve_jobject_in_native. 
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->try_resolve_jobject_in_native(masm, /* jni_env */ A0, T0, T9, slow); ++ ++ __ dsrl(T2, A2, 2); // offset ++ __ daddu(T0, T0, T2); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ lbu (V0, T0, 0); break; ++ case T_BYTE: __ lb (V0, T0, 0); break; ++ case T_CHAR: __ lhu (V0, T0, 0); break; ++ case T_SHORT: __ lh (V0, T0, 0); break; ++ case T_INT: __ lw (V0, T0, 0); break; ++ case T_LONG: __ ld (V0, T0, 0); break; ++ case T_FLOAT: __ lwc1(F0, T0, 0); break; ++ case T_DOUBLE: __ ldc1(F0, T0, 0); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ set64(AT, (long)counter_addr); ++ __ lw(AT, AT, 0); ++ __ bne(T1, AT, slow); ++ __ delayed()->nop(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } ++ __ jmp(slow_case_addr); ++ __ delayed()->nop(); ++ ++ __ flush (); ++ ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/mips/jniTypes_mips.hpp b/src/hotspot/cpu/mips/jniTypes_mips.hpp +new file mode 100644 +index 0000000000..e93237ffd9 +--- /dev/null ++++ b/src/hotspot/cpu/mips/jniTypes_mips.hpp +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JNITYPES_MIPS_HPP ++#define CPU_MIPS_VM_JNITYPES_MIPS_HPP ++ ++#include "jni.h" ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++private: ++ ++ // 32bit Helper routines. ++ static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; ++ *(jint *)(to ) = from[0]; } ++ static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } ++ ++public: ++ // In MIPS64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] ++ // is 8 bytes. ++ // If we only write the low 4 bytes with (jint *), the high 4-bits will be left with uncertain values. ++ // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. ++ // This error occurs in ReflectInvoke.java ++ // The parameter of DD(int) should be 4 instead of 0x550000004. ++ // ++ // See: [runtime/javaCalls.hpp] ++ ++ static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ *(jlong*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ *(jlong*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ *(jlong*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // Oops are stored in native format in one JavaCallArgument slot at *to. 
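++  // Illustrative example of the resulting buffer layout (values chosen only
++  // for the example): packing the arguments of a call with signature
++  // (jint, jlong) proceeds as
++  //   intptr_t buf[3]; int pos = 0;
++  //   JNITypes::put_int(4, buf, pos);        // buf[0] = 4, pos = 1
++  //   JNITypes::put_long(0x1234, buf, pos);  // buf[1] = buf[2] = 0x1234, pos = 3
++  // i.e. an int still occupies one full 8-byte slot and a long occupies two
++  // slots with the value duplicated into both, as described above.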
++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 0 ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ *(jdouble*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_MIPS_VM_JNITYPES_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.cpp b/src/hotspot/cpu/mips/macroAssembler_mips.cpp +new file mode 100644 +index 0000000000..cc868cae55 +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.cpp +@@ -0,0 +1,4257 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "jvm.h"
++#include "asm/assembler.hpp"
++#include "asm/assembler.inline.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "compiler/disassembler.hpp"
++#include "gc/shared/barrierSet.hpp"
++#include "gc/shared/barrierSetAssembler.hpp"
++#include "gc/shared/collectedHeap.inline.hpp"
++#include "interpreter/interpreter.hpp"
++#include "memory/resourceArea.hpp"
++#include "memory/universe.hpp"
++#include "nativeInst_mips.hpp"
++#include "prims/methodHandles.hpp"
++#include "runtime/biasedLocking.hpp"
++#include "runtime/interfaceSupport.inline.hpp"
++#include "runtime/objectMonitor.hpp"
++#include "runtime/os.hpp"
++#include "runtime/safepoint.hpp"
++#include "runtime/safepointMechanism.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "utilities/macros.hpp"
++
++#ifdef COMPILER2
++#include "opto/intrinsicnode.hpp"
++#endif
++
++#define T0 RT0
++#define T1 RT1
++#define T2 RT2
++#define T3 RT3
++#define T8 RT8
++#define T9 RT9
++
++// Implementation of MacroAssembler
++
++intptr_t MacroAssembler::i[32] = {0};
++float MacroAssembler::f[32] = {0.0};
++
++void MacroAssembler::print(outputStream *s) {
++  unsigned int k;
++  for(k=0; k<32; k++) {
++    s->print_cr("i%d = 0x%.16lx", k, i[k]);
++  }
++  s->cr();
++
++  for(k=0; k<32; k++) {
++    s->print_cr("f%d = %f", k, f[k]);
++  }
++  s->cr();
++}
++
++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
++
++void MacroAssembler::save_registers(MacroAssembler *masm) {
++#define __ masm->
++  for(int k=0; k<32; k++) {
++    __ sw (as_Register(k), A0, i_offset(k));
++  }
++
++  for(int k=0; k<32; k++) {
++    __ swc1 (as_FloatRegister(k), A0, f_offset(k));
++  }
++#undef __
++}
++
++void MacroAssembler::restore_registers(MacroAssembler *masm) {
++#define __ masm->
++  for(int k=0; k<32; k++) {
++    __ lw (as_Register(k), A0, i_offset(k));
++  }
++
++  for(int k=0; k<32; k++) {
++    __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
++  }
++#undef __
++}
++
++
++void MacroAssembler::pd_patch_instruction(address branch, address target) {
++  jint& stub_inst = *(jint*) branch;
++  jint *pc = (jint *)branch;
++
++  if((opcode(stub_inst) == special_op) && (special(stub_inst) == daddu_op)) {
++    //b_far:
++    //  move(AT, RA); // daddu
++    //  emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
++    //  nop();
++    //  lui(T9, 0); // to be patched
++    //  ori(T9, 0);
++    //  daddu(T9, T9, RA);
++    //  move(RA, AT);
++    //  jr(T9);
++
++    assert(opcode(pc[3]) == lui_op
++        && opcode(pc[4]) == ori_op
++        && special(pc[5]) == daddu_op, "Not a branch label patch");
++    if(!(opcode(pc[3]) == lui_op
++        && opcode(pc[4]) == ori_op
++        && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }
++
++    int offset = target - branch;
++    if (!is_simm16(offset)) {
++      pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
++      pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
++    } else {
++      // revert to "beq + nop"
++      CodeBuffer cb(branch, 4 * 10);
++      MacroAssembler masm(&cb);
++#define __ masm.
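++      // The offset fits in a signed 16-bit branch again, so the far-branch
++      // sequence is overwritten with a short b(target) and padded with nops
++      // to keep the patched region the same length as before.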
++ __ b(target); ++ __ delayed()->nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ } ++ return; ++ } else if (special(pc[4]) == jr_op ++ && opcode(pc[4]) == special_op ++ && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) { ++ //jmp_far: ++ // patchable_set48(T9, target); ++ // jr(T9); ++ // nop(); ++ ++ CodeBuffer cb(branch, 4 * 4); ++ MacroAssembler masm(&cb); ++ masm.patchable_set48(T9, (long)(target)); ++ return; ++ } ++ ++#ifndef PRODUCT ++ if (!is_simm16((target - branch - 4) >> 2)) { ++ tty->print_cr("Illegal patching: branch = " INTPTR_FORMAT ", target = " INTPTR_FORMAT, p2i(branch), p2i(target)); ++ tty->print_cr("======= Start decoding at branch = " INTPTR_FORMAT " =======", p2i(branch)); ++ Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty); ++ tty->print_cr("======= End of decoding ======="); ++ } ++#endif ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++static inline address first_cache_address() { ++ return CodeCache::low_bound() + sizeof(HeapBlock::Header); ++} ++ ++static inline address last_cache_address() { ++ return CodeCache::high_bound() - Assembler::InstructionSize; ++} ++ ++int MacroAssembler::call_size(address target, bool far, bool patchable) { ++ if (patchable) return 6 << Assembler::LogInstructionSize; ++ if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop ++ return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize; ++} ++ ++// Can we reach target using jal/j from anywhere ++// in the code cache (because code can be relocated)? ++bool MacroAssembler::reachable_from_cache(address target) { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch); ++} ++ ++bool MacroAssembler::reachable_from_cache() { ++ if (ForceUnreachable) { ++ return false; ++ } else { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return fit_in_jal(cl, ch); ++ } ++} ++ ++void MacroAssembler::general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ j(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ //j(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_jump(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ j(target); ++ delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_jump(address target) { ++ return 6; ++} ++ ++void MacroAssembler::general_call(address target) { ++ if (reachable_from_cache(target)) { ++ jal(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_call(address target) { ++ if (reachable_from_cache(target)) { ++ //jal(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jalr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_call(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ 
delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_call(address target) { ++ return 6; ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ address target = entry.target(); ++ if (!reachable_from_cache()) { ++ address stub = emit_trampoline_stub(offset(), target); ++ if (stub == NULL) { ++ return NULL; // CodeCache is full ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ ++ if (reachable_from_cache()) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ delayed()->nop(); ++ } else { ++ // load the call target from the trampoline stub ++ // branch ++ long dest = (long)pc(); ++ dest += (dest & 0x8000) << 1; ++ lui(T9, dest >> 32); ++ ori(T9, T9, split_low(dest >> 16)); ++ dsll(T9, T9, 16); ++ ld(T9, T9, simm16(split_low(dest))); ++ jalr(T9); ++ delayed()->nop(); ++ } ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Max stub size: alignment nop, TrampolineStub. ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. 
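++  // The stub body emitted below is simply a word-aligned 64-bit slot holding
++  // the destination address; the relocation ties it to the call site so that
++  // the lui/ori/dsll/ld sequence in trampoline_call() above can be patched to
++  // load its target from this slot.  (The "dest += (dest & 0x8000) << 1"
++  // adjustment there compensates for the sign extension of the 16-bit offset
++  // in the final ld.)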
++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ emit_int64((int64_t)dest); ++ end_a_stub(); ++ return stub; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::beq(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ //Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::bne(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ bne(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ beq(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ ++ bc1f(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ ++ bc1t(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ volatile address dest = target(L); ++// ++// MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 ++// 0x00000055651ed514: daddu at, ra, zero ++// 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 ++// ++// 0x00000055651ed51c: sll zero, zero, 0 ++// 0x00000055651ed520: lui t9, 0x0 ++// 0x00000055651ed524: ori t9, t9, 0x21b8 ++// 0x00000055651ed528: daddu t9, t9, ra ++// 0x00000055651ed52c: daddu ra, at, zero ++// 0x00000055651ed530: jr t9 ++// 0x00000055651ed534: sll zero, zero, 0 ++// ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ lui(T9, 0); // to be patched ++ ori(T9, T9, 0); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ b(offset(entry)); ++ } else { ++ // address must be bounded ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ li32(T9, entry - pc()); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register 
offset) { ++ addu_long(AT, base, offset); ++ ld_ptr(rt, AT, 0); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ guarantee(AT != rt, "AT must not equal rt"); ++ addu_long(AT, base, offset); ++ st_ptr(rt, AT, 0); ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). ++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ Label again; ++ ++ li(tmp_reg1, counter_addr); ++ bind(again); ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ ll(tmp_reg2, tmp_reg1, 0); ++ addiu(tmp_reg2, tmp_reg2, inc); ++ sc(tmp_reg2, tmp_reg1, 0); ++ beq(tmp_reg2, R0, again); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::reserved_stack_check() { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ dsubu(AT, SP, AT); ++ bltz(AT, no_reserved_zone_enabling); ++ delayed()->nop(); ++ ++ enter(); // RA and FP are live. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ leave(); ++ ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. ++ li(AT, (long)StubRoutines::throw_delayed_StackOverflowError_entry()); ++ jr(AT); ++ delayed()->nop(); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T9; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ daddiu(AT, R0, markOopDesc::biased_lock_pattern); ++ dsubu(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ delayed()->nop(); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. 
++ // Note that because there is no current thread register on MIPS we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. ++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ if (swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++#else ++ xorr(swap_reg, TREG, tmp_reg); ++#endif ++ ++ move(AT, ~((int) markOopDesc::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ delayed()->nop(); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ ++ move(AT, markOopDesc::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ delayed()->nop(); ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ move(AT, markOopDesc::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ delayed()->nop(); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++#ifndef OPT_THREAD ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, TREG, swap_reg); ++#endif ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. 
The revocation will occur in the ++ // interpreter runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ b(done); ++ delayed()->nop(); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, tmp_reg, TREG); ++#endif ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ ++ b(done); ++ delayed()->nop(); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); ++ daddiu(AT, R0, markOopDesc::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++ delayed()->nop(); ++} ++ ++// the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ Label L, E; ++ ++ assert(number_of_arguments <= 4, "just check"); ++ ++ andi(AT, SP, 0xf); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ daddiu(SP, SP, -8); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ daddiu(SP, SP, 8); ++ b(E); ++ delayed()->nop(); ++ ++ bind(L); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(E); ++} ++ ++ ++void MacroAssembler::jmp(address entry) { ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ address entry = target(L); ++ assert(entry != NULL, "jmp most probably wrong"); ++ InstructionMark im(this); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)entry); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)pc()); ++ } ++ ++ jr(T9); ++ delayed()->nop(); ++} ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(AT, (long)obj); ++ sd(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++// c/c++ code assume T9 is entry point, so we just always move entry to t9 ++// maybe there is some more graceful method to handle this. 
FIXME ++// For more info, see class NativeCall. ++ patchable_set48(T9, (long)entry); ++ jalr(T9); ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh) ++{ ++ switch (rh.type()) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ patchable_set48(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ sltu(r, R0, r); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++ li(A0, (long)msg); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ brk(17); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ pushad(); ++ li(A0, (long)msg); ++ push(S2); ++ move(AT, -(StackAlignmentInBytes)); ++ move(S2, SP); // use S2 as a sender SP holder ++ andr(SP, SP, AT); // align stack as required by ABI ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ move(SP, S2); // use S2 as a sender SP holder ++ pop(S2); ++ popad(); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm16(imm)) { ++ daddiu(reg, reg, imm); ++ } else { ++ move(AT, imm); ++ daddu(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address 
entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ ++ address before_call_pc; ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ before_call_pc = (address)pc(); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. 
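++  // With OPT_THREAD the current thread is expected to stay in TREG across the
++  // call (the debug-only assert below checks this), so nothing is reloaded;
++  // otherwise get_thread() has to re-derive the current thread here.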
++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ delayed()->nop(); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ li(AT, before_call_pc); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ move(AT, -(StackAlignmentInBytes)); ++ andr(SP, SP, AT); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++ ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void 
MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ lw(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ move(FP, SP); ++} ++ ++void MacroAssembler::leave() { ++ move(SP, FP); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); ++} ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++// ++// In MIPS64, we don't use full 64-bit address space. ++// Only a small range is actually used. ++// ++// Example: ++// $ cat /proc/13352/maps ++// 120000000-120010000 r-xp 00000000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 12001c000-120020000 rw-p 0000c000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 120020000-1208dc000 rwxp 00000000 00:00 0 [heap] ++// 555d574000-555d598000 r-xp 00000000 08:01 2073768 /lib/ld-2.12.so ++// 555d598000-555d59c000 rw-p 00000000 00:00 0 ++// ...... ++// 558b1f8000-558b23c000 rwxp 00000000 00:00 0 ++// 558b23c000-558b248000 ---p 00000000 00:00 0 ++// 558b248000-558b28c000 rwxp 00000000 00:00 0 ++// ffff914000-ffff94c000 rwxp 00000000 00:00 0 [stack] ++// ffffffc000-10000000000 r-xp 00000000 00:00 0 [vdso] ++// ++// All stacks are positioned at 0x55________. ++// Therefore, we can utilize the same algorithm used in 32-bit. 
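++//
++// Because every thread stack falls into that narrow 0x55________ window, only
++// the low SP_BITLENGTH bits of SP differ between threads.  Shifting SP right
++// by PAGE_SHIFT and masking it down to (SP_BITLENGTH - PAGE_SHIFT) bits
++// therefore gives each stack page its own _sp_map slot, which the lookup
++// below uses to recover the owning Thread* without calling into the runtime.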
++ // int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); ++ // Thread* thread = _sp_map[index]; ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T9; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ addu(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ pushad_except_v0(); ++ ++ push(S5); ++ move(S5, SP); ++ move(AT, -StackAlignmentInBytes); ++ andr(SP, SP, AT); ++ call(CAST_FROM_FN_PTR(address, Thread::current)); ++ //MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current), 0); ++ delayed()->nop(); ++ move(SP, S5); ++ pop(S5); ++ ++ popad_except_v0(); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ sd(R0, Address(thread, JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ sd(R0, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ sd(R0, Address(thread, JavaThread::last_Java_pc_offset())); ++} ++ ++// Write serialization page so VM thread can do a pseudo remote membar. ++// We use the current thread pointer to calculate a thread specific ++// offset to write to within the page. This minimizes bus traffic ++// due to cache line collision. 
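++// The per-thread offset is computed below as
++//   offset = (thread >> get_serialize_page_shift_count()) & (vm_page_size() - sizeof(int))
++// and a zero word is stored at that offset into the shared serialization page.
++// When the VM thread protects that page, a thread racing through a state
++// transition faults on this store and is thereby serialized.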
++void MacroAssembler::serialize_memory(Register thread, Register tmp) { ++ int mask = os::vm_page_size() - sizeof(int); ++ assert_different_registers(AT, tmp); ++ assert(is_uimm(mask, 16), "Not a unsigned 16-bit"); ++ srl(AT, thread, os::get_serialize_page_shift_count()); ++ andi(AT, AT, mask); ++ li(tmp, os::get_memory_serialize_page()); ++ addu(tmp, tmp, AT); ++ sw(R0, tmp, 0); ++} ++ ++void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } else { ++ li(AT, SafepointSynchronize::address_of_state()); ++ lw(AT, AT, 0); ++ addiu(AT, AT, -SafepointSynchronize::_not_synchronized); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } ++} ++ ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. ++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ sync(); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } else { ++ safepoint_poll(slow_path, thread_reg); ++ } ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. 
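++// The pc of the transition is recorded as well: set_last_Java_frame() below
++// materializes last_java_pc with a patchable_set48 under an internal_word
++// relocation, so the stored pc remains correct if the generated code is moved.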
++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ ++ sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset())); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ Unimplemented(); ++ //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ //bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ Unimplemented(); ++ //assert_different_registers(obj, var_size_in_bytes, t1, AT); ++ //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ //bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++} ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ addu(AT, AT, var_size_in_bytes); ++ } else { ++ addiu(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++void MacroAssembler::li(Register rd, long imm) { ++ if (imm <= max_jint && imm >= min_jint) { ++ li32(rd, (int)imm); ++ } else if (julong(imm) <= 0xFFFFFFFF) { ++ assert_not_delayed(); ++ // lui sign-extends, so we can't use that. 
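++    // (A value in this range must be zero-extended: if bit 31 were set, lui
++    // would smear ones into the upper 32 bits.  Building the constant with
++    // ori/dsll/ori never sign-extends, so the upper bits stay zero.)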
++ ori(rd, R0, julong(imm) >> 16); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++ } else if ((imm > 0) && is_simm16(imm >> 32)) { ++ // A 48-bit address ++ li48(rd, imm); ++ } else { ++ li64(rd, imm); ++ } ++} ++ ++void MacroAssembler::li32(Register reg, int imm) { ++ if (is_simm16(imm)) { ++ addiu(reg, R0, imm); ++ } else { ++ lui(reg, split_low(imm >> 16)); ++ if (split_low(imm)) ++ ori(reg, reg, split_low(imm)); ++ } ++} ++ ++void MacroAssembler::set64(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ } else { ++ lui(d, split_low(value >> 16)); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ } else { // li64 ++ // 6 insts ++ li64(d, value); ++ } ++} ++ ++ ++int MacroAssembler::insts_for_set64(jlong value) { ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ //daddiu(d, R0, value); ++ count++; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ count++; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ //dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ //li48(d, value); ++ count += 4; ++ } else { // li64 ++ // 6 insts ++ //li64(d, value); ++ count += 6; ++ } ++ ++ return count; ++} ++ ++void MacroAssembler::patchable_set48(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ count += 4; ++ } else { // li64 ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_set32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else { ++ 
tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_call32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)Klass::encode_klass(k); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, narrowKlass); ++} ++ ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, oop_index); ++} ++ ++// ((OopHandle)result).resolve(); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. 
++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, NOREG); ++} ++ ++void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { ++ // get mirror ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld_ptr(mirror, method, in_bytes(Method::const_offset())); ++ ld_ptr(mirror, mirror, in_bytes(ConstMethod::constants_offset())); ++ ld_ptr(mirror, mirror, ConstantPool::pool_holder_offset_in_bytes()); ++ ld_ptr(mirror, mirror, mirror_offset); ++ resolve_oop_handle(mirror, tmp); ++} ++ ++void MacroAssembler::li64(Register rd, long imm) { ++ assert_not_delayed(); ++ lui(rd, split_low(imm >> 48)); ++ ori(rd, rd, split_low(imm >> 32)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::li48(Register rd, long imm) { ++ assert_not_delayed(); ++ assert(is_simm16(imm >> 32), "Not a 48-bit address"); ++ lui(rd, imm >> 32); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ pushad(); ++ move(A1, reg); ++ li(A0, (long)b); ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ popad(); ++} ++ ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) { ++ nop(); ++ return; ++ } ++ // Pass register number to verify_oop_subroutine ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ ++ addiu(SP, SP, - 7 * wordSize); ++ st_ptr(T0, SP, 6 * wordSize); ++ st_ptr(T1, SP, 5 * wordSize); ++ st_ptr(RA, SP, 4 * wordSize); ++ st_ptr(A0, SP, 3 * wordSize); ++ st_ptr(A1, SP, 2 * wordSize); ++ st_ptr(AT, SP, 1 * wordSize); ++ st_ptr(T9, SP, 0); ++ ++ // addr may contain sp so we will have to adjust it based on the ++ // pushes that we just did. 
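++  // Note (illustrative): the prologue above pushed 7 words, so an SP-relative
++  // address now sits 7 * wordSize below its original slot; the lea/ld_ptr pair
++  // below rebases such an address before loading the oop into A1.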
++ if (addr.uses(SP)) { ++ lea(A1, addr); ++ ld_ptr(A1, Address(A1, 7 * wordSize)); ++ } else { ++ ld_ptr(A1, addr); ++ } ++ li(A0, (long)b); ++ // call indirectly to solve generation ordering problem ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ ld_ptr(T0, SP, 6* wordSize); ++ ld_ptr(T1, SP, 5* wordSize); ++ ld_ptr(RA, SP, 4* wordSize); ++ ld_ptr(A0, SP, 3* wordSize); ++ ld_ptr(A1, SP, 2* wordSize); ++ ld_ptr(AT, SP, 1* wordSize); ++ ld_ptr(T9, SP, 0* wordSize); ++ addiu(SP, SP, 7 * wordSize); ++} ++ ++// used registers : T0, T1 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ ++ Label exit, error; ++ // increment counter ++ li(T0, (long)StubRoutines::verify_oop_count_addr()); ++ lw(AT, T0, 0); ++ daddiu(AT, AT, 1); ++ sw(AT, T0, 0); ++ ++ // make sure object is 'reasonable' ++ beq(A1, R0, exit); // if obj is NULL it is ok ++ delayed()->nop(); ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(AT, oop_mask); ++ andr(T0, A1, AT); ++ li(AT, oop_bits); ++ bne(T0, AT, error); ++ delayed()->nop(); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ reinit_heapbase(); ++ // add for compressedoops ++ load_klass(T0, A1); ++ beq(T0, R0, error); // if klass is NULL it is broken ++ delayed()->nop(); ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ delayed()->nop(); ++ ++ // handle errors ++ bind(error); ++ pushad(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ popad(); ++ jr(RA); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ sltu(AT, t2, AT); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ sltu(AT, AT, t2); ++ beq(AT, R0, ok); ++ delayed()->nop(); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++ Unimplemented(); ++ //AddressLiteral a(delayed_value_addr); ++ // load indirectly to solve generation ordering problem ++ //movptr(tmp, ExternalAddress((address) delayed_value_addr)); ++ //ld(tmp, a); ++ if (offset != 0) ++ daddiu(tmp,tmp, offset); ++ ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ //short ++ //andi(reg, reg, 0xffff); ++ srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ sra(reg, reg, 16); ++ orr(reg, reg, AT); ++} ++ ++void MacroAssembler::huswap(Register reg) { ++ dsrl(AT, reg, 8); ++ dsll(reg, reg, 24); ++ dsrl(reg, reg, 16); ++ orr(reg, reg, AT); ++ andi(reg, reg, 0xffff); ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ 
srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ orr(reg, reg, AT); ++ //reg : 4 1 2 3 ++ srl(AT, AT, 16); ++ xorr(AT, AT, reg); ++ andi(AT, AT, 0xff); ++ //AT : 0 0 0 1^3); ++ xorr(reg, reg, AT); ++ //reg : 4 1 2 1 ++ sll(AT, AT, 16); ++ xorr(reg, reg, AT); ++ //reg : 4 3 2 1 ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register resflag, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ bind(again); ++ lld(resflag, addr); ++ bne(resflag, oldval, fail); ++ delayed()->nop(); ++ move(resflag, newval); ++ scd(resflag, addr); ++ beq(resflag, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ bind(fail); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ lld(tmp, addr); ++ bne(tmp, oldval, neq); ++ delayed()->nop(); ++ move(tmp, newval); ++ scd(tmp, addr); ++ beq(tmp, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(neq); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) { ++ b(*fail); ++ delayed()->nop(); ++ } ++} ++ ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, ++ Register resflag, bool sign, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ bind(again); ++ ll(resflag, addr); ++ if (!sign) ++ dinsu(resflag, R0, 32, 32); ++ bne(resflag, oldval, fail); ++ delayed()->nop(); ++ ++ move(resflag, newval); ++ sc(resflag, addr); ++ beq(resflag, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(fail); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll(tmp, addr); ++ if (!sign) ++ dinsu(tmp, R0, 32, 32); ++ bne(tmp, oldval, neq); ++ delayed()->nop(); ++ move(tmp, newval); ++ sc(tmp, addr); ++ beq(tmp, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(neq); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) { ++ b(*fail); ++ delayed()->nop(); ++ } ++} ++ ++void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) { ++ Label done, again, nequal; ++ ++ Register x_reg = x_regLo; ++ dsll32(x_regHi, x_regHi, 0); ++ dsll32(x_regLo, x_regLo, 0); ++ dsrl32(x_regLo, x_regLo, 0); ++ orr(x_reg, x_regLo, x_regHi); ++ ++ Register c_reg = c_regLo; ++ dsll32(c_regHi, c_regHi, 0); ++ dsll32(c_regLo, c_regLo, 0); ++ dsrl32(c_regLo, c_regLo, 0); ++ orr(c_reg, c_regLo, c_regHi); ++ ++ bind(again); ++ ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ lld(AT, dest); ++ bne(AT, c_reg, nequal); ++ delayed()->nop(); ++ ++ //move(AT, x_reg); 
++ daddu(AT, x_reg, R0); ++ scd(AT, dest); ++ beq(AT, R0, again); ++ delayed()->nop(); ++ b(done); ++ delayed()->nop(); ++ ++ // not xchged ++ bind(nequal); ++ sync(); ++ //move(c_reg, AT); ++ //move(AT, R0); ++ daddu(c_reg, AT, R0); ++ daddu(AT, R0, R0); ++ bind(done); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_s(tmp, fs, ft); ++ trunc_l_s(tmp, tmp); ++ cvt_s_l(tmp, tmp); ++ mul_s(tmp, tmp, ft); ++ sub_s(fd, fs, tmp); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_d(tmp, fs, ft); ++ trunc_l_d(tmp, tmp); ++ cvt_d_l(tmp, tmp); ++ mul_d(tmp, tmp, ft); ++ sub_d(fd, fs, tmp); ++} ++ ++#ifdef COMPILER2 ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. ++// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. ++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. 
++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. ++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label IsInflated, DONE, DONE_SET; ++ ++ // Ensure the register assignents are disjoint ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); ++ } ++ ++ if (EmitSync & 1) { ++ move(AT, 0x0); ++ return; ++ } else ++ if (EmitSync & 2) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)) ; // fetch markword ++ ori(tmpReg, tmpReg, 0x1); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_LABEL); // Updates tmpReg ++ delayed()->nop(); ++ ++ // Recursive locking ++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, (7 - os::vm_page_size() )); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ bind(DONE_LABEL) ; ++ } else { ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. 
++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); ++ b(fail); ++ delayed()->nop(); ++ bind(succ); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(fail); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias ++ delayed()->nop(); ++ ++ // Attempt stack-locking ... ++ ori(tmpReg, tmpReg, markOopDesc::unlocked_value); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ if (PrintBiasedLockingStatistics) { ++ Label SUCC, FAIL; ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg ++ bind(SUCC); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(FAIL); ++ } else { ++ // If cmpxchg is succ, then scrReg = 1 ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg ++ } ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. ++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bne(tmpReg, R0, L); ++ delayed()->nop(); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ bind(L); ++ } ++ b(DONE); ++ delayed()->sltiu(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 ++ ++ bind(IsInflated); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. ++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); ++ sd(AT, Address(boxReg, 0)); ++ ++ ld(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ bne(AT, R0, DONE_SET); ++ delayed()->ori(scrReg, R0, 0); ++ ++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ // It's inflated and appears unlocked ++ cmpxchg(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2), R0, TREG, scrReg, false, false) ; ++ // Intentional fall-through into DONE ... ++ ++ bind(DONE_SET); ++ move(resReg, scrReg); ++ ++ // DONE is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE); ++ // At DONE the resReg is set as follows ... ++ // Fast_Unlock uses the same protocol. 
++ // resReg == 1 -> Success ++ // resREg == 0 -> Failure - force control through the slow-path ++ ++ // Avoid branch-to-branch on AMD processors ++ // This appears to be superstition. ++ if (EmitSync & 32) nop() ; ++ ++ } ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. ++ ++void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label DONE, DONE_SET, Stacked, Inflated; ++ ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastUnlock"); ++ ++ if (EmitSync & 4) { ++ // Disable - inhibit all inlining. Force control through the slow-path ++ move(AT, 0x0); ++ return; ++ } else ++ if (EmitSync & 8) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ // classic stack-locking code ... ++ ld(tmpReg, Address(boxReg, 0)) ; ++ beq(tmpReg, R0, DONE_LABEL) ; ++ move(AT, 0x1); // delay slot ++ ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ bind(DONE_LABEL); ++ } else { ++ Label CheckSucc; ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_exit(objReg, tmpReg, succ); ++ b(fail); ++ delayed()->nop(); ++ bind(succ); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(fail); ++ } ++ ++ ld(tmpReg, Address(boxReg, 0)); // Examine the displaced header ++ beq(tmpReg, R0, DONE_SET); // 0 indicates recursive stack-lock ++ delayed()->sltiu(AT, tmpReg, 1); ++ ++ ld(tmpReg, Address(objReg, 0)); // Examine the object's markword ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ beq(AT, R0, Stacked); // Inflated? ++ delayed()->nop(); ++ ++ bind(Inflated); ++ // It's inflated. 
++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). ++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ ++ // It's inflated ++ ld(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)) ; ++ xorr(scrReg, scrReg, TREG); ++ ++ ld(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)) ; ++ orr(scrReg, scrReg, AT); ++ ++ bne(scrReg, R0, DONE_SET); ++ delayed()->ori(AT, R0, 0); ++ ++ ld(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); ++ ld(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ bne(scrReg, R0, DONE_SET); ++ delayed()->ori(AT, R0, 0); ++ ++ sync(); ++ sd(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ ++ bind(Stacked); ++ ld(tmpReg, Address(boxReg, 0)); ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ ++ bind(DONE_SET); ++ move(resReg, AT); ++ ++ if (EmitSync & 65536) { ++ bind (CheckSucc); ++ } ++ ++ bind(DONE); ++ ++ // Avoid branch to branch on AMD processors ++ if (EmitSync & 32768) { nop() ; } ++ } ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++ ++//In MIPS64, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; ++ ++// We preserve all caller-saved register ++void MacroAssembler::pushad(){ ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++}; ++ ++void MacroAssembler::popad(){ ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ 
ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++}; ++ ++// We preserve all caller-saved register except V0 ++void MacroAssembler::pushad_except_v0() { ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++} ++ ++void MacroAssembler::popad_except_v0() { ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ for (i = 0; i < len; i++) { ++ ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ daddiu(SP, SP, -16); ++ sd(reg1, SP, 8); ++ sd(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld(reg1, SP, 8); ++ ld(reg2, SP, 0); ++ daddiu(SP, SP, 16); ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else ++ ld(dst, src, oopDesc::klass_offset_in_bytes()); ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ sw(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ sd(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ sw(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = 
AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } ++} ++ ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++// Doesn't do verfication, generates fixed size code ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register tmp2, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); ++} ++ ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. ++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ movz(r, S5_heapbase, r); ++ dsubu(r, r, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ movz(dst, S5_heapbase, dst); ++ dsubu(dst, dst, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ dsubu(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ movz(dst, R0, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in 
encode_heap_oop_not_null"); ++ if (Universe::narrow_oop_base() != NULL) { ++ dsubu(r, r, S5_heapbase); ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop_not_null2"); ++ ++ if (Universe::narrow_oop_base() != NULL) { ++ dsubu(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ move(AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ daddu(r, r, S5_heapbase); ++ movz(r, R0, AT); ++ } ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (dst != src) nop(); // DON'T DELETE THIS GUY. ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ move(AT, dst); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(dst, LogMinObjAlignmentInBytes); ++ } ++ daddu(dst, dst, S5_heapbase); ++ movz(dst, R0, AT); ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ daddu(dst, src, S5_heapbase); ++ } ++ movz(dst, R0, src); ++ } ++ } ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ daddu(r, r, S5_heapbase); ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ //lea(dst, Address(S5_heapbase, src, Address::times_8, 0)); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes == Address::times_8) { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ daddu(dst, dst, S5_heapbase); ++ } ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (Universe::narrow_klass_base() != NULL) { ++ assert(r != AT, "Encoding a klass in AT"); ++ set64(AT, (int64_t)Universe::narrow_klass_base()); ++ dsubu(r, r, AT); ++ } ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ dsubu(dst, src, dst); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++// Function instr_size_for_decode_klass_not_null() counts the instructions ++// generated by decode_klass_not_null(register r) and reinit_heapbase(), ++// when (Universe::heap() != NULL). Hence, if the instructions they ++// generate change, then this method needs to be updated. ++int MacroAssembler::instr_size_for_decode_klass_not_null() { ++ assert (UseCompressedClassPointers, "only for compressed klass ptrs"); ++ if (Universe::narrow_klass_base() != NULL) { ++ // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). ++ return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10); ++ } else { ++ // longest load decode klass function, mov64, leaq ++ return (Universe::narrow_klass_shift() == 0 ? 
4 * 0 : 4 * 1); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(AT, (int64_t)Universe::narrow_klass_base()); ++ daddu(r, r, AT); ++ //Not neccessary for MIPS at all. ++ //reinit_heapbase(); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ dsll(AT, src, Address::times_8); ++ daddu(dst, dst, AT); ++ } else { ++ daddu(dst, src, dst); ++ } ++ } ++} ++ ++void MacroAssembler::incrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ addu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { decrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ addu32(reg, reg, AT); ++} ++ ++void MacroAssembler::decrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ subu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { incrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ subu32(reg, reg, AT); ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (Universe::narrow_oop_base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); ++ } ++ } else { ++ set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); ++ ld(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else 
if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); ++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ delayed()->nop(); ++ // Check the supertype display: ++ if (must_load_sco) { ++ lwu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ daddu(AT, sub_klass, super_check_offset.register_or_noreg()); ++ ld(AT, AT, super_check_offset.constant_or_zero()); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ addiu(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ delayed()->nop(); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ delayed()->nop(); ++ b(*L_slow_path); ++ delayed()->nop(); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } else { ++ // No slow path; it's a fast decision. 
++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } ++ ++ bind(L_fallthrough); ++ ++} ++ ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ lw(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ daddiu(temp_reg, temp_reg, Array::base_offset_in_bytes()); ++ ++ // OpenJDK8 never compresses klass pointers in secondary-super array. ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ delayed()->nop(); ++ ld(AT, temp_reg, 0); ++ beq(AT, super_klass, subtype); ++ delayed()->daddiu(temp_reg, temp_reg, 1 * wordSize); ++ b(Loop); ++ delayed()->daddiu(temp2_reg, temp2_reg, -1); ++ ++ bind(subtype); ++ sd(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ delayed()->nop(); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. ++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ dsll(scale_reg, scale_reg, scale_factor); ++ daddu(scale_reg, SP, scale_reg); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: lw(dst, src); break; ++ case 2: is_signed ? lh(dst, src) : lhu(dst, src); break; ++ case 1: is_signed ? lb( dst, src) : lbu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: sd(src, dst); break; ++ case 4: sw(src, dst); break; ++ case 2: sh(src, dst); break; ++ case 1: sb(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ lw(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ dsll(scan_temp, scan_temp, times_vte_scale); ++ daddu(scan_temp, recv_klass, scan_temp); ++ daddiu(scan_temp, scan_temp, vtable_base); ++ if (HeapWordsPerLong > 1) { ++ // Round up to align_object_offset boundary ++ // see code for InstanceKlass::start_of_itable! 
++ round_to(scan_temp, BytesPerLong); ++ } ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ if (itable_index.is_constant()) { ++ set64(AT, (int)itable_index.is_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, itable_index.as_register(), (int)Address::times_ptr); ++ } ++ daddu(AT, AT, recv_klass); ++ daddiu(recv_klass, AT, itentry_off); ++ } ++ ++ Label search, found_method; ++ ++ for (int peel = 1; peel >= 0; peel--) { ++ ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ ++ if (peel) { ++ beq(intf_klass, method_result, found_method); ++ delayed()->nop(); ++ } else { ++ bne(intf_klass, method_result, search); ++ delayed()->nop(); ++ // (invert the test to fall through to found_method...) ++ } ++ ++ if (!peel) break; ++ ++ bind(search); ++ ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beq(method_result, R0, L_no_such_interface); ++ delayed()->nop(); ++ daddiu(scan_temp, scan_temp, scan_step); ++ } ++ ++ bind(found_method); ++ ++ if (return_method) { ++ // Got a hit. ++ lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ if (UseLEXT1) { ++ gsldx(method_result, recv_klass, scan_temp, 0); ++ } else { ++ daddu(AT, recv_klass, scan_temp); ++ ld(method_result, AT, 0); ++ } ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ Register tmp = GP; ++ push(tmp); ++ ++ if (vtable_index.is_constant()) { ++ assert_different_registers(recv_klass, method_result, tmp); ++ } else { ++ assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); ++ } ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ if (vtable_index.is_constant()) { ++ set64(AT, vtable_index.as_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, vtable_index.as_register(), (int)Address::times_ptr); ++ } ++ set64(tmp, base + vtableEntry::method_offset_in_bytes()); ++ daddu(tmp, tmp, AT); ++ daddu(tmp, tmp, recv_klass); ++ ld(method_result, tmp, 0); ++ ++ pop(tmp); ++} ++ ++void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ sw(src_reg, tmp_reg, disp); ++ } else { ++ st_ptr(src_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_INT: ++ sw(src_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ sh(src_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ sb(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, 
scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++} ++ ++void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ sdc1(src_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ swc1(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type); ++ } ++} ++ ++void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ ld_ptr(dst_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_INT: ++ lw(dst_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ lhu(dst_reg, tmp_reg, disp); ++ break; ++ case T_SHORT: ++ lh(dst_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ lb(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++ ++ return code_offset; ++} ++ ++#ifdef COMPILER2 ++// Compare strings, used for char[] and byte[]. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae) { ++ Label L, Loop, haveResult, done; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ if (!str1_isL) srl(cnt1, cnt1, 1); ++ if (!str2_isL) srl(cnt2, cnt2, 1); ++ ++ // compute the and difference of lengths (in result) ++ subu(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ slt(AT, cnt2, cnt1); ++ movn(cnt1, cnt2, AT); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ bind(Loop); // Loop begin ++ beq(cnt1, R0, done); ++ if (str1_isL) { ++ delayed()->lbu(AT, str1, 0); ++ } else { ++ delayed()->lhu(AT, str1, 0); ++ } ++ ++ // compare current character ++ if (str2_isL) { ++ lbu(cnt2, str2, 0); ++ } else { ++ lhu(cnt2, str2, 0); ++ } ++ bne(AT, cnt2, haveResult); ++ delayed()->addiu(str1, str1, str1_isL ? 1 : 2); ++ addiu(str2, str2, str2_isL ? 1 : 2); ++ b(Loop); ++ delayed()->addiu(cnt1, cnt1, -1); // Loop end ++ ++ bind(haveResult); ++ subu(result, AT, cnt2); ++ ++ bind(done); ++} ++ ++// Compare char[] or byte[] arrays or substrings. 
++void MacroAssembler::arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp, Register result, ++ bool is_char) { ++ Label Loop, True, False; ++ ++ beq(str1, str2, True); // same char[] ? ++ delayed()->daddiu(result, R0, 1); ++ ++ beq(cnt, R0, True); ++ delayed()->nop(); // count == 0 ++ ++ bind(Loop); ++ ++ // compare current character ++ if (is_char) { ++ lhu(AT, str1, 0); ++ lhu(tmp, str2, 0); ++ } else { ++ lbu(AT, str1, 0); ++ lbu(tmp, str2, 0); ++ } ++ bne(AT, tmp, False); ++ delayed()->addiu(str1, str1, is_char ? 2 : 1); ++ addiu(cnt, cnt, -1); ++ bne(cnt, R0, Loop); ++ delayed()->addiu(str2, str2, is_char ? 2 : 1); ++ ++ b(True); ++ delayed()->nop(); ++ ++ bind(False); ++ daddiu(result, R0, 0); ++ ++ bind(True); ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ ldc1(dst_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ lwc1(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } ++ ++ return code_offset; ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ move(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ delayed()->nop(); ++ move(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ delayed()->nop(); ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ b(done); ++ delayed()->nop(); ++ bind(not_weak); ++ // Resolve (untagged) jobject.
++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ subu(AT, op1, op2); ++ movz(dst, src, AT); ++ break; ++ ++ case NE: ++ subu(AT, op1, op2); ++ movn(dst, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case NE: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GT: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GE: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case LT: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case LE: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case NE: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GT: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GE: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case LT: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case LE: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ Label L; ++ ++ switch(cmp) { ++ case EQ: ++ bne(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case NE: ++ beq(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ 
beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::gs_loadstore(Register reg, Register base, Register index, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: ++ gssbx(reg, base, index, disp); ++ break; ++ case STORE_CHAR: ++ case STORE_SHORT: ++ gsshx(reg, base, index, disp); ++ break; ++ case STORE_INT: ++ gsswx(reg, base, index, disp); ++ break; ++ case STORE_LONG: ++ gssdx(reg, base, index, disp); ++ break; ++ case LOAD_BYTE: ++ gslbx(reg, base, index, disp); ++ break; ++ case LOAD_SHORT: ++ gslhx(reg, base, index, disp); ++ break; ++ case LOAD_INT: ++ gslwx(reg, base, index, disp); ++ break; ++ case LOAD_LONG: ++ gsldx(reg, base, index, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: ++ gsswxc1(reg, base, index, disp); ++ break; ++ case STORE_DOUBLE: ++ gssdxc1(reg, base, index, disp); ++ break; ++ case LOAD_FLOAT: ++ gslwxc1(reg, base, index, disp); ++ break; ++ case LOAD_DOUBLE: ++ gsldxc1(reg, base, index, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: ++ sb(reg, base, disp); ++ break; ++ case STORE_CHAR: ++ case STORE_SHORT: ++ sh(reg, base, disp); ++ break; ++ case STORE_INT: ++ sw(reg, base, disp); ++ break; ++ case STORE_LONG: ++ sd(reg, base, disp); ++ break; ++ case LOAD_BYTE: ++ lb(reg, base, disp); ++ break; ++ case LOAD_U_BYTE: ++ lbu(reg, base, disp); ++ break; ++ case LOAD_SHORT: ++ lh(reg, base, disp); ++ break; ++ case LOAD_U_SHORT: ++ lhu(reg, base, disp); ++ break; ++ case LOAD_INT: ++ lw(reg, base, disp); ++ break; ++ case LOAD_U_INT: ++ lwu(reg, base, disp); ++ break; ++ case LOAD_LONG: ++ ld(reg, base, disp); ++ break; ++ case LOAD_LINKED_LONG: ++ lld(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: ++ swc1(reg, base, disp); ++ break; ++ case STORE_DOUBLE: ++ sdc1(reg, base, disp); ++ break; ++ case LOAD_FLOAT: ++ lwc1(reg, base, disp); ++ break; ++ case LOAD_DOUBLE: ++ ldc1(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.hpp +new file mode 100644 +index 0000000000..55ec29e91b +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.hpp +@@ -0,0 +1,818 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/rtmLocking.hpp" ++#include "utilities/macros.hpp" ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ protected: ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ #define VIRTUAL virtual ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. 
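++ // For example, a call such as call_VM(V0, entry_point, A1) is expected to end up
++ // here with java_thread == noreg (so TREG is used), last_java_sp == noreg (so SP
++ // is used), number_of_arguments == 1 and check_exceptions == true.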
++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ void pd_patch_instruction(address branch, address target); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ void incrementl(Register reg, int value = 1); ++ void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
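++ //
++ // For example, a runtime entry that can throw is typically reached through
++ //   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError));
++ // while a C helper that neither blocks nor throws is reached through
++ //   call_VM_leaf(CAST_FROM_FN_PTR(address, some_c_function), A0);
++ // (illustrative patterns; see the interpreter and stub generators for actual uses).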
++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // jobjects ++ void clear_jweak_tag(Register possibly_jweak); ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 
0 : 1 ++ void c2bool(Register x); ++ ++ void resolve_oop_handle(Register result, Register tmp); ++ void load_mirror(Register dst, Register method, Register tmp); ++ ++ // oop manipulations ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2); ++ ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register tmp2 = noreg, DecoratorSet decorators = 0); ++ ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); ++ ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // Returns the byte size of the instructions generated by decode_klass_not_null() ++ // when compressed klass pointers are being used. 
++ static int instr_size_for_decode_klass_not_null(); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ ++ ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { /*dsll32(reg, reg, 16); dsra32(reg, reg, 16);*/ seh(reg, reg); } ++ void sign_extend_byte(Register reg) { /*dsll32(reg, reg, 24); dsra32(reg, reg, 24);*/ seb(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. 
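++ // Roughly equivalent to the following sequence (sketch):
++ //   Label L_failure;
++ //   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
++ //   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
++ //   bind(L_failure);   // fall through on failure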
++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) ++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void print_reg(Register reg); ++ void print_reg(FloatRegister reg); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 32768) { ++ sw(A0, SP, -offset); ++ } else { ++ li(AT, offset); ++ dsubu(AT, SP, AT); ++ sw(A0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ // Check for reserved stack access in method being exited (for JIT) ++ void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ // Support for serializing memory accesses between threads ++ void serialize_memory(Register thread, Register tmp); ++ ++ void safepoint_poll(Label& slow_path, Register thread_reg); ++ void safepoint_poll_acquire(Label& slow_path, Register thread_reg); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will ++ // be killed; if not supplied, push/pop will be used internally to ++ // allocate a temporary (inefficient, avoid if possible). ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. 
++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++#ifdef COMPILER2 ++ void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); ++ void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); ++#endif ++ ++ ++ // Arithmetics ++ // Regular vs. d* versions ++ inline void addu_long(Register rd, Register rs, Register rt) { ++ daddu(rd, rs, rt); ++ } ++ inline void addu_long(Register rd, Register rs, long imm32_64) { ++ daddiu(rd, rs, imm32_64); ++ } ++ ++ void round_to(Register reg, int modulus) { ++ assert_different_registers(reg, AT); ++ increment(reg, modulus - 1); ++ move(AT, - modulus); ++ andr(reg, reg, AT); ++ } ++ ++ // the follow two might use AT register, be sure you have no meanful data in AT before you call them ++ void increment(Register reg, int imm); ++ void decrement(Register reg, int imm); ++ ++ void shl(Register reg, int sa) { dsll(reg, reg, sa); } ++ void shr(Register reg, int sa) { dsrl(reg, reg, sa); } ++ void sar(Register reg, int sa) { dsra(reg, reg, sa); } ++ ++ // Helper functions for statistics gathering. ++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ // Emit the CompiledIC call idiom ++ void ic_call(address entry, jint method_index = 0); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // always long jumps ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ void patchable_call(address target); ++ void general_call(address target); ++ ++ void patchable_jump(address target); ++ void general_jump(address target); ++ ++ static int insts_for_patchable_call(address target); ++ static int insts_for_general_call(address target); ++ ++ static int insts_for_patchable_jump(address target); ++ static int insts_for_general_jump(address target); ++ ++ // Floating ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ sd(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, 
Register base, int offset16) { ++ sd(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // swap the two byte of the low 16-bit halfword ++ // this directive will use AT, be sure the high 16-bit of reg is zero ++ void hswap(Register reg); ++ void huswap(Register reg); ++ ++ // convert big endian integer to little endian integer ++ void swap(Register reg); ++ ++ // implement the x86 instruction semantic ++ // if c_reg == *dest then *dest <= x_reg ++ // else c_reg <= *dest ++ // the AT indicate if xchg occurred, 1 for xchged, else 0 ++ void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, ++ bool retold, bool barrier); ++ void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, ++ bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, ++ bool sign, bool retold, bool barrier); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ void cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi); ++ ++ //pop & push ++ void extend_sign(Register rh, Register rl) { stop("extend_sign"); } ++ void neg(Register reg) { dsubu(reg, R0, reg); } ++ void push (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void push (FloatRegister reg) { daddiu(SP, SP, -8); sdc1(reg, SP, 0); } ++ void pop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop (FloatRegister reg) { ldc1(reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop () { daddiu(SP, SP, 8); } ++ void pop2 () { daddiu(SP, SP, 16); } ++ void push2(Register reg1, Register reg2); ++ void pop2 (Register reg1, Register reg2); ++ void dpush (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void dpop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ //we need 2 fun to save and resotre general register ++ void pushad(); ++ void popad(); ++ void pushad_except_v0(); ++ void popad_except_v0(); ++ ++ //move an 32-bit immediate to Register ++ void move(Register reg, int imm32) { li32(reg, imm32); } ++ void li (Register rd, long imm); ++ void li (Register rd, address addr) { li(rd, (long)addr); } ++ //replace move(Register reg, int imm) ++ void li32(Register rd, int imm32); // sign-extends to 64 bits on mips64 ++ void set64(Register d, jlong value); ++ static int insts_for_set64(jlong value); ++ ++ void patchable_set48(Register d, jlong value); ++ void patchable_set32(Register d, jlong value); ++ ++ void patchable_call32(Register d, jlong value); ++ ++ static int call_size(address target, bool far, bool patchable); ++ ++ static bool reachable_from_cache(address target); ++ static bool reachable_from_cache(); ++ ++ ++ void dli(Register rd, long imm) { li(rd, imm); } ++ void li64(Register rd, long imm); ++ void li48(Register rd, long imm); ++ ++ void move(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void move_u32(Register rd, Register rs) { addu32(rd, rs, R0); } ++ void dmove(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void mov_metadata(Register dst, Metadata* obj); ++ void mov_metadata(Address dst, Metadata* obj); ++ ++ void store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type); ++ void 
store_for_type(Register src_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ void store_for_type(FloatRegister src_reg, Address addr, BasicType type = T_INT); ++ void load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type); ++ int load_for_type(Register dst_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ int load_for_type(FloatRegister dst_reg, Address addr, BasicType type = T_INT); ++ ++#ifndef PRODUCT ++ static void pd_print_patched_instruction(address branch) { ++ jint stub_inst = *(jint*) branch; ++ print_instruction(stub_inst); ++ ::tty->print("%s", " (unresolved)"); ++ ++ } ++#endif ++ ++ //FIXME ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++#ifdef COMPILER2 ++ // Compare strings. ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae); ++ ++ // Compare char[] or byte[] arrays. ++ void arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp, Register result, ++ bool is_char); ++#endif ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ ++#undef VIRTUAL ++ ++public: ++ ++// Memory Data Type ++#define INT_TYPE 0x100 ++#define FLOAT_TYPE 0x200 ++#define SIGNED_TYPE 0x10 ++#define UNSIGNED_TYPE 0x20 ++ ++ typedef enum { ++ LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, ++ LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, ++ STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, ++ STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, ++ LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, ++ ++ LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, ++ LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, ++ LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, ++ ++ LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, ++ STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x3, ++ STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x4 ++ } CMLoadStoreDataType; ++ ++ void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { ++ assert((type & INT_TYPE), "must be General reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { ++ assert((type & FLOAT_TYPE), "must be Float reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++private: ++ ++ template <typename T> ++ void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { ++ if (index != 0) { ++ if (Assembler::is_simm16(disp)) { ++ if (UseLEXT1 
&& (type & SIGNED_TYPE) && Assembler::is_simm(disp, 8)) { ++ if (scale == 0) { ++ gs_loadstore(reg, as_Register(base), as_Register(index), disp, type); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ gs_loadstore(reg, as_Register(base), AT, disp, type); ++ } ++ } else { ++ if (scale == 0) { ++ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ addu(AT, as_Register(base), AT); ++ } ++ loadstore(reg, AT, disp, type); ++ } ++ } else { ++ if (scale == 0) { ++ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ addu(AT, as_Register(base), AT); ++ } ++ move(RT9, disp); ++ if (UseLEXT1 && (type & SIGNED_TYPE)) { ++ gs_loadstore(reg, AT, RT9, 0, type); ++ } else { ++ addu(AT, AT, RT9); ++ loadstore(reg, AT, 0, type); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ loadstore(reg, as_Register(base), disp, type); ++ } else { ++ move(RT9, disp); ++ if (UseLEXT1 && (type & SIGNED_TYPE)) { ++ gs_loadstore(reg, as_Register(base), RT9, 0, type); ++ } else { ++ addu(AT, as_Register(base), RT9); ++ loadstore(reg, AT, 0, type); ++ } ++ } ++ } ++ } ++ void loadstore(Register reg, Register base, int disp, int type); ++ void loadstore(FloatRegister reg, Register base, int disp, int type); ++ void gs_loadstore(Register reg, Register base, Register index, int disp, int type); ++ void gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type); ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++public: ++ inline SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) ++ : _masm(masm) { ++ _masm->li(AT, (address)flag_addr); ++ _masm->lb(AT, AT, 0); ++ if (value) { ++ _masm->bne(AT, R0, _label); ++ } else { ++ _masm->beq(AT, R0, _label); ++ } ++ _masm->delayed()->nop(); ++ } ++ ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp +new file mode 100644 +index 0000000000..92c05fb726 +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/methodHandles_mips.cpp b/src/hotspot/cpu/mips/methodHandles_mips.cpp +new file mode 100644 +index 0000000000..e9788ac52c +--- /dev/null ++++ b/src/hotspot/cpu/mips/methodHandles_mips.cpp +@@ -0,0 +1,576 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "utilities/preserveException.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) // nothing ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ lw(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); ++ __ sra(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ move(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ move(AT, ref_kind); ++ __ beq(temp, AT, L); ++ __ delayed()->nop(); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ __ delayed()->nop(); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? 
++ __ lbu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ delayed()->nop(); ++ __ ld(T9, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld(T9, method, in_bytes(entry_offset)); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld(AT, recv_addr); ++ __ beq(recv, AT, L); ++ __ delayed()->nop(); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. 
++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T9: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t9_argp = T9; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ lhu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm16(iid), "Oops, iid is not simm16! Change the instructions."); ++ __ addiu(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t9_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld(t9_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t9_argp, ++ Address(t9_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t9_first_arg_addr = __ argument_address(t9_argp, -1); ++ } else { ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld(s7_mh, t9_first_arg_addr); ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ // t9_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld(r_recv = T2, t9_first_arg_addr); ++ } ++ DEBUG_ONLY(t9_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T9; ++ Register temp3 = V0; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( rm_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ slt(AT, R0, temp2_index); ++ __ bne(AT, R0, L_index_ok); ++ __ delayed()->nop(); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ slt(AT, rm_index, R0); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r_recv be shifted out. ++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. 
++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). ++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). ++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && oopDesc::is_oop(mh)) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/mips/methodHandles_mips.hpp b/src/hotspot/cpu/mips/methodHandles_mips.hpp +new file mode 100644 +index 0000000000..03b65fc8ef +--- /dev/null ++++ b/src/hotspot/cpu/mips/methodHandles_mips.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. ++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return I29; ++ } +diff --git a/src/hotspot/cpu/mips/mips.ad b/src/hotspot/cpu/mips/mips.ad +new file mode 100644 +index 0000000000..3563bbe0e5 +--- /dev/null ++++ b/src/hotspot/cpu/mips/mips.ad +@@ -0,0 +1,25 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ +diff --git a/src/hotspot/cpu/mips/mips_64.ad b/src/hotspot/cpu/mips/mips_64.ad +new file mode 100644 +index 0000000000..b4acbd83f7 +--- /dev/null ++++ b/src/hotspot/cpu/mips/mips_64.ad +@@ -0,0 +1,12243 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2022, Loongson Technology. 
All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. ++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); ++ reg_def AT ( NS, NS, Op_RegI, 1, AT->as_VMReg()); ++ reg_def AT_H ( NS, NS, Op_RegI, 1, AT->as_VMReg()->next()); ++ reg_def V0 (SOC, SOC, Op_RegI, 2, V0->as_VMReg()); ++ reg_def V0_H (SOC, SOC, Op_RegI, 2, V0->as_VMReg()->next()); ++ reg_def V1 (SOC, SOC, Op_RegI, 3, V1->as_VMReg()); ++ reg_def V1_H (SOC, SOC, Op_RegI, 3, V1->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, 
T3->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 16, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 16, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 17, S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 17, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 18, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 18, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 19, S3->as_VMReg()); ++ reg_def S3_H (SOC, SOE, Op_RegI, 19, S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 20, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 20, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 21, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 21, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 22, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 22, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 23, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 23, S7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 24, T8->as_VMReg()); ++ reg_def T8_H (SOC, SOC, Op_RegI, 24, T8->as_VMReg()->next()); ++ reg_def T9 (SOC, SOC, Op_RegI, 25, T9->as_VMReg()); ++ reg_def T9_H (SOC, SOC, Op_RegI, 25, T9->as_VMReg()->next()); ++ ++// Special Registers ++ reg_def K0 ( NS, NS, Op_RegI, 26, K0->as_VMReg()); ++ reg_def K1 ( NS, NS, Op_RegI, 27, K1->as_VMReg()); ++ reg_def GP ( NS, NS, Op_RegI, 28, GP->as_VMReg()); ++ reg_def GP_H ( NS, NS, Op_RegI, 28, GP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 29, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 29, SP->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 30, FP->as_VMReg()); ++ reg_def FP_H ( NS, NS, Op_RegI, 30, FP->as_VMReg()->next()); ++ reg_def RA ( NS, NS, Op_RegI, 31, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 31, RA->as_VMReg()->next()); ++ ++// Floating registers. 
++reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next()); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next()); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next()); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next()); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next()); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next()); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next()); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next()); ++reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()); ++reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next()); ++reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()); ++reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next()); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next()); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next()); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next()); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next()); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next()); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next()); ++reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next()); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next()); ++reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()); ++reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next()); ++reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()); ++reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next()); ++reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()); ++reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next()); ++reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()); ++reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next()); ++reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()); ++reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next()); ++reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()); ++reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next()); ++reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()); ++reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next()); ++reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()); ++reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next()); ++reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()); ++reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next()); ++reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()); ++reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next()); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next()); ++reg_def F29 ( 
SOC, SOC, Op_RegF, 29, F29->as_VMReg());
++reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next());
++reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg());
++reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next());
++reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg());
++reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next());
++
++
++// ----------------------------
++// Special Registers
++//S6 is used for get_thread(S6)
++//S5 is used for heapbase of compressed oop
++alloc_class chunk0(
++ S7, S7_H,
++ S0, S0_H,
++ S1, S1_H,
++ S2, S2_H,
++ S4, S4_H,
++ S5, S5_H,
++ S6, S6_H,
++ S3, S3_H,
++ T2, T2_H,
++ T3, T3_H,
++ T8, T8_H,
++ T9, T9_H,
++ T1, T1_H, // inline_cache_reg
++ V1, V1_H,
++ A7, A7_H,
++ A6, A6_H,
++ A5, A5_H,
++ A4, A4_H,
++ V0, V0_H,
++ A3, A3_H,
++ A2, A2_H,
++ A1, A1_H,
++ A0, A0_H,
++ T0, T0_H,
++ GP, GP_H,
++ RA, RA_H,
++ SP, SP_H, // stack_pointer
++ FP, FP_H // frame_pointer
++ );
++
++alloc_class chunk1( F0, F0_H,
++ F1, F1_H,
++ F2, F2_H,
++ F3, F3_H,
++ F4, F4_H,
++ F5, F5_H,
++ F6, F6_H,
++ F7, F7_H,
++ F8, F8_H,
++ F9, F9_H,
++ F10, F10_H,
++ F11, F11_H,
++ F20, F20_H,
++ F21, F21_H,
++ F22, F22_H,
++ F23, F23_H,
++ F24, F24_H,
++ F25, F25_H,
++ F26, F26_H,
++ F27, F27_H,
++ F28, F28_H,
++ F19, F19_H,
++ F18, F18_H,
++ F17, F17_H,
++ F16, F16_H,
++ F15, F15_H,
++ F14, F14_H,
++ F13, F13_H,
++ F12, F12_H,
++ F29, F29_H,
++ F30, F30_H,
++ F31, F31_H);
++
++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 );
++reg_class s0_reg( S0 );
++reg_class s1_reg( S1 );
++reg_class s2_reg( S2 );
++reg_class s3_reg( S3 );
++reg_class s4_reg( S4 );
++reg_class s5_reg( S5 );
++reg_class s6_reg( S6 );
++reg_class s7_reg( S7 );
++
++reg_class t_reg( T0, T1, T2, T3, T8, T9 );
++reg_class t0_reg( T0 );
++reg_class t1_reg( T1 );
++reg_class t2_reg( T2 );
++reg_class t3_reg( T3 );
++reg_class t8_reg( T8 );
++reg_class t9_reg( T9 );
++
++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 );
++reg_class a0_reg( A0 );
++reg_class a1_reg( A1 );
++reg_class a2_reg( A2 );
++reg_class a3_reg( A3 );
++reg_class a4_reg( A4 );
++reg_class a5_reg( A5 );
++reg_class a6_reg( A6 );
++reg_class a7_reg( A7 );
++
++reg_class v0_reg( V0 );
++reg_class v1_reg( V1 );
++
++reg_class sp_reg( SP, SP_H );
++reg_class fp_reg( FP, FP_H );
++
++reg_class v0_long_reg( V0, V0_H );
++reg_class v1_long_reg( V1, V1_H );
++reg_class a0_long_reg( A0, A0_H );
++reg_class a1_long_reg( A1, A1_H );
++reg_class a2_long_reg( A2, A2_H );
++reg_class a3_long_reg( A3, A3_H );
++reg_class a4_long_reg( A4, A4_H );
++reg_class a5_long_reg( A5, A5_H );
++reg_class a6_long_reg( A6, A6_H );
++reg_class a7_long_reg( A7, A7_H );
++reg_class t0_long_reg( T0, T0_H );
++reg_class t1_long_reg( T1, T1_H );
++reg_class t2_long_reg( T2, T2_H );
++reg_class t3_long_reg( T3, T3_H );
++reg_class t8_long_reg( T8, T8_H );
++reg_class t9_long_reg( T9, T9_H );
++reg_class s0_long_reg( S0, S0_H );
++reg_class s1_long_reg( S1, S1_H );
++reg_class s2_long_reg( S2, S2_H );
++reg_class s3_long_reg( S3, S3_H );
++reg_class s4_long_reg( S4, S4_H );
++reg_class s5_long_reg( S5, S5_H );
++reg_class s6_long_reg( S6, S6_H );
++reg_class s7_long_reg( S7, S7_H );
++
++reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, A7, A6, A5, A4, V0, A3, A2, A1, A0, T0 );
++
++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, V0, T0 );
++
++reg_class p_reg(
++ S7, S7_H,
++ S0, S0_H,
++ S1, S1_H,
++ S2, S2_H,
++ S4, S4_H,
++ S3, S3_H,
++ T8, T8_H,
++ T2, T2_H,
++ T3, T3_H,
++ T1, T1_H,
++ A7, A7_H,
++ A6, A6_H,
++ A5, A5_H,
++ A4, A4_H,
++ A3, A3_H,
++ A2, A2_H,
++ A1, A1_H,
++ A0, A0_H,
++ T0, T0_H
++ );
++
++reg_class no_T8_p_reg(
++ S7, S7_H,
++ S0, S0_H,
++ S1, S1_H,
++ S2, S2_H,
++ S4, S4_H,
++ S3, S3_H,
++ T2, T2_H,
++ T3, T3_H,
++ T1, T1_H,
++ A7, A7_H,
++ A6, A6_H,
++ A5, A5_H,
++ A4, A4_H,
++ A3, A3_H,
++ A2, A2_H,
++ A1, A1_H,
++ A0, A0_H,
++ T0, T0_H
++ );
++
++reg_class long_reg(
++ S7, S7_H,
++ S0, S0_H,
++ S1, S1_H,
++ S2, S2_H,
++ S4, S4_H,
++ S3, S3_H,
++ T8, T8_H,
++ T2, T2_H,
++ T3, T3_H,
++ T1, T1_H,
++ A7, A7_H,
++ A6, A6_H,
++ A5, A5_H,
++ A4, A4_H,
++ A3, A3_H,
++ A2, A2_H,
++ A1, A1_H,
++ A0, A0_H,
++ T0, T0_H
++ );
++
++
++// Floating point registers.
++// F31 is not used as a temporary register in D2I
++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F31);
++reg_class dbl_reg( F0, F0_H,
++ F1, F1_H,
++ F2, F2_H,
++ F3, F3_H,
++ F4, F4_H,
++ F5, F5_H,
++ F6, F6_H,
++ F7, F7_H,
++ F8, F8_H,
++ F9, F9_H,
++ F10, F10_H,
++ F11, F11_H,
++ F12, F12_H,
++ F13, F13_H,
++ F14, F14_H,
++ F15, F15_H,
++ F16, F16_H,
++ F17, F17_H,
++ F18, F18_H,
++ F19, F19_H,
++ F20, F20_H,
++ F21, F21_H,
++ F22, F22_H,
++ F23, F23_H,
++ F24, F24_H,
++ F25, F25_H,
++ F26, F26_H,
++ F27, F27_H,
++ F28, F28_H,
++ F29, F29_H,
++ F31, F31_H);
++
++reg_class flt_arg0( F12 );
++reg_class dbl_arg0( F12, F12_H );
++reg_class dbl_arg1( F14, F14_H );
++
++%}
++
++//----------DEFINITION BLOCK---------------------------------------------------
++// Define name --> value mappings to inform the ADLC of an integer valued name
++// Current support includes integer values in the range [0, 0x7FFFFFFF]
++// Format:
++// int_def <name> ( <int_value>, <expression>);
++// Generated Code in ad_<arch>.hpp
++// #define <name> (<expression>)
++// // value == <int_value>
++// Generated code in ad_<arch>.cpp adlc_verification()
++// assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
++//
++definitions %{
++ int_def DEFAULT_COST ( 100, 100);
++ int_def HUGE_COST (1000000, 1000000);
++
++ // Memory refs are twice as expensive as run-of-the-mill.
++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2);
++
++ // Branches are even more expensive.
++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3);
++ // we use the jr instruction to construct calls, so they are more expensive
++ int_def CALL_COST ( 500, DEFAULT_COST * 5);
++/*
++ int_def EQUAL ( 1, 1 );
++ int_def NOT_EQUAL ( 2, 2 );
++ int_def GREATER ( 3, 3 );
++ int_def GREATER_EQUAL ( 4, 4 );
++ int_def LESS ( 5, 5 );
++ int_def LESS_EQUAL ( 6, 6 );
++*/
++%}
++
++
++
++//----------SOURCE BLOCK-------------------------------------------------------
++// This is a block of C++ code which provides values, functions, and
++// definitions necessary in the rest of the architecture description
++
++source_hpp %{
++// Header information of the source block.
++// Method declarations/definitions which are used outside
++// the ad-scope can conveniently be defined here.
++//
++// To keep related declarations/definitions/uses close together,
++// we switch between source %{ }% and source_hpp %{ }% freely as needed.
++
++class CallStubImpl {
++
++ //--------------------------------------------------------------
++ //---< Used for optimization in Compile::shorten_branches >---
++ //--------------------------------------------------------------
++
++ public:
++ // Size of call trampoline stub.
++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++ ++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as jump and can be patched to ++ // a call be deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section. ++ int size = NativeCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++}; ++ ++%} // end source_hpp ++ ++source %{ ++ ++#define NO_INDEX 0 ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++ ++ ++#define __ _masm. ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ switch (opcode) { ++ //Op_CountLeadingZerosI Op_CountLeadingZerosL can be deleted, all MIPS CPUs support clz & dclz. 
++ case Op_CountLeadingZerosI: ++ case Op_CountLeadingZerosL: ++ if (!UseCountLeadingZerosInstructionMIPS64) ++ return false; ++ break; ++ case Op_CountTrailingZerosI: ++ case Op_CountTrailingZerosL: ++ if (!UseCountTrailingZerosInstructionMIPS64) ++ return false; ++ break; ++ } ++ ++ return true; // Per default match rules are supported. ++} ++ ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++ // TODO ++ // identify extra cases that we might want to provide match rules for ++ // e.g. Op_ vector nodes and other intrinsics while guarding with vlen ++ bool ret_value = match_rule_supported(opcode); ++ // Add rules here. ++ ++ return ret_value; // Per default match rules are supported. ++} ++ ++const bool Matcher::has_predicated_vectors(void) { ++ return false; ++} ++ ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ Unimplemented(); ++ return default_pressure_threshold; ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ int offs = offset - br_size + 4; ++ // To be conservative on MIPS ++ // branch node should be end with: ++ // branch inst ++ // delay slot ++ const int safety_zone = 3 * BytesPerInstWord; ++ return Assembler::is_simm16((offs<0 ? offs-safety_zone : offs+safety_zone) >> 2); ++} ++ ++ ++// No additional cost for CMOVL. ++const int Matcher::long_cmove_cost() { return 0; } ++ ++// No CMOVF/CMOVD with SSE2 ++const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? ++const bool Matcher::need_masked_shift_count = false; ++ ++bool Matcher::narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++} ++ ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP. ++ return true; ++} ++ ++bool Matcher::const_klass_prefer_decode() { ++ // TODO: Either support matching DecodeNKlass (heap-based) in operand ++ // or condisider the following: ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ //return Universe::narrow_klass_base() == NULL; ++ return true; ++} ++ ++// This is UltraSparc specific, true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ if (MaxVectorSize == 0) ++ return 0; ++ assert(MaxVectorSize == 8, ""); ++ return 8; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 8, ""); ++ switch(size) { ++ case 8: return Op_VecD; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Only lowest bits of xmm reg are used for vector shift count. ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; ++} ++ ++ ++const bool Matcher::convi2l_type_required = true; ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? 
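++// On MIPS this simply defers to the shared clone_base_plus_offset_address() helper; see the implementation directly below.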
++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} ++ ++void Compile::reshape_address(AddPNode* addp) { ++} ++ ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ return max_vector_size(bt); // Same as max. ++} ++ ++// MIPS supports misaligned vectors store/load? FIXME ++const bool Matcher::misaligned_vectors_ok() { ++ return false; ++ //return !AlignVector; // can be changed by flag ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++int Matcher::regnum_to_fpu_offset(int regnum) { ++ return regnum - 32; // The FP registers are in the second chunk ++} ++ ++ ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ return true; ++} ++ ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. ++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. 
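++// On this port that means T0 and A0..A7 for integer/pointer arguments and F12..F19 for floating-point arguments (see can_be_java_arg() below).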
++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_mips_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F12_num || reg == F12_H_num ++ || reg == F13_num || reg == F13_H_num ++ || reg == F14_num || reg == F14_H_num ++ || reg == F15_num || reg == F15_H_num ++ || reg == F16_num || reg == F16_H_num ++ || reg == F17_num || reg == F17_H_num ++ || reg == F18_num || reg == F18_H_num ++ || reg == F19_num || reg == F19_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++// MIPS doesn't support AES intrinsics ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++// If CPU can load and store mis-aligned doubles directly then no fixup is ++// needed. Else we split the double into 2 integer pieces and move it ++// piece-by-piece. Only happens when passing doubles into C code as the ++// Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = false; ++// Do floats take an entire double register or just half? ++//const bool Matcher::float_in_double = true; ++bool Matcher::float_in_double() { return false; } ++// Do ints take an entire long register or just half? ++const bool Matcher::int_in_long = true; ++// Is it better to copy float constants, or load them directly from memory? ++// Intel can load a float constant from a direct address, requiring no ++// extra registers. Most RISCs will have to materialize an address into a ++// register first, so they would do better to copy the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++// Advertise here if the CPU requires explicit rounding operations ++// to implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++// false => size gets scaled to BytesPerLong, ok. 
++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Indicate if the safepoint node needs the polling page as an input. ++// it does if the polling page is more than disp32 away. ++bool SafePointNode::needs_polling_address_input() { ++ return SafepointMechanism::uses_thread_local_poll(); ++} ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. ++int MachCallStaticJavaNode::ret_addr_offset() { ++ //lui ++ //ori ++ //nop ++ //nop ++ //jalr ++ //nop ++ return 24; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ //lui IC_Klass, ++ //ori IC_Klass, ++ //dsll IC_Klass ++ //ori IC_Klass ++ ++ //lui T9 ++ //ori T9 ++ //nop ++ //nop ++ //jalr T9 ++ //nop ++ return 4 * 4 + 4 * 6; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! 
++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld(AT, Address(SP, src_offset)); ++ __ sd(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "sd AT, [SP + #%d]", ++ src_offset, dst_offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ lw(AT, Address(SP, src_offset)); ++ __ sw(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("lw AT, [SP + #%d] spill 2\n\t" ++ "sw AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld %s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ lw(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else ++ __ lwu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ if (this->ideal_reg() == Op_RegI) ++ st->print("lw %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("lwu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ldc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ldc1 %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ lwc1( 
as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("lwc1 %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sd(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sd %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sw(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sw %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ daddu(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ dmtc1(as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("dmtc1 %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mtc1( as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first]) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mtc1 %s, %s\t# spill 13", ++ Matcher::regName[dst_first], 
++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sdc1( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sdc1 %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ swc1(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("swc1 %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ dmfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("dmfc1 %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mfc1 %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, ra_, false, st ); ++} ++#endif ++ ++void 
MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("daddiu SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); ++ st->print("\t"); ++ if (UseLEXT1) { ++ st->print_cr("gslq RA, FP, SP, %d # Restore FP & RA @ MachEpilogNode", -wordSize*2); ++ } else { ++ st->print_cr("ld RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ } ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ st->print_cr("ld AT, poll_offset[thread] #polling_page_address\n\t" ++ "lw AT, [AT]\t" ++ "# Safepoint: poll for GC"); ++ } else { ++ st->print_cr("Poll Safepoint # MachEpilogNode"); ++ } ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ assert(Assembler::is_simm16(framesize), "daddiu uses a signed 16-bit int"); ++ ++ if (UseLEXT1) { ++ __ gslq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ ld(RA, SP, framesize - wordSize ); ++ __ ld(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(SP, SP, framesize); ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ __ ld(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ relocate(relocInfo::poll_return_type); ++ __ lw(AT, AT, 0); ++ } else { ++ __ set64(AT, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ lw(AT, AT, 0); ++ } ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way fujie debug ++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++int MachEpilogNode::safepoint_offset() const { return 0; } ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ return 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ __ addiu(as_Register(reg), SP, offset); ++} ++ ++ ++//static int 
sizeof_FFree_Float_Stack_All = -1; ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ //lui ++ //ori ++ //dsll ++ //ori ++ //jalr ++ //nop ++ assert(NativeCall::instruction_size == 24, "in MachCallRuntimeNode::ret_addr_offset()"); ++ return NativeCall::instruction_size; ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T9, T0)"); ++ st->print_cr("\tbeq(T9, iCache, L)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tnop"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++ ++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T9, receiver); ++ __ beq(T9, iCache, L); ++ __ delayed()->nop(); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++//============================================================================= ++ ++const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); ++ ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++ ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ Compile* C = ra_->C; ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ MacroAssembler _masm(&cbuf); ++ ++ Register Rtoc = as_Register(ra_->get_encode(this)); ++ CodeSection* consts_section = __ code()->consts(); ++ int consts_size = consts_section->align_at_start(consts_section->size()); ++ assert(constant_table.size() == consts_size, "must be equal"); ++ ++ if (consts_section->size()) { ++ // Materialize the constant table base. 
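++          // baseaddr below is the absolute start of the constant section:
++          // table_base_offset() is 0 in this port (see calculate_table_base_offset() above),
++          // and patchable_set48 always emits a fixed 4-instruction sequence, which is why
++          // MachConstantBaseNode::size() reports 4 * 4 bytes for this node.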
++ address baseaddr = consts_section->start() + -(constant_table.table_base_offset()); ++ // RelocationHolder rspec = internal_word_Relocation::spec(baseaddr); ++ __ relocate(relocInfo::internal_word_type); ++ __ patchable_set48(Rtoc, (long)baseaddr); ++ } ++} ++ ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ // patchable_set48 (4 insts) ++ return 4 * 4; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Register r = as_Register(ra_->get_encode(this)); ++ st->print("patchable_set48 %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); ++} ++#endif ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. But be careful, because ++ // some VM calls (such as call site linkage) can use several kilobytes of ++ // stack. But the stack safety zone should account for that. ++ // See bugs 4446381, 4468289, 4497237. ++ if (C->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ if (UseLEXT1) { ++ st->print("gssq RA, FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } else { ++ st->print("sd RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("sd FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } ++ st->print("daddiu FP, SP, -%d \n\t", wordSize*2); ++ st->print("daddiu SP, SP, -%d \t",framesize); ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ assert(Assembler::is_simm16(-framesize), "daddiu uses a signed 16-bit int"); ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ __ daddiu(SP, SP, -framesize); ++ if (UseLEXT1) { ++ __ gssq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ sd(RA, SP, framesize - wordSize); ++ __ sd(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(FP, SP, framesize - wordSize * 2); ++ ++ C->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. 
Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. ++encode %{ ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. ++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. ++ MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); ++ ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T9; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ __ bind(miss); ++ __ move(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------MIPS FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. 
Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame %{ ++ ++ stack_direction(TOWARDS_LOW); ++ ++ // These two registers define part of the calling convention ++ // between compiled code and the interpreter. ++ // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention ++ // for more information. ++ ++ inline_cache_reg(T1); // Inline Cache Register ++ interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ sync_stack_slots(2); ++ ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ ++ interpreter_frame_pointer(FP); ++ ++ // generate Matcher::stack_alignment ++ stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); ++ ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. ++ in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! Leave two stack slots for ra and fp ++ ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(0); ++ ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); ++ return_addr(REG RA); ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // will generated to Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing) ++ // StartNode::calling_convention call this. ++ calling_convention %{ ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ ++ ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. 
Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // SEE CallRuntimeNode::calling_convention for more information. ++ c_calling_convention %{ ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); ++ %} ++ ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ c_return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++ // Location of return values ++ // register(s) contain(s) return value for Op_StartC2I and Op_Start. ++ // SEE Matcher::match. ++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. ++ ++// Vectors ++operand vecD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(VecD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? 
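The operand definitions that follow are the user-defined types which the instruct rules at the end of this file consume, together with the pipe classes from the pipeline block. As a rough illustration of how the pieces fit together (a sketch only, not part of the patch; the rule name is hypothetical and the file's real add rules may be formulated differently):

  instruct addI_Reg_imm16_sketch(mRegI dst, mRegI src, immI16 imm) %{
    match(Set dst (AddI src imm));
    format %{ "addiu $dst, $src, $imm  # sketch only" %}
    ins_encode %{
      // addiu takes a signed 16-bit immediate, which is exactly what the immI16 predicate below guarantees
      __ addiu($dst$$Register, $src$$Register, $imm$$constant);
    %}
    ins_pipe(ialu_regI_imm16);
  %}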
++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ // TODO: should not match immI8 here LEE ++ match(immI8); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI8() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI16() %{ ++ predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for test vs zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for increment ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constants for increment ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M32767_32768() %{ ++ predicate((-32767 <= n->get_int()) && (n->get_int() <= 32768)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes ++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_32767() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 32767); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_65535() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 65535); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ 
interface(CONST_INTER); ++%} ++ ++// Long Immediate 8-bit ++operand immL8() %{ ++ predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer for polling page ++operand immP_poll() %{ ++ predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); ++ match(ConP); ++ op_cost(5); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL16() %{ ++ predicate((-32768 <= n->get_long()) && (n->get_long() <= 32767)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() %{ ++ predicate(n->get_long() == (int)(n->get_long())); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M1() %{ ++ predicate(n->get_long() == -1L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate: low 32-bit mask ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M32767_32768() %{ ++ predicate((-32767 <= n->get_long()) && (n->get_long() <= 32768)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_65535() %{ ++ predicate(n->get_long() >= 0 && n->get_long() <= 65535); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate: 64-bit ++operand immP_no_oop_cheap() %{ ++ predicate(!n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set64(n->get_ptr()) <= 3)); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand 
immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); ++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT9RegI() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T9" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ 
match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mV0RegI() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V0" %} ++ interface(REG_INTER); ++%} ++ ++operand mV1RegI() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V1" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegN() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ 
interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegN() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegN() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t9_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ 
interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++/* ++operand mSPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(sp_reg)); ++ match(reg); ++ ++ format %{ "SP" %} ++ interface(REG_INTER); ++%} ++ ++operand mFPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(fp_reg)); ++ match(reg); ++ ++ format %{ "FP" %} ++ interface(REG_INTER); ++%} ++*/ ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ 
constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8(mRegP reg, immL8 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg off); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (8-bit)] @ indOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Times Scale Plus Index Register ++operand indIndexScale(mRegP reg, mRegL lreg, immI_0_3 scale) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg (LShiftL lreg scale)); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg << $scale] @ indIndexScale" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale($scale); ++ disp(0x0); ++ %} ++%} ++ ++ ++// [base + index + offset] ++operand baseIndexOffset8(mRegP base, mRegL index, immL8 off) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(5); ++ match(AddP (AddP base index) off); ++ ++ format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// [base + index + offset] ++operand baseIndexOffset8_convI2L(mRegP base, mRegI index, immL8 off) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(5); ++ match(AddP (AddP base (ConvI2L index)) off); ++ ++ format %{ "[$base + $index + $off (8-bit)] 
@ baseIndexOffset8_convI2L" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// [base + index<in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); ++ op_cost(10); ++ match(AddP (AddP base (LShiftL (ConvI2L index) scale)) off); ++ ++ format %{ "[$base + $index << $scale + $off (8-bit)] @ basePosIndexScaleOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale($scale); ++ disp($off); ++ %} ++%} ++ ++//FIXME: I think it's better to limit the immI to be 16-bit at most! ++// Indirect Memory Plus Long Offset Operand ++operand indOffset32(mRegP reg, immL32 off) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(20); ++ match(AddP reg off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indirectNarrowKlass(mRegN reg) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeNKlass reg); ++ ++ format %{ "[$reg] @ indirectNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indOffset8NarrowKlass(mRegN reg, immL8 off) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset32NarrowKlass(mRegN reg, immL32 off) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexOffsetNarrowKlass(mRegN reg, mRegL lreg, immL32 off) ++%{ ++ predicate(UseLEXT1); ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeNKlass reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffsetNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexNarrowKlass(mRegN reg, mRegL lreg) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (DecodeNKlass reg) lreg); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg] @ indIndexNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8Narrow(mRegN reg, immL8 off) ++%{ ++ 
predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register Plus Offset Operand ++operand indIndexOffset8Narrow(mRegN reg, mRegL lreg, immL8 off) ++%{ ++ predicate((Universe::narrow_oop_shift() == 0) && UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeN reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++// Comparision Code ++// Comparison Code, unsigned compare. Used by FP also, with ++// C2 (unordered) turned into GT or LT already. The other bits ++// C0 and C3 are turned into Carry & Zero flags. ++operand cmpOpU() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. 
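Before the stack-slot operands below, one note on the comparison operands above: the codes declared in their COND_INTER blocks (0x01 equal, 0x02 not_equal, and so on) are read back in the branch rules through the operand's $cmpcode accessor. A minimal sketch of that shape, with a hypothetical rule name and only two codes handled for brevity (the real branch rules in this file cover the full set, and their exact encoding may differ):

  instruct branchConI_sketch(cmpOp cop, mRegI src1, mRegI src2, label labl) %{
    match(If cop (CmpI src1 src2));
    effect(USE labl);
    format %{ "b$cop $src1, $src2, $labl  # sketch only" %}
    ins_encode %{
      Label* L = $labl$$label;
      switch ($cop$$cmpcode) {
        case 0x01: __ beq($src1$$Register, $src2$$Register, *L); break;  // equal
        case 0x02: __ bne($src1$$Register, $src2$$Register, *L); break;  // not_equal
        default:   Unimplemented();  // remaining codes omitted in this sketch
      }
      __ delayed()->nop();  // branch delay slot, as in MachUEPNode::emit above
    %}
    ins_pipe(pipe_alu_branch);
  %}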
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++//opclass memory( direct, indirect, indOffset16, indOffset32, indOffset32X, indIndexOffset ); ++opclass memory( indirect, indirectNarrow, indOffset8, indOffset32, indIndex, indIndexScale, baseIndexOffset8, baseIndexOffset8_convI2L, indOffset8Narrow, indIndexOffset8Narrow); ++ ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. 
++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ branch_has_delay_slot; // branch have delay slot in gs2 ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. ++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : 
WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ 
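Before the instruction definitions that follow, a brief aid for reading their ins_encode bodies: every conditional-branch instruct below decodes the same six ADL cmpcode values (0x01 equal, 0x02 not_equal, 0x03 above/greater, 0x04 above_equal/greater_equal, 0x05 below/less, 0x06 below_equal/less_equal) into either a direct beq/bne, or an slt/sltu into AT followed by a branch of AT against R0. The following is a minimal C++ sketch that restates that lowering so the long switch bodies are easier to audit; the enum and function names here are illustrative only and do not appear in the patch itself.

    // Illustrative restatement of the cmpcode -> slt/sltu + beq/bne lowering
    // used by the branchCon* / jmpLoopEnd* instructs in this file.
    #include <cstdint>
    #include <cstdio>

    // cmpcode values as used in the ins_encode switches.
    enum CmpCode { EQ = 0x01, NE = 0x02, GT = 0x03, GE = 0x04, LT = 0x05, LE = 0x06 };

    // Signed form (CmpI/CmpL): "slt AT, a, b" is modeled as (a < b),
    // then bne AT, R0 / beq AT, R0 decides whether the branch is taken.
    static bool branch_taken_signed(int code, int64_t op1, int64_t op2) {
      switch (code) {
        case EQ: return op1 == op2;    // beq op1, op2
        case NE: return op1 != op2;    // bne op1, op2
        case GT: return  (op2 < op1);  // slt AT, op2, op1 ; bne AT, R0
        case GE: return !(op1 < op2);  // slt AT, op1, op2 ; beq AT, R0
        case LT: return  (op1 < op2);  // slt AT, op1, op2 ; bne AT, R0
        case LE: return !(op2 < op1);  // slt AT, op2, op1 ; beq AT, R0
        default: return false;
      }
    }

    // Unsigned form (CmpU/CmpUL/CmpP): same shape, but sltu compares unsigned.
    static bool branch_taken_unsigned(int code, uint64_t op1, uint64_t op2) {
      switch (code) {
        case EQ: return op1 == op2;
        case NE: return op1 != op2;
        case GT: return  (op2 < op1);
        case GE: return !(op1 < op2);
        case LT: return  (op1 < op2);
        case LE: return !(op2 < op1);
        default: return false;
      }
    }

    int main() {
      // -1 > 1 is false for the signed compare, true for the unsigned one,
      // which is exactly why CmpI uses slt while CmpU/CmpP use sltu.
      printf("%d %d\n",
             branch_taken_signed(GT, -1, 1),
             branch_taken_unsigned(GT, (uint64_t)-1, 1));
      return 0;
    }

The long-offset variants (*_long) emit beq_long/bne_long on the same AT result, while the short-offset variants (*_short) emit beq/bne with a delay-slot nop; the comparison itself is identical in both.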
++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. ++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lhu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Long. 
++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld $dst, $mem #@loadL" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(200); ++ format %{ "sd $mem, $src #@storeL_reg\n" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(180); ++ format %{ "sd zero, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# @ loadN2P" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $mem #@loadP" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, 
$mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(150); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ move(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL_set64(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL_set64" %} ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct loadConL16(mRegL dst, immL16 src) %{ ++ match(Set dst src); ++ ins_cost(105); ++ format %{ "mov $dst, $src #@loadConL16" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ int value = $src$$constant; ++ __ daddiu(dst_reg, R0, value); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct loadConL_immL_0(mRegL dst, immL_0 src) %{ ++ match(Set dst src); ++ ins_cost(100); ++ format %{ "mov $dst, zero #@loadConL_immL_0" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(125); ++ format %{ "sd $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. 
++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_immP_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2N" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB_immB_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreB mem zero)); ++ ++ format %{ "mov $mem, zero #@storeB_immB_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB(memory mem, mRegI src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreB mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, 
MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeI_immI_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ ++ format %{ "mov $mem, zero #@storeI_immI_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegI src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreI mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_FLOAT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct 
loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if($src->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ set64(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct loadConP_poll(mRegP dst, immP_poll src) %{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "li $dst, $src #@loadConP_poll" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ intptr_t value = (intptr_t)$src$$constant; ++ ++ __ set64(dst, (jlong)value); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ ++ match(TailCall jump_target method_oop ); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register oop = $method_oop$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, oop); ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. 
++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in MIPS"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_mips.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T9 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T9 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ 
match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_zero_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ 
sltu(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ 
switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, R0, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, R0); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, R0); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, R0, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct 
branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //above ++ __ bne_long(R0, op1, *L); ++ break; ++ case 0x04: //above_equal ++ __ beq_long(R0, R0, *L); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ __ beq_long(op1, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_immI16_long(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ 
break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x05: // less ++ __ beq_long(R0, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 
0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ 
break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bne($cr$$Register, R0, L); ++ else ++ __ bne($cr$$Register, R0, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beq($cr$$Register, R0, L); ++ else ++ __ beq($cr$$Register, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_zero_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, 
op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1_reg, op2_reg, L); ++ else ++ __ beq(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1_reg, op2_reg, L); ++ else ++ __ bne(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 
0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //greater ++ if(&L) ++ __ bgtz(op1, L); ++ else ++ __ bgtz(op1, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if(&L) ++ __ 
bgez(op1, L); ++ else ++ __ bgez(op1, (int)0); ++ break; ++ case 0x05: //less ++ if(&L) ++ __ bltz(op1, L); ++ else ++ __ bltz(op1, (int)0); ++ break; ++ case 0x06: //less_equal ++ if(&L) ++ __ blez(op1, L); ++ else ++ __ blez(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_immI_0_short(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //above ++ if(&L) ++ __ bne(R0, op1, L); ++ else ++ __ bne(R0, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if(&L) ++ __ beq(R0, R0, L); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ if(&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_immI16_short(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else 
++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x06: // less_equal ++ __ 
sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if(&target) ++ __ bgtz(opr1_reg, target); ++ else ++ __ bgtz(opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if(&target) ++ __ bgez(opr1_reg, target); ++ else ++ __ bgez(opr1_reg, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, R0); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ blez(opr1_reg, target); ++ else ++ __ blez(opr1_reg, int(0)); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ if(&target) ++ __ beq(R0, R0, target); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ 
if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl 
#@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(16); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! 
++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ sync(); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ match(MemBarVolatile); ++ predicate(Matcher::post_store_load_barrier(n)); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} ++ ins_encode( ); ++ ins_pipe(empty); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ mfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ mtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ dmfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ dmtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Conditional Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ 
++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, 
dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" ++ 
"\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ 
%} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = 
$tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, 
cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" ++ "\tCMOV 
$dst,$src \t @cmovI_cmpF_reg_reg" %}
++
++ ins_encode %{
++ FloatRegister reg_op1 = $tmp1$$FloatRegister;
++ FloatRegister reg_op2 = $tmp2$$FloatRegister;
++ Register dst = $dst$$Register;
++ Register src = $src$$Register;
++ int flag = $cop$$cmpcode;
++
++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */);
++ %}
++ ins_pipe( pipe_slow );
++%}
++
++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{
++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src)));
++ ins_cost(200);
++ format %{
++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n"
++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg"
++ %}
++
++ ins_encode %{
++ FloatRegister reg_op1 = $tmp1$$FloatRegister;
++ FloatRegister reg_op2 = $tmp2$$FloatRegister;
++ FloatRegister dst = $dst$$FloatRegister;
++ FloatRegister src = $src$$FloatRegister;
++ int flag = $cop$$cmpcode;
++
++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */);
++ %}
++ ins_pipe( pipe_slow );
++%}
++
++// Manifest a CmpL result in an integer register. Very painful.
++// This is the test to avoid.
++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{
++ match(Set dst (CmpL3 src1 src2));
++ ins_cost(1000);
++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %}
++ ins_encode %{
++ Register opr1 = as_Register($src1$$reg);
++ Register opr2 = as_Register($src2$$reg);
++ Register dst = as_Register($dst$$reg);
++
++ __ slt(AT, opr1, opr2);
++ __ slt(dst, opr2, opr1);
++ __ subu(dst, dst, AT);
++ %}
++ ins_pipe( pipe_slow );
++%}
++
++//
++// less_result = -1
++// greater_result = 1
++// equal_result = 0
++// nan_result = -1
++//
++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{
++ match(Set dst (CmpF3 src1 src2));
++ ins_cost(1000);
++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %}
++ ins_encode %{
++ FloatRegister src1 = as_FloatRegister($src1$$reg);
++ FloatRegister src2 = as_FloatRegister($src2$$reg);
++ Register dst = as_Register($dst$$reg);
++
++ __ ori(dst, R0, 1);
++ __ ori(AT, R0, 1);
++ __ c_olt_s(src2, src1);
++ __ movf(dst, R0);
++ __ c_ult_s(src1, src2);
++ __ movf(AT, R0);
++ __ subu(dst, dst, AT);
++ %}
++ ins_pipe( pipe_slow );
++%}
++
++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{
++ match(Set dst (CmpD3 src1 src2));
++ ins_cost(1000);
++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %}
++ ins_encode %{
++ FloatRegister src1 = as_FloatRegister($src1$$reg);
++ FloatRegister src2 = as_FloatRegister($src2$$reg);
++ Register dst = as_Register($dst$$reg);
++
++ __ ori(dst, R0, 1);
++ __ ori(AT, R0, 1);
++ __ c_olt_d(src2, src1);
++ __ movf(dst, R0);
++ __ c_ult_d(src1, src2);
++ __ movf(AT, R0);
++ __ subu(dst, dst, AT);
++ %}
++ ins_pipe( pipe_slow );
++%}
++
++instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{
++ match(Set dummy (ClearArray cnt base));
++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %}
++ ins_encode %{
++ //Assume cnt is the number of bytes in an array to be cleared,
++ //and base points to the starting address of the array.
++ Register base = $base$$Register; ++ Register num = $cnt$$Register; ++ Label Loop, done; ++ ++ __ beq(num, R0, done); ++ __ delayed()->daddu(AT, base, R0); ++ ++ __ move(T9, num); /* T9 = words */ ++ ++ __ bind(Loop); ++ __ sd(R0, AT, 0); ++ __ daddiu(T9, T9, -1); ++ __ bne(T9, R0, Loop); ++ __ delayed()->daddiu(AT, AT, wordSize); ++ ++ __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareLU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} ++ ins_encode %{ ++ __ arrays_equals($str1$$Register, $str2$$Register, ++ $cnt$$Register, $temp$$Register, $result$$Register, ++ false/* byte */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition 
Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ addu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_imm" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ if(Assembler::is_simm16(imm)) { ++ __ addiu32(dst, src1, imm); ++ } else { ++ __ move(AT, imm); ++ __ addu32(dst, src1, AT); ++ } ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_convI2L(mRegP dst, mRegP src1, mRegI src2) %{ ++ match(Set dst (AddP src1 (ConvI2L src2))); ++ ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg_convI2L" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm(mRegP dst, mRegP src1, immL16 src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "daddi $dst, $src1, $src2 #@addP_reg_imm" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ __ daddiu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegL src1, immL16 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_imm(mRegL dst, mRegI src1, immL16 src2) ++%{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = 
as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AddL src1 (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Abs Instructions------------------------------------------- ++ ++// Integer Absolute Instructions ++instruct absI_rReg(mRegI dst, mRegI src) ++%{ ++ match(Set dst (AbsI src)); ++ effect(TEMP dst); ++ format %{ "AbsI $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ sra(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ subu32(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Long Absolute Instructions ++instruct absL_rReg(mRegL dst, mRegL src) ++%{ ++ match(Set dst (AbsL src)); ++ effect(TEMP dst); ++ format %{ "AbsL $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dsra32(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ subu(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regL_regL); ++%} ++ ++//----------Subtraction Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ subu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M32767_32768(mRegI dst, mRegI src1, immI_M32767_32768 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addiu32(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu32(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegL src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M32767_32768(mRegL dst, mRegL src1, immL_M32767_32768 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ 
ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ daddiu(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. ++instruct subL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (SubL src1 (ConvI2L src2))); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (SubL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ //if (UseLEXT1) { ++ if (0) { ++ // Experiments show that gsmod is slower than div+mfhi. ++ // So I just disable it here. 
++ __ gsmod(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ __ mfhi(dst); ++ } ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmod(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mfhi(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct maddI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2, mRegI src3) %{ ++ match(Set dst (AddI (MulI src1 src2) src3)); ++ ++ ins_cost(999); ++ format %{ "madd $dst, $src1 * $src2 + $src3 #@maddI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register src3 = $src3$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mtlo(src3); ++ __ madd(src1, src2); ++ __ mflo(dst); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ // In MIPS, div does not cause exception. ++ // We must trap an exception manually. ++ __ teq(R0, src2, 0x7); ++ ++ if (UseLEXT1) { ++ __ gsdiv(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ ++ __ nop(); ++ __ nop(); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? */ ++ __ div_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? 
*/ ++ __ div_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_regI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (MulL src1 (ConvI2L src2))); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_regI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsddiv(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD $dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ 
FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating point numbers ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. ++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// src1 * src2 + src3 ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); ++ ++ format %{ "madd_s $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ madd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 + src3 ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); ++ ++ format %{ "madd_d $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ madd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++ ++ format %{ "msub_s $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ msub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ 
as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++ ++ format %{ "msub_d $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ msub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++ ++ format %{ "nmadds $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ format %{ "nmaddd $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ++ format %{ "nmsubs $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++ ++ format %{ "nmsubd $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer Logical Instructions------------- ++ ++// And Instructions ++// And Register with Immediate ++instruct andI_Reg_immI(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_immI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ move(AT, val); ++ __ andr(dst, src, AT); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct 
andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ dext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL src1, immI_M1 M1) %{ ++ match(Set dst (XorI (ConvL2I src1) M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* ++instruct xorL_Reg_immL_M1(mRegL dst, mRegL src1, immL_M1 M1) %{ ++ match(Set dst (XorL src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL_Reg_immL_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ 
format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_Reg_convI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AndL src1 (ConvI2L src2))); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg_convI2L\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_65535(mRegI dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* 
++instruct andnL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct andnL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 3); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 1, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ ++ match(Set dst (AndL dst M121)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 3, 4); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Long Register with Register ++instruct orL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (OrL src1 src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct 
orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (OrL (CastP2X src1) src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Long Register with Register ++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ xorr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salL2I_Reg_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ ++ match(Set dst (AndI (LShiftI src shift) mask)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sll(dst, src, 16); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); ++ ++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ andi(dst, src, 7); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Shift Left by 16, followed by Arithmetic Shift Right by 16. ++// This idiom is used by the compiler for the i2s bytecode. ++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seh(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Shift Left by 24, followed by Arithmetic Shift Right by 24. ++// This idiom is used by the compiler for the i2b bytecode. 
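++// For example, Java source such as `byte b = (byte) i;` (compiled to the i2b bytecode)
++// typically reaches the matcher as (i << 24) >> 24; this rule folds the two shifts into a
++// single seb, which sign-extends the low-order byte directly.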
++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seb(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sllv(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long ++instruct salL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct salL_RegI2L_imm(mRegL dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftL (ConvI2L src) shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_RegI2L_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsllv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long ++instruct sarL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = ($shift$$constant & 0x3f); ++ if (__ is_simm(shamt, 5)) ++ __ dsra(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsra(dst_reg, src_reg, sa); ++ } else { ++ __ dsra32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} 
++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsra32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrav(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrlv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegL src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "dext $dst, $src, $shift, 31 @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dext(dst_reg, src_reg, shamt, 31); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ 
ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_32767 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotr $dst, $src, 1 ...\n\t" ++ "srl $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotr(dst, src, 1); ++ if (rshift - 1) { ++ __ srl(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 8-bit immediate ++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srl(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "ext $dst, $src, $shift, one-bits($mask) #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, pos, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, 
$dst, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, dst, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegL src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegL src, immI_0_31 lshift, immI_32_63 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srlv(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ __ 
sra(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srav(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLL $dst, $src @ convI2L_reg\t" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ sll(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convL2I_reg( mRegI dst, mRegL src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2I2L_reg( mRegL dst, mRegL src ) %{ ++ match(Set dst (ConvI2L (ConvL2I src))); ++ ++ format %{ "sll $dst, $src, 0 @ convL2I2L_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(src, dst); ++ __ cvt_d_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_fast( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(150); ++ format %{ "convD2L $dst, $src @ convD2L_reg_fast" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label Done; ++ ++ __ trunc_l_d(F30, src); ++ // max_long: 0x7fffffffffffffff ++ // __ set64(AT, 0x7fffffffffffffff); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_slow( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(250); ++ format %{ "convD2L $dst, $src @ convD2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label L; ++ ++ __ c_un_d(src, src); //NaN? 
++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2I_reg_fast( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(150); ++ format %{ "convf2i $dst, $src @ convF2I_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_w_s(F30, fval); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++ ++instruct convF2I_reg_slow( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(250); ++ format %{ "convf2i $dst, $src @ convF2I_reg_slow" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? ++ __ bc1t(L); ++ __ delayed(); ++ __ move(dreg, R0); ++ ++ __ trunc_w_s(F30, fval); ++ ++ /* Call SharedRuntime::f2i() to do a valid conversion */ ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dreg, F30); ++ ++ __ mov_s(F12, fval); ++ ++ // This bug was found when running ezDS's control-panel. ++ // J 982 C2 javax.swing.text.BoxView.layoutMajorAxis(II[I[I)V (283 bytes) @ 0x000000555c46aa74 ++ // ++ // An integer array index has been assigned to V0, and then changed from 1 to Integer.MAX_VALUE. ++ // V0 is corrupted during call_VM_leaf(), and should be preserved. ++ // ++ __ push(fval); ++ if(dreg != V0) { ++ __ push(V0); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); ++ if(dreg != V0) { ++ __ move(dreg, V0); ++ __ pop(V0); ++ } ++ __ pop(fval); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_fast( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(150); ++ format %{ "convf2l $dst, $src @ convF2L_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_l_s(F30, fval); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_slow( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(250); ++ format %{ "convf2l $dst, $src @ convF2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? 
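++ // c.un.s sets the FP condition flag when fval is unordered (NaN); in that case the
++ // following bc1t branches out and the move in its delay slot zeroes dst, so a NaN
++ // input converts to 0 as Java requires.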
++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_s(F30, fval); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_s(F12, fval); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ dmtc1(src, dst); ++ __ cvt_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(src, dst); ++ __ cvt_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "sra $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sra(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; ++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ subu(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ mtc1(src, dst); ++ __ cvt_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_s_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
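++// (This matches Java's narrowing rules: NaN converts to 0 and out-of-range values saturate,
++// e.g. (int) Double.NaN == 0 and (int) 1e30 == Integer.MAX_VALUE. The fast rule below only
++// takes its fix-up path when the truncation result is 0x7fffffff, the value trunc.w.d
++// produces for NaN and out-of-range inputs.)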
++instruct convD2I_reg_reg_fast( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(150); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_fast" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ ++ Label Done; ++ ++ __ trunc_w_d(F30, src); ++ // max_int: 2147483647 ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu32(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2I_reg_reg_slow( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(250); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_slow" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ Label L; ++ ++ __ trunc_w_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = 
$src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. ++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++/* ++// For Loongson CPUs, jr seems too slow, so this rule shouldn't be imported. ++instruct jumpXtnd(mRegL switch_val) %{ ++ match(Jump switch_val); ++ ++ ins_cost(350); ++ ++ format %{ "load T9 <-- [$constanttablebase, $switch_val, $constantoffset] @ jumpXtnd\n\t" ++ "jr T9\n\t" ++ "nop" %} ++ ins_encode %{ ++ Register table_base = $constanttablebase; ++ int con_offset = $constantoffset; ++ Register switch_reg = $switch_val$$Register; ++ ++ if (UseLEXT1) { ++ if (Assembler::is_simm(con_offset, 8)) { ++ __ gsldx(T9, table_base, switch_reg, con_offset); ++ } else if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ gsldx(T9, AT, T9, 0); ++ } ++ } else { ++ if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ } ++ } ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ %} ++ ins_pipe(pipe_jump); ++%} ++*/ ++ ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. 
++//FIXME ++instruct tailjmpInd(mRegP jump_target,mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_mips.cpp] generate_forward_exception() ++ // [runtime_mips.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Prefetch instructions for allocation. 
++ ++instruct prefetchAllocNTA( memory mem ) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "pref $mem\t# Prefetch allocation @ prefetchAllocNTA" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegI src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ storeC" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_CHAR); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "lwc1 $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ lwc1($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gslwxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ lwc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "ldc1 $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ ldc1($dst$$FloatRegister, $constanttablebase, 
con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gsldxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ ldc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_FLOAT); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSI) !"); ++ __ lw($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sw $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSI) !"); ++ __ sw($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSL) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); 
++ format %{ "sd $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSL) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSP) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSP) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lwc1 $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSF) !"); ++ __ lwc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "swc1 $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSF) !"); ++ __ swc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. ++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ldc1 $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSD) !"); ++ __ ldc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSD) !"); ++ __ sdc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate 0 ++instruct storeImmCM(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(150); ++ format %{ "MEMBAR\n\t" ++ "sb $mem, zero\t! 
CMS card-mark imm0" %} ++ ins_encode %{ ++ __ sync(); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ // Here we should emit illtrap ! ++ ++ __ stop("in ShoudNotReachHere"); ++ ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP8Narrow(mRegP dst, indOffset8Narrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off8narrow @ leaP8Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ daddiu(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPPosIdxScaleOff8(mRegP dst, basePosIndexScaleOffset8 mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ PosIdxScaleOff8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (scale == 0) { ++ __ daddu(AT, base, index); ++ __ daddiu(dst, AT, disp); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(AT, base, AT); ++ __ daddiu(dst, AT, disp); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, indIndexScale mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ ++ if (scale == 0) { ++ __ daddu(dst, base, index); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(dst, base, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. 
++ ++instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "move AT, $newval\n\t" ++ "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" ++ "move $cr, AT\n" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storePConditional: index != 0"); ++ } else { ++ __ move(AT, newval); ++ __ scd(AT, addr); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. ++instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg32(addr, oldval, newval, cr, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(cr, AT); ++ } ++ } ++%} ++ ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg(addr, oldval, newval, cr, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. 
++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "lld $dst, $mem #@loadPLocked\n\t" %} ++ size(12); ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LINKED_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++//----------Max and Min-------------------------------------------------------- ++// Min Instructions ++//// ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. 
++// // Conditional move for min ++//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVlt $op2,$op1\t! min" %} ++// opcode(0x4C,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++//// Min Register with Register (P6 version) ++//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MinI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_lt(op2,op1,cr); ++// %} ++//%} ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. ++// // Conditional move for max ++//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVgt $op2,$op1\t! max" %} ++// opcode(0x4F,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++// // Max Register with Register (P6 version) ++//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MaxI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_gt(op2,op1,cr); ++// %} ++//%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ movn(dst, R0, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ dinsu(dst, src2, 32, 32); ++ } else if (src2 == dst) { ++ __ dsll32(dst, dst, 0); ++ __ dins(dst, src1, 0, 32); ++ } else { ++ __ dext(dst, src1, 0, 32); ++ __ dinsu(dst, src2, 32, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL 
dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. ++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++ ++instruct safePoint_poll() %{ ++ predicate(SafepointMechanism::uses_global_page_poll()); ++ match(SafePoint); ++ ++ ins_cost(105); ++ format %{ "poll for GC @ safePoint_poll" %} ++ ++ ins_encode %{ ++ __ block_comment("Safepoint:"); ++ __ set64(T9, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_type); ++ __ lw(AT, T9, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct safePoint_poll_tls(mRegP poll) %{ ++ match(SafePoint poll); ++ predicate(SafepointMechanism::uses_thread_local_poll()); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "lw AT, [$poll]\t" ++ "Safepoint @ [$poll] : poll for GC" %} ++ size(4); ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ address pre_pc = __ pc(); ++ __ lw(AT, poll_reg, 0); ++ assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit lw AT, [$poll]"); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz $dst, 
$src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "dclz $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ dclz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "ctz $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ // ctz and dctz is gs instructions. ++ __ ctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "dcto $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ dctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// Load vectors (8 bytes long) ++instruct loadV8(vecD dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 8); ++ match(Set dst (LoadVector mem)); ++ ins_cost(125); ++ format %{ "load $dst, $mem\t! load vector (8 bytes)" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store vectors (8 bytes long) ++instruct storeV8(memory mem, vecD src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 8); ++ match(Set mem (StoreVector mem src)); ++ ins_cost(145); ++ format %{ "store $mem, $src\t! store vector (8 bytes)" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct Repl8B_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3); ++ match(Set dst (ReplicateB src)); ++ ins_cost(100); ++ format %{ "replv_ob AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ replv_ob(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB src)); ++ ins_cost(140); ++ format %{ "move AT, $src\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateB con)); ++ ins_cost(110); ++ format %{ "repl_ob AT, [$con]\n\t" ++ "dmtc1 AT, $dst,0x00\t! 
replicate8B($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ repl_ob(AT, val); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB con)); ++ ins_cost(150); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB zero)); ++ ins_cost(90); ++ format %{ "dmtc1 R0, $dst\t! replicate8B zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB M1)); ++ ins_cost(80); ++ format %{ "dmtc1 -1, $dst\t! replicate8B -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateS src)); ++ ins_cost(100); ++ format %{ "replv_qh AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ replv_qh(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS src)); ++ ins_cost(120); ++ format %{ "move AT, $src \n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateS con)); ++ ins_cost(100); ++ format %{ "repl_qh AT, [$con]\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ if ( Assembler::is_simm(val, 10)) { ++ //repl_qh supports 10 bits immediate ++ __ repl_qh(AT, val); ++ } else { ++ __ li32(AT, val); ++ __ replv_qh(AT, AT); ++ } ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS con)); ++ ins_cost(110); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS zero)); ++ format %{ "dmtc1 R0, $dst\t! 
replicate4S zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate4S -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar to be vector ++instruct Repl2I(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI src)); ++ format %{ "dins AT, $src, 0, 32\n\t" ++ "dinsu AT, $src, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate2I" %} ++ ins_encode %{ ++ __ dins(AT, $src$$Register, 0, 32); ++ __ dinsu(AT, $src$$Register, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. ++instruct Repl2I_imm(vecD dst, immI con, mA7RegI tmp) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI con)); ++ effect(KILL tmp); ++ format %{ "li32 AT, [$con], 32\n\t" ++ "dinsu AT, AT\n\t" ++ "dmtc1 AT, $dst\t! replicate2I($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ li32(AT, val); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar zero to be vector ++instruct Repl2I_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2I zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar -1 to be vector ++instruct Repl2I_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate2I -1, use AT" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate float (4 byte) scalar to be vector ++instruct Repl2F(vecD dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF src)); ++ format %{ "cvt.ps $dst, $src, $src\t! replicate2F" %} ++ ins_encode %{ ++ __ cvt_ps_s($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Replicate float (4 byte) scalar zero to be vector ++instruct Repl2F_zero(vecD dst, immF_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2F zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++ ++// ====================VECTOR ARITHMETIC======================================= ++ ++// --------------------------------- ADD -------------------------------------- ++ ++// Floats vector add ++// kernel does not have emulation of PS instructions yet, so PS instructions is disabled. ++instruct vadd2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF dst src)); ++ format %{ "add.ps $dst,$src\t! 
add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct vadd2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "add.ps $dst,$src1,$src2\t! add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++// Floats vector sub ++instruct vsub2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVF dst src)); ++ format %{ "sub.ps $dst,$src\t! sub packed2F" %} ++ ins_encode %{ ++ __ sub_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++ ++// Floats vector mul ++instruct vmul2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF dst src)); ++ format %{ "mul.ps $dst, $src\t! mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct vmul2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "mul.ps $dst, $src1, $src2\t! mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++// MIPS do not have div.ps ++ ++// --------------------------------- MADD -------------------------------------- ++// Floats vector madd ++//instruct vmadd2F(vecD dst, vecD src1, vecD src2, vecD src3) %{ ++// predicate(n->as_Vector()->length() == 2); ++// match(Set dst (AddVF (MulVF src1 src2) src3)); ++// ins_cost(50); ++// format %{ "madd.ps $dst, $src3, $src1, $src2\t! madd packed2F" %} ++// ins_encode %{ ++// __ madd_ps($dst$$FloatRegister, $src3$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++// %} ++// ins_pipe( fpu_regF_regF ); ++//%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. 
++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. ++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ +diff --git a/src/hotspot/cpu/mips/nativeInst_mips.cpp b/src/hotspot/cpu/mips/nativeInst_mips.cpp +new file mode 100644 +index 0000000000..514298bd8b +--- /dev/null ++++ b/src/hotspot/cpu/mips/nativeInst_mips.cpp +@@ -0,0 +1,1820 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "compiler/disassembler.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++ ++#include ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++static int illegal_instruction_bits = 0; ++ ++int NativeInstruction::illegal_instruction() { ++ if (illegal_instruction_bits == 0) { ++ ResourceMark rm; ++ char buf[40]; ++ CodeBuffer cbuf((address)&buf[0], 20); ++ MacroAssembler* a = new MacroAssembler(&cbuf); ++ address ia = a->pc(); ++ a->brk(11); ++ int bits = *(int*)ia; ++ illegal_instruction_bits = bits; ++ } ++ return illegal_instruction_bits; ++} ++ ++bool NativeInstruction::is_int_branch() { ++ switch(Assembler::opcode(insn_word())) { ++ case Assembler::beq_op: ++ case Assembler::beql_op: ++ case Assembler::bgtz_op: ++ case Assembler::bgtzl_op: ++ case Assembler::blez_op: ++ case Assembler::blezl_op: ++ case Assembler::bne_op: ++ case Assembler::bnel_op: ++ return true; ++ case Assembler::regimm_op: ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bgez_op: ++ case Assembler::bgezal_op: ++ case Assembler::bgezall_op: ++ case Assembler::bgezl_op: ++ case Assembler::bltz_op: ++ case Assembler::bltzal_op: ++ case Assembler::bltzall_op: ++ case Assembler::bltzl_op: ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ if (!is_op(Assembler::cop1_op) || ++ !is_rs((Register)Assembler::bc1f_op)) return false; ++ ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bcf_op: ++ case Assembler::bcfl_op: ++ case Assembler::bct_op: ++ case Assembler::bctl_op: ++ return true; ++ } ++ ++ return false; ++} ++ ++ ++void NativeCall::verify() { ++ // make sure code pattern is actually a call instruction ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return; ++ } ++ ++ // jal targe ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return; ++ } 
++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ // FIXME: why add jr_op here? ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ if (nativeInstruction_at(addr_at(0))->is_trampoline_call()) ++ return; ++ ++ fatal("not a call"); ++} ++ ++address NativeCall::target_addr_for_insn() const { ++ // jal target ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && 
++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ld_op) ) { ++ ++ address dest = (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ return dest + Assembler::simm16((intptr_t)int_at(12) & 0xffff); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ 
(intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ tty->print_cr("not a call: addr = " INTPTR_FORMAT , p2i(addr_at(0))); ++ tty->print_cr("======= Start decoding at addr = " INTPTR_FORMAT " =======", p2i(addr_at(0))); ++ Disassembler::decode(addr_at(0) - 2 * 4, addr_at(0) + 8 * 4, tty); ++ tty->print_cr("======= End of decoding ======="); ++ fatal("not a call"); ++ return NULL; // unreachable ++} ++ ++// Extract call destination from a NativeCall. The call might use a trampoline stub. ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_insn(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ if (nm->stub_contains(destination) && ni->is_trampoline_call()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. ++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ // Patch the constant in the call's trampoline stub. 
++ if (MacroAssembler::reachable_from_cache()) { ++ set_destination(dest); ++ } else { ++ address trampoline_stub_addr = nativeCall_at(addr_call)->target_addr_for_insn(); ++ assert (get_trampoline() != NULL && trampoline_stub_addr == get_trampoline(), "we need a trampoline"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ } ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ if (code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } ++ return NULL; ++} ++ ++// manual implementation of GSSQ ++// ++// 00000001200009c0 : ++// 1200009c0: 0085202d daddu a0, a0, a1 ++// 1200009c4: e8860027 gssq a2, a3, 0(a0) ++// 1200009c8: 03e00008 jr ra ++// 1200009cc: 00000000 nop ++// ++typedef void (* atomic_store128_ptr)(long *addr, int offset, long low64, long hi64); ++ ++static int *buf; ++ ++static atomic_store128_ptr get_atomic_store128_func() { ++ assert(UseLEXT1, "UseLEXT1 must be true"); ++ static atomic_store128_ptr p = NULL; ++ if (p != NULL) ++ return p; ++ ++ buf = (int *)mmap(NULL, 1024, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, ++ -1, 0); ++ buf[0] = 0x0085202d; ++ buf[1] = (0x3a << 26) | (4 << 21) | (6 << 16) | 0x27; /* gssq $a2, $a3, 0($a0) */ ++ buf[2] = 0x03e00008; ++ buf[3] = 0; ++ ++ asm("sync"); ++ p = (atomic_store128_ptr)buf; ++ return p; ++} ++ ++void NativeCall::patch_on_jal_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(0, jal_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(16, jal_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal(address dst) { ++ patch_on_jal_gs(dst); ++} ++ ++void NativeCall::patch_on_trampoline(address dest) { ++ assert(nativeInstruction_at(addr_at(0))->is_trampoline_call(), "unexpected code at call site"); ++ jlong dst = (jlong) dest; ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ((dst> 0) && Assembler::is_simm16(dst >> 32)) { ++ dst += (dst & 0x8000) << 1; ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low(dst >> 32) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(dst >> 16) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low(dst) & 0xffff)); ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jalr_gs(address dst) { ++ patch_set48_gs(dst); ++} ++ ++void NativeCall::patch_on_jalr(address dst) { ++ patch_set48(dst); ++} ++ ++void NativeCall::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ int count = 0; ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if 
(value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[2] = {0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ //set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ //set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ //set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ //nop(); ++ //set_int_at(count << 2, 0); ++ insts[count] = 0; ++ count++; ++ } ++ ++ long inst = insts[1]; ++ inst = inst << 32; ++ inst = inst + insts[0]; ++ ++ set_long_at(0, inst); ++} ++ ++void NativeCall::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int 
hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ //lui(d, value >> 32); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ //ori(d, d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ //nop(); ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32(address dest) { ++ patch_set32_gs(dest); ++} ++ ++void NativeCall::set_destination(address dest) { ++ OrderAccess::fence(); ++ ++ // li64 ++ if (is_special_op(int_at(16), Assembler::dsll_op)) { ++ int first_word = int_at(0); ++ set_int_at(0, 0x1000ffff); /* .1: b .1 */ ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)dest) & 0xffff)); ++ set_int_at(0, (first_word & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 48) & 0xffff)); ++ ICache::invalidate_range(addr_at(0), 24); ++ } else if (is_op(int_at(16), Assembler::jal_op)) { ++ if (UseLEXT1) { ++ patch_on_jal_gs(dest); ++ } else { ++ patch_on_jal(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::jal_op)) { ++ patch_on_jal_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jalr_op)) { ++ if (UseLEXT1) { ++ patch_on_jalr_gs(dest); ++ } else { ++ patch_on_jalr(dest); ++ } ++ } else if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ guarantee(!os::is_MP() || (((long)addr_at(0) % 8) == 0), "destination must be aligned by 8"); ++ if (UseLEXT1) { ++ patch_set32_gs(dest); ++ } else { ++ patch_set32(dest); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++ } else { ++ fatal("not a call"); ++ } ++} ++ ++void NativeCall::print() { ++ 
tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ NativeCall *call = nativeCall_at(code_pos); ++ CodeBuffer cb(call->addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ __ li48(T9, (long)entry); ++ __ jalr (); ++ __ delayed()->nop(); ++#undef __ ++ ++ ICache::invalidate_range(call->addr_at(0), instruction_size); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++//------------------------------------------------------------------- ++ ++void NativeMovConstReg::verify() { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), 
++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::patch_set48(intptr_t x) { ++ jlong value = (jlong) x; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 
16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ // li64 or li48 ++ if ((!nativeInstruction_at(addr_at(12))->is_nop()) && is_special_op(int_at(16), Assembler::dsll_op) && is_op(long_at(20), Assembler::ori_op)) { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 48) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)x) & 0xffff)); ++ } else { ++ patch_set48(x); ++ } ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? 
o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ if (is_immediate()) ++ return (short)(int_at(instruction_offset)&0xffff); ++ else ++ return Assembler::merge(int_at(hiword_offset)&0xffff, long_at(instruction_offset)&0xffff); ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ if (is_immediate()) { ++ assert(Assembler::is_simm16(x), "just check"); ++ set_int_at(0, (int_at(0)&0xffff0000) | (x&0xffff) ); ++ if (is_64ldst()) { ++ assert(Assembler::is_simm16(x+4), "just check"); ++ set_int_at(4, (int_at(4)&0xffff0000) | ((x+4)&0xffff) ); ++ } ++ } else { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_high(x) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(x) & 0xffff)); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++} ++ ++void NativeMovRegMem::verify() { ++ int offset = 0; ++ ++ if ( Assembler::opcode(int_at(0)) == Assembler::lui_op ) { ++ ++ if ( Assembler::opcode(int_at(4)) != Assembler::ori_op ) { ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++ ++ offset += 12; ++ } ++ ++ switch(Assembler::opcode(int_at(offset))) { ++ case Assembler::lb_op: ++ case Assembler::lbu_op: ++ case Assembler::lh_op: ++ case Assembler::lhu_op: ++ case Assembler::lw_op: ++ case Assembler::lwu_op: ++ case Assembler::ld_op: ++ case Assembler::lwc1_op: ++ case Assembler::ldc1_op: ++ case Assembler::sb_op: ++ case Assembler::sh_op: ++ case Assembler::sw_op: ++ case Assembler::sd_op: ++ case Assembler::swc1_op: ++ case Assembler::sdc1_op: ++ break; ++ default: ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++} ++ ++ ++void NativeMovRegMem::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, [reg + %x]", p2i(instruction_address()), offset()); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(((NativeInstruction *)this)->is_jump() || ++ ((NativeInstruction *)this)->is_cond_jump(), "not a general jump instruction"); ++} ++ ++void NativeJump::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware 
zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_j_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(0, j_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ 
++ ++void NativeJump::patch_on_j_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(16, j_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeJump::patch_on_j(address dst) { ++ patch_on_j_gs(dst); ++} ++ ++void NativeJump::patch_on_jr_gs(address dst) { ++ patch_set48_gs(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_jr(address dst) { ++ patch_set48(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ if (is_short()) { ++ assert(Assembler::is_simm16(dest-addr_at(4)), "change this code"); ++ set_int_at(0, (int_at(0) & 0xffff0000) | (dest - addr_at(4)) & 0xffff ); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else if (is_b_far()) { ++ int offset = dest - addr_at(12); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (offset >> 16)); ++ set_int_at(16, (int_at(16) & 0xffff0000) | (offset & 0xffff)); ++ } else { ++ if (is_op(int_at(16), Assembler::j_op)) { ++ if (UseLEXT1) { ++ patch_on_j_gs(dest); ++ } else { ++ patch_on_j(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::j_op)) { ++ patch_on_j_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jr_op)) { ++ if (UseLEXT1) { ++ //guarantee(!os::is_MP() || (((long)addr_at(0) % 16) == 0), "destination must be aligned for GSSD"); ++ //patch_on_jr_gs(dest); ++ patch_on_jr(dest); ++ } else { ++ patch_on_jr(dest); ++ } ++ } else { ++ fatal("not a jump"); ++ } ++ } ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ CodeBuffer cb(code_pos, instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ if (Assembler::is_simm16((entry - code_pos - 4) / 4)) { ++ __ b(entry); ++ __ delayed()->nop(); ++ } else { ++ // Attention: We have to use a relative jump here since PC reloc-operation isn't allowed here. ++ int offset = entry - code_pos; ++ ++ Label L; ++ __ bgezal(R0, L); ++ __ delayed()->lui(T9, (offset - 8) >> 16); ++ __ bind(L); ++ __ ori(T9, T9, (offset - 8) & 0xffff); ++ __ daddu(T9, T9, RA); ++ __ jr(T9); ++ __ delayed()->nop(); ++ } ++ ++#undef __ ++ ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++bool NativeJump::is_b_far() { ++// ++// 0x000000556809f198: daddu at, ra, zero ++// 0x000000556809f19c: [4110001]bgezal zero, 0x000000556809f1a4 ++// ++// 0x000000556809f1a0: nop ++// 0x000000556809f1a4: lui t9, 0xfffffffd ++// 0x000000556809f1a8: ori t9, t9, 0x14dc ++// 0x000000556809f1ac: daddu t9, t9, ra ++// 0x000000556809f1b0: daddu ra, at, zero ++// 0x000000556809f1b4: jr t9 ++// 0x000000556809f1b8: nop ++// ;; ImplicitNullCheckStub slow case ++// 0x000000556809f1bc: lui t9, 0x55 ++// ++ return is_op(int_at(12), Assembler::lui_op); ++} ++ ++address NativeJump::jump_destination() { ++ if ( is_short() ) { ++ return addr_at(4) + Assembler::imm_off(int_at(instruction_offset)) * 4; ++ } ++ // Assembler::merge() is not correct in MIPS_64! 
++ // ++ // Example: ++ // hi16 = 0xfffd, ++ // lo16 = f7a4, ++ // ++ // offset=0xfffdf7a4 (Right) ++ // Assembler::merge = 0xfffcf7a4 (Wrong) ++ // ++ if ( is_b_far() ) { ++ int hi16 = int_at(12)&0xffff; ++ int low16 = int_at(16)&0xffff; ++ address target = addr_at(12) + (hi16 << 16) + low16; ++ return target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // j target ++ // nop ++ if ( is_op(int_at(0), Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if 
( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a jump"); ++ return NULL; // unreachable ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ NativeGeneralJump* h_jump = nativeGeneralJump_at (instr_addr); ++ assert((int)instruction_size == (int)NativeCall::instruction_size, ++ "note::Runtime1::patch_code uses NativeCall::instruction_size"); ++ ++ // ensure 100% atomicity ++ guarantee(!os::is_MP() || (((long)instr_addr % BytesPerWord) == 0), "destination must be aligned for SD"); ++ ++ int *p = (int *)instr_addr; ++ int jr_word = p[4]; ++ ++ p[4] = 0x1000fffb; /* .1: --; --; --; --; b .1; nop */ ++ memcpy(instr_addr, code_buffer, NativeCall::instruction_size - 8); ++ *(long *)(instr_addr + 16) = *(long *)(code_buffer + 16); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ assert(nativeInstruction_at(verified_entry + BytesPerInstWord)->is_nop(), "mips64 cannot replace non-nop with jump"); ++ ++ if (MacroAssembler::reachable_from_cache(dest)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.j(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_jump() ++{ ++ if ((int_at(0) & NativeGeneralJump::b_mask) == NativeGeneralJump::beq_opcode) ++ return true; ++ if (is_op(int_at(4), Assembler::lui_op)) // simplified b_far ++ return true; ++ if (is_op(int_at(12), Assembler::lui_op)) // original b_far ++ return true; ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ if ( 
nativeInstruction_at(addr_at(0))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // lui rd, imm(63...48); ++ // ori rd, rd, imm(47...32); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(31...16); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(15...0); ++ // jr rd ++ // nop ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_dtrace_trap() { ++ //return (*(int32_t*)this & 0xff) == 0xcc; ++ Unimplemented(); ++ return false; ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 sw [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lui t2, 0x40 ++ // 0x000000ffe5815134: sw v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: lw at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint 
instruction like the this. ++ return is_op(Assembler::lw_op) && is_rt(AT); ++} +diff --git a/src/hotspot/cpu/mips/nativeInst_mips.hpp b/src/hotspot/cpu/mips/nativeInst_mips.hpp +new file mode 100644 +index 0000000000..fb4f99c9c6 +--- /dev/null ++++ b/src/hotspot/cpu/mips/nativeInst_mips.hpp +@@ -0,0 +1,734 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++#define CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepointMechanism.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeJump ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativeReturn ++// - - NativeReturnX (return with argument) ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction { ++ friend class Relocation; ++ ++ public: ++ enum mips_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf ++ }; ++ ++ bool is_nop() { return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return long_at(0) == sync_instruction_code; } ++ bool is_dtrace_trap(); ++ inline bool is_call(); ++ inline bool is_illegal(); ++ inline bool is_return(); ++ bool is_jump(); ++ inline bool is_cond_jump(); ++ bool is_safepoint_poll(); ++ ++ //mips has no instruction to generate a illegal instrucion exception ++ //we define ours: break 11 ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_trampoline_call(); ++ ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ static bool is_op (int insn, Assembler::ops op) { return Assembler::opcode(insn) == (int)op; } ++ bool is_op (Assembler::ops op) const { return is_op(insn_word(), op); } ++ bool is_rs (int insn, Register rs) const { return Assembler::rs(insn) == (int)rs->encoding(); } ++ bool is_rs (Register rs) const { return is_rs(insn_word(), rs); } ++ bool is_rt (int insn, Register rt) const { return Assembler::rt(insn) == (int)rt->encoding(); } ++ bool is_rt (Register rt) const { return is_rt(insn_word(), rt); } ++ ++ static bool is_special_op (int insn, Assembler::special_ops op) { ++ return is_op(insn, Assembler::special_op) && Assembler::special(insn)==(int)op; ++ } ++ bool is_special_op (Assembler::special_ops op) const { return is_special_op(insn_word(), op); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++inline NativeCall* nativeCall_at(address address); ++// The NativeCall is an abstraction for accessing/manipulating native call imm32/imm64 ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++// MIPS has no call instruction with imm32/imm64. 
Usually, a call was done like this: ++// 32 bits: ++// lui rt, imm16 ++// addiu rt, rt, imm16 ++// jalr rt ++// nop ++// ++// 64 bits: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++ ++// we just consider the above for instruction as one call instruction ++class NativeCall: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 6 * BytesPerInstWord, ++ return_address_offset_short = 4 * BytesPerInstWord, ++ return_address_offset_long = 6 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ return addr_at(return_address_offset_short); ++ } else { ++ return addr_at(return_address_offset_long); ++ } ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_insn() const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jalr_gs(address dest); ++ void patch_on_jalr(address dest); ++ ++ void patch_on_jal_gs(address dest); ++ void patch_on_jal(address dest); ++ ++ void patch_on_trampoline(address dest); ++ ++ void patch_on_jal_only(address dest); ++ ++ void patch_set32_gs(address dest); ++ void patch_set32(address dest); ++ ++ void verify_alignment() { } ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset_short) | is_call_at(return_address - return_address_offset_long); ++ } ++ ++ static bool is_call_to(address instr, address target) { ++ return nativeInstruction_at(instr)->is_call() && ++nativeCall_at(instr)->destination() == target; ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. If the call is an immediate jal ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. 
++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = NULL; ++ if (NativeCall::is_call_at(return_address - NativeCall::return_address_offset_long)) { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_long); ++ } else { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_short); ++ } ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4 * BytesPerInstWord, ++ next_instruction_offset = 4 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ void patch_set48(intptr_t x); ++ ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++// An interface for accessing/manipulating native moves of the form: ++// lui AT, split_high(offset) ++// addiu AT, split_low(offset) ++// addu reg, reg, AT ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, 0 ++// [lw/sw/lwc1/swc1 dest, reg, 4] ++// or ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, offset ++// [lw/sw/lwc1/swc1 dest, reg, offset+4] ++// ++// Warning: These routines must be able to handle any instruction sequences ++// that are generated as a result of the load/store byte,word,long ++// macros. ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ //offset is less than 16 bits. 
++ bool is_immediate() const { return !is_op(long_at(instruction_offset), Assembler::lui_op); } ++ bool is_64ldst() const { ++ if (is_immediate()) { ++ return (Assembler::opcode(long_at(hiword_offset)) == Assembler::opcode(long_at(instruction_offset))) && ++ (Assembler::imm_off(long_at(hiword_offset)) == Assembler::imm_off(long_at(instruction_offset)) + wordSize); ++ } else { ++ return (Assembler::opcode(long_at(ldst_offset+hiword_offset)) == Assembler::opcode(long_at(ldst_offset))) && ++ (Assembler::imm_off(long_at(ldst_offset+hiword_offset)) == Assembler::imm_off(long_at(ldst_offset)) + wordSize); ++ } ++ } ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { ++ return addr_at( (is_immediate()? immediate_size : ldst_size) + (is_64ldst()? 4 : 0)); ++ } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. Long/far, conditional/unconditional ++// 32 bits: ++// far jump: ++// lui reg, split_high(addr) ++// addiu reg, split_low(addr) ++// jr reg ++// nop ++// or ++// beq ZERO, ZERO, offset ++// nop ++// ++ ++//64 bits: ++// far jump: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ beq_opcode = 0x10000000,//000100|00000|00000|offset ++ b_mask = 0xffff0000, ++ short_size = 8, ++ instruction_size = 6 * BytesPerInstWord ++ }; ++ ++ bool is_short() const { return (long_at(instruction_offset) & b_mask) == beq_opcode; } ++ bool is_b_far(); ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jr_gs(address dest); ++ void patch_on_jr(address dest); ++ ++ void patch_on_j_gs(address dest); ++ void patch_on_j(address dest); ++ ++ void patch_on_j_only(address dest); ++ ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry) {} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = 
(NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum mips_specific_constants { ++ instruction_code = 0x42000029, // mips reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++// return instruction that does not pop values of the stack ++// jr RA ++// delay slot ++class NativeReturn: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 8, ++ instruction_offset = 0, ++ next_instruction_offset = 8 ++ }; ++}; ++ ++ ++ ++ ++class NativeCondJump; ++inline NativeCondJump* nativeCondJump_at(address address); ++class NativeCondJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 16, ++ instruction_offset = 12, ++ next_instruction_offset = 20 ++ }; ++ ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ ++ // Creation ++ inline friend NativeCondJump* nativeCondJump_at(address address); ++ ++ address jump_destination() const { ++ return ::nativeCondJump_at(addr_at(12))->jump_destination(); ++ } ++ ++ void set_jump_destination(address dest) { ++ ::nativeCondJump_at(addr_at(12))->set_jump_destination(dest); ++ } ++ ++}; ++ ++inline NativeCondJump* nativeCondJump_at(address address) { ++ NativeCondJump* jump = (NativeCondJump*)(address); ++ return jump; ++} ++ ++ ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), 
Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ if(is_trampoline_call()) ++ return true; ++ ++ return false; ++ ++} ++ ++inline bool NativeInstruction::is_return() { return is_special_op(Assembler::jr_op) && is_rs(RA);} ++ ++inline bool NativeInstruction::is_cond_jump() { return is_int_branch() || is_float_branch(); } ++ ++// Call trampoline stubs. 
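[Editor's note] The `NativeCallTrampolineStub` defined just below keeps the real call target as a raw 8-byte word at the start of the stub; `destination()` and `set_destination()` simply read and overwrite that word. A self-contained model of that slot, with assumed names and layout purely for illustration (the real stub is code plus a data word emitted into the code cache, not a C++ struct):

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative stand-in for the stub's data word.
struct TrampolineSlot {
  intptr_t target;                                  // 8-byte destination word
};

static void* read_target(const TrampolineSlot* s)        { return reinterpret_cast<void*>(s->target); }
static void  write_target(TrampolineSlot* s, void* dest) { s->target = reinterpret_cast<intptr_t>(dest); }

int main() {
  TrampolineSlot slot{0};
  write_target(&slot, reinterpret_cast<void*>(0x120003000L));
  std::printf("resolved target: %p\n", read_target(&slot));
  return 0;
}
```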
++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum mips_specific_constants { ++ instruction_size = 2 * BytesPerInstWord, ++ instruction_offset = 0, ++ next_instruction_offset = 2 * BytesPerInstWord ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(0); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(0, (intptr_t)new_destination); ++ } ++}; ++ ++inline bool NativeInstruction::is_trampoline_call() { ++ // lui dst, imm16 ++ // ori dst, dst, imm16 ++ // dsll dst, dst, 16 ++ // ld target, dst, imm16 ++ // jalr target ++ // nop ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ld_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ return (NativeCallTrampolineStub*)addr; ++} ++#endif // CPU_MIPS_VM_NATIVEINST_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/registerMap_mips.hpp b/src/hotspot/cpu/mips/registerMap_mips.hpp +new file mode 100644 +index 0000000000..7f800eb107 +--- /dev/null ++++ b/src/hotspot/cpu/mips/registerMap_mips.hpp +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++#define CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++#ifndef CORE ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. 
++ address pd_location(VMReg reg) const {return NULL;} ++#endif ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_MIPS_VM_REGISTERMAP_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/register_definitions_mips.cpp b/src/hotspot/cpu/mips/register_definitions_mips.cpp +new file mode 100644 +index 0000000000..4af2531834 +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_definitions_mips.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_mips.hpp" ++#ifdef TARGET_ARCH_MODEL_mips_32 ++# include "interp_masm_mips_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_mips_64 ++# include "interp_masm_mips_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, i0); ++REGISTER_DEFINITION(Register, i1); ++REGISTER_DEFINITION(Register, i2); ++REGISTER_DEFINITION(Register, i3); ++REGISTER_DEFINITION(Register, i4); ++REGISTER_DEFINITION(Register, i5); ++REGISTER_DEFINITION(Register, i6); ++REGISTER_DEFINITION(Register, i7); ++REGISTER_DEFINITION(Register, i8); ++REGISTER_DEFINITION(Register, i9); ++REGISTER_DEFINITION(Register, i10); ++REGISTER_DEFINITION(Register, i11); ++REGISTER_DEFINITION(Register, i12); ++REGISTER_DEFINITION(Register, i13); ++REGISTER_DEFINITION(Register, i14); ++REGISTER_DEFINITION(Register, i15); ++REGISTER_DEFINITION(Register, i16); ++REGISTER_DEFINITION(Register, i17); ++REGISTER_DEFINITION(Register, i18); ++REGISTER_DEFINITION(Register, i19); ++REGISTER_DEFINITION(Register, i20); ++REGISTER_DEFINITION(Register, i21); ++REGISTER_DEFINITION(Register, i22); ++REGISTER_DEFINITION(Register, i23); ++REGISTER_DEFINITION(Register, i24); ++REGISTER_DEFINITION(Register, i25); ++REGISTER_DEFINITION(Register, i26); ++REGISTER_DEFINITION(Register, i27); ++REGISTER_DEFINITION(Register, i28); ++REGISTER_DEFINITION(Register, i29); ++REGISTER_DEFINITION(Register, i30); ++REGISTER_DEFINITION(Register, i31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); 
++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff --git a/src/hotspot/cpu/mips/register_mips.cpp b/src/hotspot/cpu/mips/register_mips.cpp +new file mode 100644 +index 0000000000..4a9b22bfef +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_mips.cpp +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_mips.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ 2 * FloatRegisterImpl::number_of_registers; ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", ++ "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? 
names[encoding()] : "fnoreg"; ++} ++ +diff --git a/src/hotspot/cpu/mips/register_mips.hpp b/src/hotspot/cpu/mips/register_mips.hpp +new file mode 100644 +index 0000000000..ea216fbcb9 +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_mips.hpp +@@ -0,0 +1,341 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTER_MIPS_HPP ++#define CPU_MIPS_VM_REGISTER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "utilities/formatBuffer.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register (%d)", (int)(intptr_t)this ); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++ ++// The integer registers of the MIPS32 architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, i0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, i1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, i2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, i3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, i4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, i5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, i6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, i7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, i8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, i9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, i10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, i11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, i12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, i13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, i14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, i15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, i16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, i17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, i18, 
(18)); ++CONSTANT_REGISTER_DECLARATION(Register, i19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, i20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, i21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, i22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, i23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, i24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, i25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, i26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, i27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, i28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, i29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, i30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, i31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define I0 ((Register)(i0_RegisterEnumValue)) ++#define I1 ((Register)(i1_RegisterEnumValue)) ++#define I2 ((Register)(i2_RegisterEnumValue)) ++#define I3 ((Register)(i3_RegisterEnumValue)) ++#define I4 ((Register)(i4_RegisterEnumValue)) ++#define I5 ((Register)(i5_RegisterEnumValue)) ++#define I6 ((Register)(i6_RegisterEnumValue)) ++#define I7 ((Register)(i7_RegisterEnumValue)) ++#define I8 ((Register)(i8_RegisterEnumValue)) ++#define I9 ((Register)(i9_RegisterEnumValue)) ++#define I10 ((Register)(i10_RegisterEnumValue)) ++#define I11 ((Register)(i11_RegisterEnumValue)) ++#define I12 ((Register)(i12_RegisterEnumValue)) ++#define I13 ((Register)(i13_RegisterEnumValue)) ++#define I14 ((Register)(i14_RegisterEnumValue)) ++#define I15 ((Register)(i15_RegisterEnumValue)) ++#define I16 ((Register)(i16_RegisterEnumValue)) ++#define I17 ((Register)(i17_RegisterEnumValue)) ++#define I18 ((Register)(i18_RegisterEnumValue)) ++#define I19 ((Register)(i19_RegisterEnumValue)) ++#define I20 ((Register)(i20_RegisterEnumValue)) ++#define I21 ((Register)(i21_RegisterEnumValue)) ++#define I22 ((Register)(i22_RegisterEnumValue)) ++#define I23 ((Register)(i23_RegisterEnumValue)) ++#define I24 ((Register)(i24_RegisterEnumValue)) ++#define I25 ((Register)(i25_RegisterEnumValue)) ++#define I26 ((Register)(i26_RegisterEnumValue)) ++#define I27 ((Register)(i27_RegisterEnumValue)) ++#define I28 ((Register)(i28_RegisterEnumValue)) ++#define I29 ((Register)(i29_RegisterEnumValue)) ++#define I30 ((Register)(i30_RegisterEnumValue)) ++#define I31 ((Register)(i31_RegisterEnumValue)) ++ ++#define R0 ((Register)(i0_RegisterEnumValue)) ++#define AT ((Register)(i1_RegisterEnumValue)) ++#define V0 ((Register)(i2_RegisterEnumValue)) ++#define V1 ((Register)(i3_RegisterEnumValue)) ++#define A0 ((Register)(i4_RegisterEnumValue)) ++#define A1 ((Register)(i5_RegisterEnumValue)) ++#define A2 ((Register)(i6_RegisterEnumValue)) ++#define A3 ((Register)(i7_RegisterEnumValue)) ++#define A4 ((Register)(i8_RegisterEnumValue)) ++#define A5 ((Register)(i9_RegisterEnumValue)) ++#define A6 ((Register)(i10_RegisterEnumValue)) ++#define A7 ((Register)(i11_RegisterEnumValue)) ++#define RT0 ((Register)(i12_RegisterEnumValue)) ++#define RT1 ((Register)(i13_RegisterEnumValue)) ++#define RT2 ((Register)(i14_RegisterEnumValue)) ++#define RT3 ((Register)(i15_RegisterEnumValue)) ++#define S0 ((Register)(i16_RegisterEnumValue)) ++#define S1 ((Register)(i17_RegisterEnumValue)) ++#define S2 ((Register)(i18_RegisterEnumValue)) ++#define S3 ((Register)(i19_RegisterEnumValue)) ++#define S4 ((Register)(i20_RegisterEnumValue)) ++#define S5 ((Register)(i21_RegisterEnumValue)) ++#define S6 ((Register)(i22_RegisterEnumValue)) ++#define S7 
((Register)(i23_RegisterEnumValue)) ++#define RT8 ((Register)(i24_RegisterEnumValue)) ++#define RT9 ((Register)(i25_RegisterEnumValue)) ++#define K0 ((Register)(i26_RegisterEnumValue)) ++#define K1 ((Register)(i27_RegisterEnumValue)) ++#define GP ((Register)(i28_RegisterEnumValue)) ++#define SP ((Register)(i29_RegisterEnumValue)) ++#define FP ((Register)(i30_RegisterEnumValue)) ++#define S8 ((Register)(i30_RegisterEnumValue)) ++#define RA ((Register)(i31_RegisterEnumValue)) ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++/* ++#define RT0 T0 ++#define RT1 T1 ++#define RT2 T2 ++#define RT3 T3 ++#define RT4 T8 ++#define RT5 T9 ++*/ ++ ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++#define OPT_THREAD 1 ++ ++#define TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define mh_SP_save SP ++ ++#define FSR V0 ++#define SSR V1 ++#define FSF F0 ++#define SSF F1 ++#define FTF F14 ++#define STF F15 ++ ++#define AFT F30 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ float_arg_base = 12, ++ number_of_registers = 32 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); 
++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) ++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) ++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++#endif // DONT_USE_REGISTER_DEFINES ++ ++ ++const int MIPS_ARGS_IN_REGS_NUM = 4; ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. 
++ number_of_registers = (RegisterImpl::number_of_registers + FloatRegisterImpl::number_of_registers) * 2 ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++#endif //CPU_MIPS_VM_REGISTER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/relocInfo_mips.cpp b/src/hotspot/cpu/mips/relocInfo_mips.cpp +new file mode 100644 +index 0000000000..ff8028032b +--- /dev/null ++++ b/src/hotspot/cpu/mips/relocInfo_mips.cpp +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/oop.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (Universe::heap()->is_in_reserved((oop)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedOops::encode((oop)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode(oop(x))), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. 
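[Editor's note] The narrow_oop_operand branch above re-encodes the patched value with `CompressedOops::encode` (or `Klass::encode_klass`). As a rough sketch only — base and shift are assumed here, and the real parameters depend on heap placement and are owned by `CompressedOops` — compression is base-plus-shift arithmetic:

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical zero-effort model of narrow-oop compression: subtract an
// assumed heap base and shift out the alignment bits, then reverse it.
static uint32_t  encode(uintptr_t oop, uintptr_t base, int shift) { return (uint32_t)((oop - base) >> shift); }
static uintptr_t decode(uint32_t narrow, uintptr_t base, int shift) { return base + ((uintptr_t)narrow << shift); }

int main() {
  const uintptr_t base  = 0x100000000ULL;   // assumed heap base
  const int       shift = 3;                // assumed 8-byte object alignment
  uintptr_t oop = base + 0x7890;            // an aligned offset into the heap
  assert(decode(encode(oop, base, shift), base, shift) == oop);
  return 0;
}
```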
++ assert(0, "call32_operand not supported in MIPS64"); ++ } ++} ++ ++ ++//NOTICE HERE, this relocate is not need for MIPS, since MIPS USE abosolutly target, ++//Maybe We should FORGET CALL RELOCATION ++address Relocation::pd_call_destination(address orig_addr) { ++ intptr_t adj = 0; ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ if (!ni->is_trampoline_call()) { ++ return nativeCall_at(addr())->target_addr_for_insn(); ++ } else { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ return (address) -1; ++ } ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination() + adj; ++ } else if (ni->is_cond_jump()) { ++ return nativeCondJump_at(addr())->jump_destination() +adj; ++ } else { ++ tty->print_cr("\nError!\ncall destination: " INTPTR_FORMAT, p2i(addr())); ++ Disassembler::decode(addr() - 10 * 4, addr() + 10 * 4, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ NativeCall* call = nativeCall_at(addr()); ++ if (!ni->is_trampoline_call()) { ++ call->set_destination(x); ++ } else { ++ address trampoline_stub_addr = call->get_trampoline(); ++ if (trampoline_stub_addr != NULL) { ++ address orig = call->target_addr_for_insn(); ++ if (orig != trampoline_stub_addr) { ++ call->patch_on_trampoline(trampoline_stub_addr); ++ } ++ call->set_destination_mt_safe(x, false); ++ } ++ } ++ } else if (ni->is_jump()) ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ else if (ni->is_cond_jump()) ++ nativeCondJump_at(addr())->set_jump_destination(x); ++ else ++ { ShouldNotReachHere(); } ++ ++ // Unresolved jumps are recognized by a destination of -1 ++ // However 64bit can't actually produce such an address ++ // and encodes a jump to self but jump_destination will ++ // return a -1 as the signal. We must not relocate this ++ // jmp or the ic code will not see it as unresolved. ++} ++ ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++ ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++/* ++void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++*/ ++ ++void internal_pc_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++ address target =0; ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ target = new_addr_for((address)ni->data(), src, dest); ++ ni->set_data((intptr_t)target); ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/src/hotspot/cpu/mips/relocInfo_mips.hpp b/src/hotspot/cpu/mips/relocInfo_mips.hpp +new file mode 100644 +index 0000000000..1e1e170fd8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/relocInfo_mips.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++#define CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since MIPS instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. narrow_oop_operand ++ format_width = 2 ++ }; ++ ++ public: ++ ++ static bool mustIterateImmediateOopsInCode() { return false; } ++ ++#endif // CPU_MIPS_VM_RELOCINFO_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/runtime_mips_64.cpp b/src/hotspot/cpu/mips/runtime_mips_64.cpp +new file mode 100644 +index 0000000000..2a0488cd01 +--- /dev/null ++++ b/src/hotspot/cpu/mips/runtime_mips_64.cpp +@@ -0,0 +1,198 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "code/vmreg.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_mips.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? ++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_mips.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T9 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T9 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ ++ address start = __ pc(); ++ ++ __ daddiu(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. ++ // ref: X86's implementation ++ __ sd(V1, SP, return_off *wordSize); // return address ++ __ sd(FP, SP, fp_off *wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ daddiu(FP, SP, fp_off * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ sd(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ sd(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. 
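[Editor's note] Just below, the blob re-aligns SP by AND-ing it with the negated alignment before making the runtime call. A standalone check of that rounding identity, assuming the 16-byte ABI alignment used elsewhere in the port:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const int64_t StackAlignmentInBytes = 16;   // assumed ABI alignment
  int64_t sp = 0x7ffffff1238LL;               // deliberately misaligned
  sp &= -StackAlignmentInBytes;               // the move(AT, -align); andr(SP, SP, AT) trick
  assert(sp == 0x7ffffff1230LL && sp % 16 == 0);
  return 0;
}
```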
++ __ set_last_Java_frame(thread, NOREG, NOREG, NULL); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ ++ { ++ long save_pc = (long)__ pc() + 48; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ move(A0, thread); ++ __ patchable_set48(T9, (long)OptoRuntime::handle_exception_C); ++ __ jalr(T9); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T9, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ sd(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ sd(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ sd(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T9: exception handler ++ // A1: exception pc ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ // make sure all code is generated ++ masm->flush(); ++ ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff --git a/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp +new file mode 100644 +index 0000000000..4a9791d4cb +--- /dev/null ++++ b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp +@@ -0,0 +1,3879 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nativeInst.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ enum { FPU_regs_live = 32 }; ++ // Capture info about frame layout ++ enum layout { ++#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, ++ DEF_LAYOUT_OFFS(for_16_bytes_aligned) ++ DEF_LAYOUT_OFFS(fpr0) ++ DEF_LAYOUT_OFFS(fpr1) ++ DEF_LAYOUT_OFFS(fpr2) ++ DEF_LAYOUT_OFFS(fpr3) ++ DEF_LAYOUT_OFFS(fpr4) ++ DEF_LAYOUT_OFFS(fpr5) ++ DEF_LAYOUT_OFFS(fpr6) ++ DEF_LAYOUT_OFFS(fpr7) ++ DEF_LAYOUT_OFFS(fpr8) ++ DEF_LAYOUT_OFFS(fpr9) ++ DEF_LAYOUT_OFFS(fpr10) ++ DEF_LAYOUT_OFFS(fpr11) ++ DEF_LAYOUT_OFFS(fpr12) ++ DEF_LAYOUT_OFFS(fpr13) ++ DEF_LAYOUT_OFFS(fpr14) ++ DEF_LAYOUT_OFFS(fpr15) ++ DEF_LAYOUT_OFFS(fpr16) ++ DEF_LAYOUT_OFFS(fpr17) ++ DEF_LAYOUT_OFFS(fpr18) ++ DEF_LAYOUT_OFFS(fpr19) ++ DEF_LAYOUT_OFFS(fpr20) ++ DEF_LAYOUT_OFFS(fpr21) ++ DEF_LAYOUT_OFFS(fpr22) ++ DEF_LAYOUT_OFFS(fpr23) ++ DEF_LAYOUT_OFFS(fpr24) ++ DEF_LAYOUT_OFFS(fpr25) ++ DEF_LAYOUT_OFFS(fpr26) ++ DEF_LAYOUT_OFFS(fpr27) ++ DEF_LAYOUT_OFFS(fpr28) ++ DEF_LAYOUT_OFFS(fpr29) ++ DEF_LAYOUT_OFFS(fpr30) ++ DEF_LAYOUT_OFFS(fpr31) ++ ++ DEF_LAYOUT_OFFS(v0) ++ DEF_LAYOUT_OFFS(v1) ++ DEF_LAYOUT_OFFS(a0) ++ DEF_LAYOUT_OFFS(a1) ++ DEF_LAYOUT_OFFS(a2) ++ DEF_LAYOUT_OFFS(a3) ++ DEF_LAYOUT_OFFS(a4) ++ DEF_LAYOUT_OFFS(a5) ++ DEF_LAYOUT_OFFS(a6) ++ DEF_LAYOUT_OFFS(a7) ++ DEF_LAYOUT_OFFS(t0) ++ DEF_LAYOUT_OFFS(t1) ++ DEF_LAYOUT_OFFS(t2) ++ DEF_LAYOUT_OFFS(t3) ++ DEF_LAYOUT_OFFS(s0) ++ DEF_LAYOUT_OFFS(s1) ++ DEF_LAYOUT_OFFS(s2) ++ DEF_LAYOUT_OFFS(s3) ++ DEF_LAYOUT_OFFS(s4) ++ DEF_LAYOUT_OFFS(s5) ++ DEF_LAYOUT_OFFS(s6) ++ DEF_LAYOUT_OFFS(s7) ++ DEF_LAYOUT_OFFS(t8) ++ DEF_LAYOUT_OFFS(t9) ++ ++ DEF_LAYOUT_OFFS(gp) ++ DEF_LAYOUT_OFFS(fp) ++ DEF_LAYOUT_OFFS(return) ++ reg_save_size ++ }; ++ ++ public: ++ ++ static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); ++ static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); ++ static int raOffset(void) { return return_off / 2; } ++ //Rmethod ++ static int methodOffset(void) { return s3_off / 2; } ++ ++ static int v0Offset(void) { return v0_off / 2; } ++ static int v1Offset(void) { return v1_off / 2; } ++ ++ static int fpResultOffset(void) { return fpr0_off / 2; } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. 
++ static void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) { ++ ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = round_to(additional_frame_words*wordSize + ++ reg_save_size*BytesPerInt, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; ++ ++ // save registers ++ ++ __ daddiu(SP, SP, - reg_save_size * jintSize); ++ ++ __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize); ++ __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize); ++ __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize); ++ __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); ++ __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); ++ __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); ++ __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); ++ __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); ++ __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); ++ __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); ++ __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); ++ __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); ++ __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); ++ __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); ++ __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); ++ __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); ++ __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); ++ __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); ++ __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); ++ __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); ++ __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); ++ __ sd(T0, SP, t0_off * jintSize); ++ __ sd(T1, SP, t1_off * jintSize); ++ __ sd(T2, SP, t2_off * jintSize); ++ __ sd(T3, SP, t3_off * jintSize); ++ __ sd(S0, SP, s0_off * jintSize); ++ __ sd(S1, SP, s1_off * jintSize); ++ __ sd(S2, SP, s2_off * jintSize); ++ __ sd(S3, SP, s3_off * jintSize); ++ __ sd(S4, SP, s4_off * jintSize); ++ __ sd(S5, SP, s5_off * jintSize); ++ __ sd(S6, SP, s6_off * jintSize); ++ __ sd(S7, SP, s7_off * jintSize); ++ ++ __ sd(T8, SP, t8_off * jintSize); ++ __ sd(T9, SP, t9_off * jintSize); ++ ++ __ sd(GP, SP, gp_off * jintSize); ++ __ sd(FP, SP, fp_off * jintSize); ++ __ sd(RA, SP, return_off * jintSize); ++ __ daddiu(FP, SP, fp_off * jintSize); ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ //OopMap* map = new OopMap( frame_words, 0 ); ++ OopMap* map = new OopMap( frame_size_in_slots, 0 ); ++ ++ ++//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) ++#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) ++ 
map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg()); ++ ++ map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg()); ++ 
map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg()); ++ ++#undef STACK_OFFSET ++ return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. ++void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { ++ __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize); ++ __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize); ++ __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); ++ __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); ++ __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); ++ __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); ++ __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); ++ __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); ++ __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); ++ __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); ++ __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); ++ __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); ++ __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); ++ __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); ++ __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); ++ ++ __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); ++ __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); ++ __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); ++ __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); ++ __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); ++ __ ld(T0, SP, t0_off * jintSize); ++ __ ld(T1, SP, t1_off * jintSize); ++ __ ld(T2, SP, t2_off * jintSize); ++ __ ld(T3, SP, t3_off * jintSize); ++ __ ld(S0, SP, s0_off * jintSize); ++ __ ld(S1, SP, s1_off * jintSize); ++ __ ld(S2, SP, s2_off * jintSize); ++ __ ld(S3, SP, s3_off * jintSize); ++ __ ld(S4, SP, s4_off * jintSize); ++ __ ld(S5, SP, s5_off * jintSize); ++ __ ld(S6, SP, s6_off * jintSize); ++ __ ld(S7, SP, s7_off * jintSize); ++ ++ __ ld(T8, SP, t8_off * jintSize); ++ __ ld(T9, SP, t9_off * jintSize); ++ ++ __ ld(GP, SP, gp_off * jintSize); ++ __ ld(FP, SP, fp_off * jintSize); ++ __ ld(RA, SP, return_off * jintSize); ++ ++ __ addiu(SP, SP, reg_save_size * jintSize); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. 
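[Editor's note] The frame-size bookkeeping at the top of `save_live_registers()` above is just unit conversion: bytes rounded up to 16, then re-expressed as 32-bit slots and as machine words. A quick standalone check — `wordSize`, `BytesPerInt`, and a `reg_save_size` of 120 slots are assumed values for illustration, not taken from the patch:

```cpp
#include <cstdio>

// Round x up to the next multiple of a power-of-two alignment.
static int round_to(int x, int align) { return (x + align - 1) & -align; }

int main() {
  const int wordSize = 8, BytesPerInt = 4;    // assumed LP64 sizes
  const int additional_frame_words = 0;
  const int reg_save_size = 120;              // assumed jint slot count
  int bytes = round_to(additional_frame_words * wordSize + reg_save_size * BytesPerInt, 16);
  std::printf("bytes=%d slots=%d words=%d\n", bytes, bytes / BytesPerInt, bytes / wordSize);
  return 0;
}
```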
++ ++ __ ld(V0, SP, v0_off * jintSize); ++ __ ld(V1, SP, v1_off * jintSize); ++ __ ldc1(F0, SP, fpr0_off * jintSize); ++ __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ addiu(SP, SP, return_off * jintSize); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 16; ++} ++ ++size_t SharedRuntime::trampoline_size() { ++ return 32; ++} ++ ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ // trampoline is not in CodeCache ++ __ set64(T9, (long)destination); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ ++ // Create the mapping between argument positions and registers. 
++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ T0, A0, A1, A2, A3, A4, A5, A6 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ // Schedule the branch target address early. ++ // Call into the VM to patch the caller, then jump to compiled callee ++ // V0 isn't live so capture return address while we easily can ++ __ move(V0, RA); ++ ++ __ pushad(); ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, V0); ++ // we should preserve the return address ++ __ move(TSR, SP); ++ __ move(AT, -(StackAlignmentInBytes)); // align the stack ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ delayed()->nop(); ++ __ move(SP, TSR); ++ __ popad(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. 
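A hedged, standalone sketch of the register-assignment policy implemented by the loop above: int-like and reference arguments take registers from the T0/A0-A6 list until it is exhausted, float/double arguments take F12-F19, and everything else falls to 4-byte stack slots consumed two at a time. The BasicType enum and the register-name strings below are simplified stand-ins, not the real VMRegPair machinery:

#include <cstdio>
#include <string>
#include <vector>

enum BasicType { T_INT, T_LONG, T_FLOAT, T_DOUBLE, T_OBJECT, T_VOID };

int main() {
  const char* int_regs[]   = { "T0", "A0", "A1", "A2", "A3", "A4", "A5", "A6" };
  const char* float_regs[] = { "F12", "F13", "F14", "F15", "F16", "F17", "F18", "F19" };
  const unsigned n_int = 8, n_float = 8;   // stand-ins for Argument::n_*_register_parameters

  // Example signature: (long, double, int, Object); longs/doubles carry a T_VOID half.
  std::vector<BasicType> sig = { T_LONG, T_VOID, T_DOUBLE, T_VOID, T_INT, T_OBJECT };

  unsigned iargs = 0, fargs = 0, stk = 0;  // stk counts 4-byte slots, bumped by 2
  for (size_t i = 0; i < sig.size(); i++) {
    std::string where;
    switch (sig[i]) {
      case T_VOID:
        where = "(second half, no slot)";
        break;
      case T_FLOAT:
      case T_DOUBLE:
        if (fargs < n_float) { where = float_regs[fargs++]; }
        else { where = "stack slot " + std::to_string(stk); stk += 2; }
        break;
      default:
        if (iargs < n_int) { where = int_regs[iargs++]; }
        else { where = "stack slot " + std::to_string(stk); stk += 2; }
        break;
    }
    std::printf("arg %zu -> %s\n", i, where.c_str());
  }
  return 0;
}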
If we ran compiled here then ++ // we can (theoretically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++#ifdef COMPILER2 ++ __ empty_FPU_stack(); ++#endif ++ //this is for native ? ++ // Since all args are passed on the stack, total_args_passed * interpreter_ ++ // stack_element_size is the ++ // space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = round_to(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(V0, RA); ++ // set senderSP value ++ //refer to interpreter_mips.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addiu(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. ++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ sd(r, SP, st_off); ++ } else { ++ //FIXME, mips will not enter here ++ // long/double in gpr ++ __ sd(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG parameters from a compiled method to a native method.
++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ sd(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ swc1(fr, SP, st_off); ++ else { ++ __ sdc1(fr, SP, st_off); ++ __ sdc1(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, V0); ++ __ jr (AT); ++ __ delayed()->nop(); ++} ++ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the mips side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ ++ __ move(T9, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. 
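The sentence just above refers to the arithmetic that follows: compiled-code arguments are described in 4-byte VMReg slots, so the i2c adapter converts them to machine words and then rounds up to an even word count for stack alignment. A worked sketch of that conversion, assuming a 64-bit word (wordSize == 8, LogBytesPerWord == 3) and a local stand-in for HotSpot's round_to:

#include <cstdio>

constexpr int wordSize = 8;          // 64-bit target (assumption)
constexpr int LogBytesPerWord = 3;

// Stand-in for HotSpot's round_to: round x up to a multiple of m (m a power of two).
int round_to(int x, int m) { return (x + m - 1) & ~(m - 1); }

int main() {
  for (int comp_args_on_stack : {0, 1, 3, 5}) {
    // Same two steps as gen_i2c_adapter: slots*4 bytes rounded to a word,
    // converted to words, then rounded to an even word count.
    int words = round_to(comp_args_on_stack * 4, wordSize) >> LogBytesPerWord;
    words = round_to(words, 2);
    std::printf("%d compiled stack slots -> reserve %d words (%d bytes)\n",
                comp_args_on_stack, words, words * wordSize);
  }
  return 0;
}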
++ comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = round_to(comp_words_on_stack, 2); ++ __ daddiu(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = V0; ++ __ move(saved_sp, T9); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld(AT, saved_sp, ld_off); ++ __ sd(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld(AT, saved_sp, ld_off - 8); ++ __ sd(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on mips) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. 
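The diagram above explains the ld(r, saved_sp, ld_off - 8) that follows: a Java long occupies two interpreter stack elements, the value sits in the lower-addressed element and the element above it is the unused T_VOID half, so the adapter reads 8 bytes below the nominal offset. A self-contained illustration with plain memory standing in for the interpreter frame:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Two 8-byte interpreter stack elements for one T_LONG: the value is written
  // to the low element; the element above it is the unused T_VOID half.
  uint8_t frame[16] = {0};
  int64_t value = 0x1122334455667788LL;
  int ld_off = 8;                                            // nominal slot of the argument pair

  std::memcpy(frame + (ld_off - 8), &value, sizeof value);   // long lives in the low element

  int64_t loaded;
  std::memcpy(&loaded, frame + (ld_off - 8), sizeof loaded); // mirrors ld(r, saved_sp, ld_off - 8)
  std::printf("read 0x%llx from ld_off - 8\n", (unsigned long long)loaded);
  return 0;
}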
++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld(r, saved_sp, ld_off - 8); ++ } else { ++ __ lw(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ lwc1(fr, saved_sp, ld_off); ++ else { ++ __ ldc1(fr, saved_sp, ld_off); ++ __ ldc1(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ sd(Rmethod, thread, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move methodOop to V0 in case we end up in a c2i adapter. ++ // the c2i adapters expect methodOop in V0 (c2) because c2's ++ // resolve stubs return the result (the method) in V0. ++ // I'd love to fix this. ++ __ move(V0, Rmethod); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know G5 holds the methodOop. The ++ // args start out packed in the compiled layout. They need to be unpacked ++ // into the interpreter layout. This will almost always require some stack ++ // space. We grow the current (compiled) stack, then repack the args. We ++ // finally end in a jump to the generic interpreter entry point. On exit ++ // from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relies solely on SP and not FP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ { ++ Register holder = T1; ++ Register receiver = T0; ++ Register temp = T8; ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ Label missed; ++ ++ //add for compressedoops ++ __ load_klass(temp, receiver); ++ ++ __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); ++ __ bne(AT, temp, missed); ++ __ delayed()->nop(); ++ // Method might have been compiled since the call site was patched to ++ // interpreted; if that is the case treat it as a miss so we can get ++ // the call site corrected.
++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, skip_fixup); ++ __ delayed()->nop(); ++ __ bind(missed); ++ ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ ++ address c2i_entry = __ pc(); ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on MIPS"); ++ // Return the number of VMReg stack_slots needed for the args. ++ // This value does not include an abi space (like register window ++ // save area). ++ ++ // We return the amount of VMReg stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. Since we always ++ // have space for storing at least 6 registers to memory we start with that. ++ // See int_stk_helper for a further discussion. ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. ++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin fp[16] => sp[8] ++// 10: L // stdout fp[24] => sp[16] ++// 11: L // stderr fp[32] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < 
Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ swc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ sdc1(FSF, FP, -wordSize ); ++ break; ++ case T_VOID: break; ++ case T_LONG: ++ __ sd(V0, FP, -wordSize); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ sd(V0, FP, -wordSize); ++ break; ++ default: { ++ __ sw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ lwc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ ldc1(FSF, FP, -wordSize ); ++ break; ++ case T_LONG: ++ __ ld(V0, FP, -wordSize); ++ break; ++ case T_VOID: break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld(V0, FP, -wordSize); ++ break; ++ default: { ++ __ lw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. 
First figure out the location we use as a handle ++ ++ //FIXME, for mips, dst can be register ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = V0; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ //if dst is register ++ //FIXME, do mips need out preserve stack slots? ++ int offset_in_older_frame = src.first()->reg2stack() ++ + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame ++ + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in a register; we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = V0; ++ //Important: refer to java_calling_convention ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ sd( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ //if dst is register ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sw(AT, SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ lwc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ // reg to stack ++ if(dst.first()->is_stack()) ++ __ swc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ else ++ __ mov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the java nor C calling convention will use registers ++ // for longs.
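object_move above applies the usual JNI rule that native code never sees a raw oop: the oop is spilled to a known stack slot and the argument becomes the address of that slot, except that a NULL oop is passed through as a NULL handle rather than as a pointer to a NULL slot. A minimal sketch of that null filtering, with plain pointers standing in for oops and stack slots:

#include <cstdio>

struct Object {};   // stand-in for an oop

// Spill the oop to the reserved slot and hand out the slot's address,
// but let a NULL oop become a NULL handle (the beq ... lea pattern above).
Object** box_as_handle(Object* oop, Object** slot) {
  *slot = oop;
  return (oop == nullptr) ? nullptr : slot;
}

int main() {
  Object obj;
  Object* slots[2] = { nullptr, nullptr };
  std::printf("non-null oop -> handle %p\n", (void*)box_as_handle(&obj, &slots[0]));
  std::printf("null oop     -> handle %p\n", (void*)box_as_handle(nullptr, &slots[1]));
  return 0;
}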
++ ++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move( (dst.first())->as_Register() , (src.first())->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. ++ ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } ++ ++ } else { ++ // reg to stack ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ sdc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T9; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", iid); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ bool is_critical_native = true; ++ address native_func = critical_entry; ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ Thread* THREAD = Thread::current(); ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(CHECK_NULL); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
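The bookkeeping above widens the Java signature into the C one: every regular wrapper gains a hidden leading JNIEnv*, a static method also gains the class mirror, and a critical native instead turns each T_ARRAY argument into a (length, element pointer) pair. A small sketch of that counting, with the two flags as plain booleans (illustrative only, not the HotSpot types):

#include <cstdio>
#include <vector>

enum BasicType { T_INT, T_OBJECT, T_ARRAY };

int count_c_args(const std::vector<BasicType>& in_sig, bool is_static, bool is_critical_native) {
  int total_c_args = static_cast<int>(in_sig.size());
  if (!is_critical_native) {
    total_c_args += 1;                         // hidden JNIEnv*
    if (is_static) total_c_args += 1;          // hidden class mirror
  } else {
    for (BasicType bt : in_sig)
      if (bt == T_ARRAY) total_c_args += 1;    // array becomes (int length, elem*)
  }
  return total_c_args;
}

int main() {
  std::vector<BasicType> sig = { T_ARRAY, T_INT, T_OBJECT };
  std::printf("regular static native: %d C args\n", count_c_args(sig, true, false));
  std::printf("critical native      : %d C args\n", count_c_args(sig, false, true));
  return 0;
}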
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = round_to(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
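The frame picture above is produced by straightforward slot accounting before the final rounding to the stack alignment. A worked sketch of that accumulation; slots_per_word, stack_slot_size and StackAlignmentInSlots below are assumed illustrative values (4-byte slots, 8-byte words, 16-byte alignment), not taken from the port's headers:

#include <cstdio>

constexpr int slots_per_word = 2;         // assumption: 4-byte slots, 8-byte words
constexpr int stack_slot_size = 4;
constexpr int StackAlignmentInSlots = 4;  // assumption: 16-byte stack alignment

int round_to(int x, int m) { return (x + m - 1) & ~(m - 1); }

int main() {
  int out_arg_slots = 10;                                // pretend c_calling_convention returned this
  bool is_static = true, is_synchronized = true;

  int stack_slots = out_arg_slots;                       // abi out-preserve area assumed 0 here
  int oop_handle_offset = stack_slots;
  stack_slots += 9 * slots_per_word;                     // oop handle area (T0, A0..A7)
  if (is_static)       stack_slots += slots_per_word;    // handlized class mirror
  if (is_synchronized) stack_slots += slots_per_word;    // lock box
  stack_slots += 2 + 9 * slots_per_word;                 // result temp plus saved RA/FP area

  stack_slots = round_to(stack_slots, StackAlignmentInSlots);
  std::printf("oop handles at slot %d, frame = %d slots = %d bytes\n",
              oop_handle_offset, stack_slots, stack_slots * stack_slot_size);
  return 0;
}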
++ ++ //refer to register_mips.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T9, receiver); ++ __ beq(T9, ic_reg, hit); ++ __ delayed()->nop(); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ // Generate stack overflow check ++ if (UseStackBanging) { ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); ++ } ++ ++ // Generate a new frame for the wrapper. ++ // do mips need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. ++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size - 2*wordSize; ++ ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ if (is_critical_native) { ++ Unimplemented(); ++ // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, ++ // oop_handle_offset, oop_maps, in_regs, in_sig_bt); ++ } ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. This pretty much ++ // guarantees that register layout will not match (and mips doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. The shuffle is described by the two calling convention ++ // vectors we have in our possession. We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. 
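Because every move in this shuffle is reg->stack or stack->stack, it is enough for the regular JNI case to walk the Java argument vector from the last argument down, pairing each Java index i with its C-side index c_arg. A hedged sketch of how that (i, c_arg) order comes out:

#include <cstdio>
#include <utility>
#include <vector>

int main() {
  int total_in_args = 4;
  bool is_static = true;
  int total_c_args = total_in_args + (is_static ? 2 : 1);   // JNIEnv* (+ class mirror)

  // Same pairing as the non-critical branch above: highest Java argument first.
  std::vector<std::pair<int, int>> arg_order;
  for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--)
    arg_order.push_back({i, c_arg});

  for (const auto& p : arg_order)
    std::printf("java arg %d -> C arg %d\n", p.first, p.second);
  return 0;
}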
++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ 
freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. ++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load opp into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( ++ (method->method_holder())->java_mirror())); ++ ++ ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ // Now handlize the static class mirror it's known not-null. ++ __ sd( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ intptr_t the_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ set_last_Java_frame(SP, noreg, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)the_pc ; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? 
++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T9; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ move(swap_reg, 1); ++ ++ __ ld(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ dsubu(swap_reg, swap_reg, SP); ++ __ move(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ __ delayed()->nop(); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addiu(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ } ++ ++ // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) ++ // Load the second arguments into A1 ++ //__ ld(A1, SP , wordSize ); // klass ++ ++ // Now set thread in native ++ __ addiu(AT, R0, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. 
We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ addiu(AT, R0, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ sync(); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. ++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ Label after_transition; ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addiu(SP, SP, -wordSize); ++ __ push(S2); ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ if (!is_critical_native) { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } else { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addiu(SP, SP, wordSize); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ if (is_critical_native) { ++ // The call above performed the transition to thread_in_Java so ++ // skip the transition logic below. 
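What the stores and checks above implement is the standard JNI thread-state protocol: publish _thread_in_native before the call, flip to _thread_in_native_trans afterwards, and only return to _thread_in_Java once the safepoint/suspend check has passed (otherwise take a slow path that may block). A compact, purely illustrative sketch of the order those states are visited in; the types and the slow-path stub below are not HotSpot code:

#include <cstdio>

enum ThreadState { _thread_in_Java, _thread_in_native, _thread_in_native_trans };

struct FakeThread {
  ThreadState state = _thread_in_Java;
  bool safepoint_or_suspend_pending = false;
};

void slow_path_transition(FakeThread&) { /* check_special_condition_for_native_trans may block here */ }

void native_call_sequence(FakeThread& t) {
  t.state = _thread_in_native;         // published (with a store release) before the native call
  /* ... native function runs ... */
  t.state = _thread_in_native_trans;   // GC may still treat us as "in native"; oops untouchable
  if (t.safepoint_or_suspend_pending)
    slow_path_transition(t);
  t.state = _thread_in_Java;           // after this the wrapper may unbox handles, unlock, etc.
}

int main() {
  FakeThread t;
  t.safepoint_or_suspend_pending = true;
  native_call_sequence(t);
  std::printf("final state: %d\n", (int)t.state);
  return 0;
}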
++ __ beq(R0, R0, after_transition); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addiu(AT, R0, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addiu(AT, AT, -JavaThread::stack_guard_yellow_reserved_disabled); ++ __ beq(AT, R0, reguard); ++ __ delayed()->nop(); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? ++ ++ __ ld(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addiu(c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. ++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(V0, thread, T9); ++ } ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ __ delayed()->nop(); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 
1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, - 3*wordSize); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S2); ++ __ addiu(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ __ delayed()->nop(); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addiu(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ addiu(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++ //add for compressedoops ++ __ reinit_heapbase(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ __ delayed()->nop(); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ __ delayed()->nop(); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // 
Forward the exception ++ __ bind(exception_pending); ++ ++ // remove possible return value from FPU register stack ++ __ empty_FPU_stack(); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ move(SP, FP); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ if (is_critical_native) { ++ nm->set_lazy_critical_native(true); ++ } ++ ++ return nm; ++ ++} ++ ++#ifdef HAVE_DTRACE_H ++// --------------------------------------------------------------------------- ++// Generate a dtrace nmethod for a given signature. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// abi and then leaves nops at the position you would expect to call a native ++// function. When the probe is enabled the nops are replaced with a trap ++// instruction that dtrace inserts and the trace will cause a notification ++// to dtrace. ++// ++// The probes are only able to take primitive types and java/lang/String as ++// arguments. No other java types are allowed. Strings are converted to utf8 ++// strings so that from dtrace point of view java strings are converted to C ++// strings. There is an arbitrary fixed limit on the total space that a method ++// can use for converting the strings. (256 chars per string in the signature). ++// So any java string larger then this is truncated. ++ ++static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; ++static bool offsets_initialized = false; ++ ++static VMRegPair reg64_to_VMRegPair(Register r) { ++ VMRegPair ret; ++ if (wordSize == 8) { ++ ret.set2(r->as_VMReg()); ++ } else { ++ ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); ++ } ++ return ret; ++} ++ ++ ++nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, ++ methodHandle method) { ++ ++ ++ // generate_dtrace_nmethod is guarded by a mutex so we are sure to ++ // be single threaded in this method. ++ assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); ++ ++ // Fill in the signature array, for the calling-convention call. ++ int total_args_passed = method->size_of_parameters(); ++ ++ BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); ++ VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); ++ ++ // The signature we are going to use for the trap that dtrace will see ++ // java/lang/String is converted. We drop "this" and any other object ++ // is converted to NULL. (A one-slot java/lang/Long object reference ++ // is converted to a two-slot long, which is why we double the allocation). 
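++ //
++ // For example, a probe whose Java signature is (String, Integer, Object)
++ // is presented to dtrace as roughly (char* utf8, int, NULL), and the
++ // receiver of a non-static method is dropped entirely.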
++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); ++ ++ int i=0; ++ int total_strings = 0; ++ int first_arg_to_pass = 0; ++ int total_c_args = 0; ++ ++ // Skip the receiver as dtrace doesn't want to see it ++ if( !method->is_static() ) { ++ in_sig_bt[i++] = T_OBJECT; ++ first_arg_to_pass = 1; ++ } ++ ++ SignatureStream ss(method->signature()); ++ for ( ; !ss.at_return_type(); ss.next()) { ++ BasicType bt = ss.type(); ++ in_sig_bt[i++] = bt; // Collect remaining bits of signature ++ out_sig_bt[total_c_args++] = bt; ++ if( bt == T_OBJECT) { ++ symbolOop s = ss.as_symbol_or_null(); ++ if (s == vmSymbols::java_lang_String()) { ++ total_strings++; ++ out_sig_bt[total_c_args-1] = T_ADDRESS; ++ } else if (s == vmSymbols::java_lang_Boolean() || ++ s == vmSymbols::java_lang_Byte()) { ++ out_sig_bt[total_c_args-1] = T_BYTE; ++ } else if (s == vmSymbols::java_lang_Character() || ++ s == vmSymbols::java_lang_Short()) { ++ out_sig_bt[total_c_args-1] = T_SHORT; ++ } else if (s == vmSymbols::java_lang_Integer() || ++ s == vmSymbols::java_lang_Float()) { ++ out_sig_bt[total_c_args-1] = T_INT; ++ } else if (s == vmSymbols::java_lang_Long() || ++ s == vmSymbols::java_lang_Double()) { ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } ++ } else if ( bt == T_LONG || bt == T_DOUBLE ) { ++ in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots ++ // We convert double to long ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } else if ( bt == T_FLOAT) { ++ // We convert float to int ++ out_sig_bt[total_c_args-1] = T_INT; ++ } ++ } ++ ++ assert(i==total_args_passed, "validly parsed signature"); ++ ++ // Now get the compiled-Java layout as input arguments ++ int comp_args_on_stack; ++ comp_args_on_stack = SharedRuntime::java_calling_convention( ++ in_sig_bt, in_regs, total_args_passed, false); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the a native (non-jni) function would expect them. To figure out ++ // where they go we convert the java signature to a C signature and remove ++ // T_VOID for any long/double we might have received. ++ ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Plus a temp for possible converion of float/double/long register args ++ ++ int conversion_temp = stack_slots; ++ stack_slots += 2; ++ ++ ++ // Now space for the string(s) we must convert ++ ++ int string_locs = stack_slots; ++ stack_slots += total_strings * ++ (max_dtrace_string_size / VMRegImpl::stack_slot_size); ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | string[n] | ++ // |---------------------| <- string_locs[n] ++ // | string[n-1] | ++ // |---------------------| <- string_locs[n-1] ++ // | ... | ++ // | ... 
| ++ // |---------------------| <- string_locs[1] ++ // | string[0] | ++ // |---------------------| <- string_locs[0] ++ // | temp | ++ // |---------------------| <- conversion_temp ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ // First thing make an ic check to see if we should even be here ++ ++ { ++ Label L; ++ const Register temp_reg = G3_scratch; ++ Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); ++ __ verify_oop(O0); ++ __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); ++ __ cmp(temp_reg, G5_inline_cache_reg); ++ __ brx(Assembler::equal, true, Assembler::pt, L); ++ __ delayed()->nop(); ++ ++ __ jump_to(ic_miss, 0); ++ __ delayed()->nop(); ++ __ align(CodeEntryAlignment); ++ __ bind(L); ++ } ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ // Generate stack overflow check before creating frame ++ __ generate_stack_overflow_check(stack_size); ++ ++ // Generate a new frame for the wrapper. ++ __ save(SP, -stack_size, SP); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ VMRegPair zero; ++ const Register g0 = G0; // without this we get a compiler warning (why??) 
++ zero.set2(g0->as_VMReg()); ++ ++ int c_arg, j_arg; ++ ++ Register conversion_off = noreg; ++ ++ for (j_arg = first_arg_to_pass, c_arg = 0 ; ++ j_arg < total_args_passed ; j_arg++, c_arg++ ) { ++ ++ VMRegPair src = in_regs[j_arg]; ++ VMRegPair dst = out_regs[c_arg]; ++ ++#ifdef ASSERT ++ if (src.first()->is_Register()) { ++ assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); ++ } else if (src.first()->is_FloatRegister()) { ++ assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)], "ack!"); ++ } ++ if (dst.first()->is_Register()) { ++ reg_destroyed[dst.first()->as_Register()->encoding()] = true; ++ } else if (dst.first()->is_FloatRegister()) { ++ freg_destroyed[dst.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)] = true; ++ } ++#endif /* ASSERT */ ++ ++ switch (in_sig_bt[j_arg]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ { ++ if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || ++ out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { ++ // need to unbox a one-slot value ++ Register in_reg = L0; ++ Register tmp = L2; ++ if ( src.first()->is_reg() ) { ++ in_reg = src.first()->as_Register(); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); ++ } ++ // If the final destination is an acceptable register ++ if ( dst.first()->is_reg() ) { ++ if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { ++ tmp = dst.first()->as_Register(); ++ } ++ } ++ ++ Label skipUnbox; ++ if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { ++ __ mov(G0, tmp->successor()); ++ } ++ __ br_null(in_reg, true, Assembler::pn, skipUnbox); ++ __ delayed()->mov(G0, tmp); ++ ++ BasicType bt = out_sig_bt[c_arg]; ++ int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); ++ switch (bt) { ++ case T_BYTE: ++ __ ldub(in_reg, box_offset, tmp); break; ++ case T_SHORT: ++ __ lduh(in_reg, box_offset, tmp); break; ++ case T_INT: ++ __ ld(in_reg, box_offset, tmp); break; ++ case T_LONG: ++ __ ld_long(in_reg, box_offset, tmp); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ bind(skipUnbox); ++ // If tmp wasn't final destination copy to final destination ++ if (tmp == L2) { ++ VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); ++ if (out_sig_bt[c_arg] == T_LONG) { ++ long_move(masm, tmp_as_VM, dst); ++ } else { ++ move32_64(masm, tmp_as_VM, out_regs[c_arg]); ++ } ++ } ++ if (out_sig_bt[c_arg] == T_LONG) { ++ assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); ++ ++c_arg; // move over the T_VOID to keep the loop indices in sync ++ } ++ } else if (out_sig_bt[c_arg] == T_ADDRESS) { ++ Register s = ++ src.first()->is_reg() ? src.first()->as_Register() : L2; ++ Register d = ++ dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // We store the oop now so that the conversion pass can reach ++ // while in the inner frame. This will be the only store if ++ // the oop is NULL. 
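++ // (Four cases follow: register->register, register->stack,
++ // stack->register, and stack->stack staged through L2.)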
++ if (s != L2) { ++ // src is register ++ if (d != L2) { ++ // dst is register ++ __ mov(s, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } else { ++ // src not a register ++ assert(Assembler::is_simm13(reg2offset(src.first()) + ++ STACK_BIAS), "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); ++ if (d == L2) { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } else if (out_sig_bt[c_arg] != T_VOID) { ++ // Convert the arg to NULL ++ if (dst.first()->is_reg()) { ++ __ mov(G0, dst.first()->as_Register()); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ move32_64(masm, src, dst); ++ } else { ++ if (dst.first()->is_reg()) { ++ // freg -> reg ++ int off = ++ STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ Register d = dst.first()->as_Register(); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld(SP, conversion_off , d); ++ } ++ } else { ++ // freg -> mem ++ int off = STACK_BIAS + reg2offset(dst.first()); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ } ++ } ++ } ++ break; ++ ++ case T_DOUBLE: ++ assert( j_arg + 1 < total_args_passed && ++ in_sig_bt[j_arg + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ long_move(masm, src, dst); ++ } else { ++ Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // Destination could be an odd reg on 32bit in which case ++ // we can't load direct to the destination. 
++ ++ if (!d->is_even() && wordSize == 4) { ++ d = L2; ++ } ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld_long(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld_long(SP, conversion_off, d); ++ } ++ if (d == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } ++ break; ++ ++ case T_LONG : ++ // 32bit can't do a split move of something like g1 -> O0, O1 ++ // so use a memory temp ++ if (src.is_single_phys_reg() && wordSize == 4) { ++ Register tmp = L2; ++ if (dst.first()->is_reg() && ++ (wordSize == 8 || dst.first()->as_Register()->is_even())) { ++ tmp = dst.first()->as_Register(); ++ } ++ ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stx(src.first()->as_Register(), SP, off); ++ __ ld_long(SP, off, tmp); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stx(src.first()->as_Register(), SP, conversion_off); ++ __ ld_long(SP, conversion_off, tmp); ++ } ++ ++ if (tmp == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } else { ++ long_move(masm, src, dst); ++ } ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ move32_64(masm, src, dst); ++ } ++ } ++ ++ ++ // If we have any strings we must store any register based arg to the stack ++ // This includes any still live xmm registers too. ++ ++ if (total_strings > 0 ) { ++ ++ // protect all the arg registers ++ __ save_frame(0); ++ __ mov(G2_thread, L7_thread_cache); ++ const Register L2_string_off = L2; ++ ++ // Get first string offset ++ __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); ++ ++ for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { ++ if (out_sig_bt[c_arg] == T_ADDRESS) { ++ ++ VMRegPair dst = out_regs[c_arg]; ++ const Register d = dst.first()->is_reg() ? ++ dst.first()->as_Register()->after_save() : noreg; ++ ++ // It's a string the oop and it was already copied to the out arg ++ // position ++ if (d != noreg) { ++ __ mov(d, O0); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); ++ } ++ Label skip; ++ ++ __ br_null(O0, false, Assembler::pn, skip); ++ __ delayed()->addu(FP, L2_string_off, O1); ++ ++ if (d != noreg) { ++ __ mov(O1, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), ++ relocInfo::runtime_call_type); ++ __ delayed()->addu(L2_string_off, max_dtrace_string_size, L2_string_off); ++ ++ __ bind(skip); ++ ++ } ++ ++ } ++ __ mov(L7_thread_cache, G2_thread); ++ __ restore(); ++ ++ } ++ ++ ++ // Ok now we are done. 
Need to place the nop that dtrace wants in order to ++ // patch in the trap ++ ++ int patch_offset = ((intptr_t)__ pc()) - start; ++ ++ __ nop(); ++ ++ ++ // Return ++ ++ __ ret(); ++ __ delayed()->restore(); ++ ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_dtrace_nmethod( ++ method, masm->code(), vep_offset, patch_offset, frame_complete, ++ stack_slots / VMRegImpl::slots_per_word); ++ return nm; ++ ++} ++ ++#endif // HAVE_DTRACE_H ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000, 2048); ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ // Correct the return address we were given. ++ //FIXME, return address is on the tos or Ra? ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ // Save everything in sight. ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ move(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int reexecute_offset = __ pc() - start; ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ move(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. 
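++ // (V0 is stored to JavaThread::_exception_oop and V1 to
++ // JavaThread::_exception_pc just below.)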
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ move(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ delayed()->nop(); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ // Compiled code leaves the floating point stack dirty, empty it. ++ __ empty_FPU_stack(); ++ ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ move(A1, reason); // exec_mode ++ __ addiu(SP, SP, -additional_words * wordSize); ++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. 
++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call((address)Deoptimization::fetch_unroll_info); ++ //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addiu(SP, SP, additional_words * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ sw(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ move(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ delayed()->nop(); ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); ++ __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. Everything else is either dead or captured ++ // in the vframeArray. ++ ++ RegisterSaver::restore_result_registers(masm); ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. 
++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ addu(SP, SP, AT); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addiu(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++ ++ ++ // Load count of frams into T3 ++ __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ subu(SP, SP, AT); ++ ++ // Push interpreter frames in a loop ++ // ++ //Loop: ++ // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld ++ // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] ++ // 0x000000555bd82d20: daddiu t2, t2, 0xfffffff0 ; t2 -= 16 ++ // 0x000000555bd82d24: daddiu sp, sp, 0xfffffff0 ++ // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp ++ // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at ++ // 0x000000555bd82d30: daddu fp, sp, zero ; fp <- sp ++ // 0x000000555bd82d34: dsubu sp, sp, t2 ; sp -= t2 ++ // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ // 0x000000555bd82d40: daddu s4, sp, zero ; move(sender_sp, SP); ++ // 0x000000555bd82d44: daddiu t3, t3, 0xffffffff ; count -- ++ // 0x000000555bd82d48: daddiu t1, t1, 0x4 ; sizes += 4 ++ // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18 ++ // 0x000000555bd82d50: daddiu t0, t0, 0x4 ; <--- error t0 += 8 ++ // ++ // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ subu(SP, SP, T2); // Prolog! 
++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addiu(count, count, -1); // decrement counter ++ __ addiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ bne(count, R0, loop); ++ __ delayed()->addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ // Re-push self-frame ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ __ addiu(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize); ++ __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local ++ __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); ++ ++ ++ // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. ++ __ move(A1, reason); // exec_mode ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(A0, thread); // thread ++ __ addiu(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0)); ++ ++ __ push(V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words + 1) * wordSize); ++ __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words + 1) * wordSize); ++ __ ldc1(F0, SP, (RegisterSaver::fpResultOffset() + additional_words + 1) * wordSize);// Pop float stack and store in local ++ __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + additional_words + 2) * wordSize); ++ //FIXME, ++ // Clear floating point stack before returning to interpreter ++ __ empty_FPU_stack(); ++ //FIXME, we should consider about float and double ++ // Push a float or double return value if necessary. 
++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ ++ address start = __ pc(); ++ ++ // Push self-frame. ++ __ daddiu(SP, SP, -framesize * BytesPerInt); ++ ++ __ sd(RA, SP, return_off * BytesPerInt); ++ __ sd(FP, SP, fp_off * BytesPerInt); ++ ++ __ daddiu(FP, SP, fp_off * BytesPerInt); ++ ++ // Clear the floating point exception stack ++ __ empty_FPU_stack(); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ long save_pc = (long)__ pc() + 56; ++ __ patchable_set48(AT, (long)save_pc); ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ addiu(A2, R0, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::uncommon_trap); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ //oop_maps->add_gc_map( __ offset(), true, map); ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_ptr(AT, unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ li(T9, Deoptimization::Unpack_uncommon_trap); ++ __ beq(AT, T9, L); ++ __ delayed()->nop(); ++ __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); ++ __ bind(L); ++ } ++#endif ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. 
If interpreted we will create an ++ // and c2i here) ++ ++ __ daddiu(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ daddu(SP, SP, AT); ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T9; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ dsubu(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld(AT, pcs, 0); // save return address ++ __ daddiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ dsubu(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ daddiu(count, count, -1); // decrement counter ++ __ daddiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ __ delayed()->nop(); // Bump array pointer (pcs) ++ ++ __ ld(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(NOREG, FP, the_pc); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ addiu(A1, R0, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::unpack_frames); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) ); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! 
++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ address call_pc = NULL; ++ bool cause_return = (pool_type == POLL_AT_RETURN); ++ bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. ++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, TSR is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld_ptr(TSR, thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ __ st_ptr(TSR, SP, RegisterSaver::raOffset() * wordSize); ++ } ++ ++ // Do the call ++ __ move(A0, thread); ++ __ call(call_ptr); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ __ delayed()->nop(); ++ ++ // Exception pending ++ ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ __ patchable_jump((address)StubRoutines::forward_exception_entry()); ++ ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail; ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld_ptr(AT, SP, RegisterSaver::raOffset() * wordSize); ++ __ bne(AT, TSR, no_adjust); ++ __ delayed()->nop(); ++ ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. 
++ // See NativeInstruction::is_safepoint_poll() ++ __ lwu(AT, TSR, 0); ++ __ dsrl(AT, AT, 16); ++ __ andi(AT, AT, 0xfc1f); ++ __ xori(AT, AT, 0x8c01); ++ __ bne(AT, R0, bail); ++ __ delayed()->nop(); ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ addiu(RA, TSR, 4); // NativeInstruction::instruction_size=4 ++ __ st_ptr(RA, SP, RegisterSaver::raOffset() * wordSize); ++ } ++ ++ __ bind(no_adjust); ++ // Normal exit, register restoring and exit ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif ++ ++ // Make sure all code is generated ++ masm->flush(); ++ ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ int start = __ offset(); ++ map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ __ get_thread(thread); ++#else ++ const Register thread = TREG; ++#endif ++ ++ __ move(A0, thread); ++ __ set_last_Java_frame(noreg, FP, NULL); ++ //align the stack before invoke native ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ call(destination); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map( __ offset() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ __ delayed()->nop(); ++ // get the returned Method* ++ //FIXME, do mips need this ? 
++ __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8 ++ __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize); ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ RegisterSaver::restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go the callee method. ++ __ jr(V0); ++ __ delayed()->nop(); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ RegisterSaver::restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} ++ ++ ++//------------------------------Montgomery multiplication------------------------ ++// ++ ++// Subtract 0:b from carry:a. Return carry. ++static unsigned long ++sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { ++ long borrow = 0, t = 0; ++ unsigned long tmp0, tmp1; ++ __asm__ __volatile__ ( ++ "0: \n" ++ "ld %[tmp0], 0(%[a]) \n" ++ "ld %[tmp1], 0(%[b]) \n" ++ "sltu %[t], %[tmp0], %[borrow] \n" ++ "dsubu %[tmp0], %[tmp0], %[borrow] \n" ++ "sltu %[borrow], %[tmp0], %[tmp1] \n" ++ "or %[borrow], %[borrow], %[t] \n" ++ "dsubu %[tmp0], %[tmp0], %[tmp1] \n" ++ "sd %[tmp0], 0(%[a]) \n" ++ "daddiu %[a], %[a], 8 \n" ++ "daddiu %[b], %[b], 8 \n" ++ "daddiu %[len], %[len], -1 \n" ++ "bgtz %[len], 0b \n" ++ "dsubu %[tmp0], %[carry], %[borrow] \n" ++ : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"+r"(t) ++ : [carry]"r"(carry) ++ : "memory" ++ ); ++ return tmp0; ++} ++ ++// Multiply (unsigned) Long A by Long B, accumulating the double- ++// length result into the accumulator formed of t0, t1, and t2. ++inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// As above, but add twice the double-length result into the ++// accumulator. 
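++ //
++ // A portable-C sketch of one accumulate step (illustration only; the
++ // inline asm above is what is actually used):
++ //
++ //   unsigned __int128 p = (unsigned __int128)A * B;
++ //   unsigned long lo = (unsigned long)p, hi = (unsigned long)(p >> 64);
++ //   t0 += lo;  unsigned long c = (t0 < lo);   // carry out of t0
++ //   t1 += c;   unsigned long t = (t1 < c);
++ //   t1 += hi;  c = (t1 < hi) | t;             // carry out of t1
++ //   t2 += c;
++ //
++ // MACC performs this step once per call, MACC2 (below) performs it twice.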
++inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// Fast Montgomery multiplication. The derivation of the algorithm is ++// in A Cryptographic Library for the Motorola DSP56000, ++// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ ++static void __attribute__((noinline)) ++montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ for (j = 0; j < i; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ MACC(a[i], b[0], t0, t1, t2); ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery multiply"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int j; ++ for (j = i-len+1; j < len; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Fast Montgomery squaring. This uses asymptotically 25% fewer ++// multiplies so it should be up to 25% faster than Montgomery ++// multiplication. However, its loop control is more complex and it ++// may actually run slower on some machines. 
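++ //
++ // The saving comes from the symmetry of a square: in column i the cross
++ // products a[j]*a[i-j] and a[i-j]*a[j] are equal, so each pair with
++ // j < i-j is accumulated once using MACC2 (which adds the product twice),
++ // and only the diagonal term a[i/2]*a[i/2], present when i is even, is
++ // added with a plain MACC.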
++ ++static void __attribute__((noinline)) ++montgomery_square(unsigned long a[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ int end = (i+1)/2; ++ for (j = 0; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < i; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery square"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int start = i-len+1; ++ int end = start + (len - start)/2; ++ int j; ++ for (j = start; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < len; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Swap words in a longword. ++static unsigned long swap(unsigned long x) { ++ return (x << 32) | (x >> 32); ++} ++ ++// Copy len longwords from s to d, word-swapping as we go. The ++// destination array is reversed. ++static void reverse_words(unsigned long *s, unsigned long *d, int len) { ++ d += len; ++ while(len-- > 0) { ++ d--; ++ *d = swap(*s); ++ s++; ++ } ++} ++ ++// The threshold at which squaring is advantageous was determined ++// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. ++// Doesn't seem to be relevant for MIPS64 so we use the same value. ++#define MONTGOMERY_SQUARING_THRESHOLD 64 ++ ++void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_multiply must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 8k bytes of stack space. ++ int total_allocation = longwords * sizeof (unsigned long) * 4; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *b = scratch + 1 * longwords, ++ *n = scratch + 2 * longwords, ++ *m = scratch + 3 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)b_ints, b, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} ++ ++void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_square must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 6k bytes of stack space. 
++ int total_allocation = longwords * sizeof (unsigned long) * 3; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *n = scratch + 1 * longwords, ++ *m = scratch + 2 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ if (len >= MONTGOMERY_SQUARING_THRESHOLD) { ++ ::montgomery_square(a, n, m, (unsigned long)inv, longwords); ++ } else { ++ ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); ++ } ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} +diff --git a/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp +new file mode 100644 +index 0000000000..9fe2bc8377 +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp +@@ -0,0 +1,2162 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) ++//#define a__ ((Assembler*)_masm)-> ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // ABI mips n64 ++ // This fig is not MIPS ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp ++ // 3 [ result ] <--- a1 ++ // 4 [ result_type ] <--- a2 ++ // 5 [ method ] <--- a3 ++ // 6 [ entry_point ] <--- a4 ++ // 7 [ parameters ] <--- a5 ++ // 8 [ parameter_size ] <--- a6 ++ // 9 [ thread ] <--- a7 ++ ++ // ++ // n64 does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ //-13 [ thread ] ++ //-12 [ result_type ] <--- a2 ++ //-11 [ result ] <--- a1 ++ //-10 [ ] ++ // -9 [ ptr. to call wrapper ] <--- a0 ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ] <--- old sp ++ // ++ // Find a right place in the call_stub for GP. ++ // GP will point to the starting point of Interpreter::dispatch_table(itos). ++ // It should be saved/restored before/after Java calls. ++ // ++ enum call_stub_layout { ++ RA_off = 1, ++ FP_off = 0, ++ BCP_off = -1, ++ LVP_off = -2, ++ TSR_off = -3, ++ S1_off = -4, ++ S3_off = -5, ++ S4_off = -6, ++ S5_off = -7, ++ S6_off = -8, ++ call_wrapper_off = -9, ++ result_off = -11, ++ result_type_off = -12, ++ thread_off = -13, ++ total_off = thread_off - 1, ++ GP_off = -14, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! 
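++  // --------------------------------------------------------------------------
++  // [Editor's note - orientation only.] The incoming registers listed in the
++  // frame-layout comment above follow the n64 argument order of the CallStub
++  // type declared upstream in stubRoutines.hpp (reproduced from memory, so
++  // treat this as a hedged sketch rather than the authoritative declaration):
++  //   typedef void (*CallStub)(address   link,               // A0: call wrapper
++  //                            intptr_t* result,             // A1
++  //                            BasicType result_type,        // A2
++  //                            Method*   method,             // A3
++  //                            address   entry_point,        // A4
++  //                            intptr_t* parameters,         // A5
++  //                            int       size_of_parameters, // A6
++  //                            TRAPS);                       // A7: thread
++  // --------------------------------------------------------------------------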
++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ __ daddiu(SP, SP, total_off * wordSize); ++ __ sd(BCP, FP, BCP_off * wordSize); ++ __ sd(LVP, FP, LVP_off * wordSize); ++ __ sd(TSR, FP, TSR_off * wordSize); ++ __ sd(S1, FP, S1_off * wordSize); ++ __ sd(S3, FP, S3_off * wordSize); ++ __ sd(S4, FP, S4_off * wordSize); ++ __ sd(S5, FP, S5_off * wordSize); ++ __ sd(S6, FP, S6_off * wordSize); ++ __ sd(A0, FP, call_wrapper_off * wordSize); ++ __ sd(A1, FP, result_off * wordSize); ++ __ sd(A2, FP, result_type_off * wordSize); ++ __ sd(A7, FP, thread_off * wordSize); ++ __ sd(GP, FP, GP_off * wordSize); ++ ++ __ set64(GP, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ /* FIXME: I do not know how to realize stop in mips arch, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ delayed()->nop(); ++ __ dsll(AT, A6, Interpreter::logStackElementSize); ++ __ dsubu(SP, SP, AT); ++ __ move(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP , AT); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ dsll(T3, T0, LogBytesPerWord); ++ __ daddu(T3, T3, A5); ++ __ ld(AT, T3, -wordSize); ++ __ dsll(T3, T2, LogBytesPerWord); ++ __ daddu(T3, T3, SP); ++ __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0)); ++ __ daddiu(T2, T2, 1); ++ __ daddiu(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ __ delayed()->nop(); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, methodOop in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ __ delayed()->nop(); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ daddiu(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ delayed()->daddiu(T3, T2, (-1) * T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ delayed()->daddiu(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ __ delayed()->nop(); ++ ++ // handle T_INT case ++ __ sd(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld(BCP, FP, BCP_off * wordSize); ++ __ ld(LVP, FP, LVP_off * wordSize); ++ __ ld(GP, FP, GP_off * wordSize); ++ __ ld(TSR, FP, TSR_off * wordSize); ++ ++ __ ld(S1, FP, S1_off * wordSize); ++ __ ld(S3, FP, S3_off * wordSize); ++ __ ld(S4, FP, S4_off * wordSize); ++ __ ld(S5, FP, S5_off * wordSize); ++ __ ld(S6, 
FP, S6_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ // handle return types different from T_INT ++ __ bind(is_long); ++ __ sd(V0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_float); ++ __ swc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_double); ++ __ sdc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ //FIXME, 1.6 mips version add operation of fpu here ++ StubRoutines::gs2::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ sd(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. 
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T9 ++ __ ld(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T9: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ reinit_heapbase(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - array1 ++ // A1 - array2 ++ // A2 - element count ++ // ++ ++ // use T9 as temp ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ int elem_size = 1 << log2_elem_size; ++ Address::ScaleFactor sf = Address::times_1; ++ ++ switch (log2_elem_size) { ++ case 0: sf = Address::times_1; break; ++ case 1: sf = Address::times_2; break; ++ case 2: sf = Address::times_4; break; ++ case 3: sf = Address::times_8; break; ++ } ++ ++ __ dsll(AT, A2, sf); ++ __ daddu(AT, AT, A0); ++ __ daddiu(T9, AT, -elem_size); ++ __ dsubu(AT, A1, A0); ++ __ blez(AT, no_overlap_target); ++ __ delayed()->nop(); ++ __ dsubu(AT, A1, T9); ++ __ bgtz(AT, no_overlap_target); ++ __ delayed()->nop(); ++ ++ // If A0 = 0xf... and A1 = 0x0..., than goto no_overlap_target ++ Label L; ++ __ bgez(A0, L); ++ __ delayed()->nop(); ++ __ bgtz(A1, no_overlap_target); ++ __ delayed()->nop(); ++ __ bind(L); ++ ++ } ++ ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. 
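++  // --------------------------------------------------------------------------
++  // [Editor's sketch - illustrative only.] array_overlap_test() above branches
++  // to the disjoint (forward-copy) stub whenever copying in ascending order is
++  // safe. In portable terms the condition is roughly the following; the helper
++  // name is invented for illustration:
++#if 0
++static bool forward_copy_is_safe(uintptr_t from, uintptr_t to,
++                                 size_t count, size_t elem_size) {
++  if (count == 0) return true;                  // nothing to copy
++  return to <= from                             // dest at or below source
++      || to > from + (count - 1) * elem_size;   // dest past the last source element
++}
++#endif
++  // Otherwise the conjoint stubs copy from the high end downwards so that no
++  // source element is overwritten before it has been read.
++  // (The array-fill stub's documentation continues below.)
++  // --------------------------------------------------------------------------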
++ // ++ // Arguments for generated stub: ++ // to: c_rarg0 ++ // value: c_rarg1 ++ // count: c_rarg2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ const Register to = A0; // source array address ++ const Register value = A1; // value ++ const Register count = A2; // elements count ++ ++ const Register cnt_words = T8; // temp register ++ ++ __ enter(); ++ ++ Label L_fill_elements, L_exit1; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ dins(value, value, 8, 8); // 8 bit -> 16 bit ++ __ dins(value, value, 16, 16); // 16 bit -> 32 bit ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ case T_SHORT: ++ shift = 1; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ dins(value, value, 16, 16); // 16 bit -> 32 bit ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ case T_INT: ++ shift = 2; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ // Align source address at 8 bytes address boundary. ++ Label L_skip_align1, L_skip_align2, L_skip_align4; ++ if (!aligned) { ++ switch (t) { ++ case T_BYTE: ++ // One byte misalignment happens only for byte arrays. ++ __ andi(AT, to, 1); ++ __ beq(AT, R0, L_skip_align1); ++ __ delayed()->nop(); ++ __ sb(value, to, 0); ++ __ daddiu(to, to, 1); ++ __ addiu32(count, count, -1); ++ __ bind(L_skip_align1); ++ // Fallthrough ++ case T_SHORT: ++ // Two bytes misalignment happens only for byte and short (char) arrays. ++ __ andi(AT, to, 1 << 1); ++ __ beq(AT, R0, L_skip_align2); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ addiu32(count, count, -(2 >> shift)); ++ __ bind(L_skip_align2); ++ // Fallthrough ++ case T_INT: ++ // Align to 8 bytes, we know we are 4 byte aligned to start. ++ __ andi(AT, to, 1 << 2); ++ __ beq(AT, R0, L_skip_align4); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ __ daddiu(to, to, 4); ++ __ addiu32(count, count, -(4 >> shift)); ++ __ bind(L_skip_align4); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ ++ // ++ // Fill large chunks ++ // ++ __ srl(cnt_words, count, 3 - shift); // number of words ++ __ dinsu(value, value, 32, 32); // 32 bit -> 64 bit ++ __ sll(AT, cnt_words, 3 - shift); ++ __ subu32(count, count, AT); ++ ++ Label L_loop_begin, L_loop_not_64bytes_fill, L_loop_end; ++ __ addiu32(AT, cnt_words, -8); ++ __ bltz(AT, L_loop_not_64bytes_fill); ++ __ delayed()->nop(); ++ __ bind(L_loop_begin); ++ __ sd(value, to, 0); ++ __ sd(value, to, 8); ++ __ sd(value, to, 16); ++ __ sd(value, to, 24); ++ __ sd(value, to, 32); ++ __ sd(value, to, 40); ++ __ sd(value, to, 48); ++ __ sd(value, to, 56); ++ __ daddiu(to, to, 64); ++ __ addiu32(cnt_words, cnt_words, -8); ++ __ addiu32(AT, cnt_words, -8); ++ __ bgez(AT, L_loop_begin); ++ __ delayed()->nop(); ++ ++ __ bind(L_loop_not_64bytes_fill); ++ __ beq(cnt_words, R0, L_loop_end); ++ __ delayed()->nop(); ++ __ sd(value, to, 0); ++ __ daddiu(to, to, 8); ++ __ addiu32(cnt_words, cnt_words, -1); ++ __ b(L_loop_not_64bytes_fill); ++ __ delayed()->nop(); ++ __ bind(L_loop_end); ++ ++ // Remaining count is less than 8 bytes. Fill it by a single store. 
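++  // --------------------------------------------------------------------------
++  // [Editor's sketch - illustrative only.] The dins/dinsu sequence at the top
++  // of generate_fill() replicates the element value across a 64-bit word so the
++  // main loop can store 8 or 64 bytes per iteration. For the byte case the
++  // widening step is, in portable C:
++#if 0
++static inline unsigned long splat_byte(unsigned char v) {
++  unsigned long x = v;
++  x |= x << 8;    //  8 -> 16 bits
++  x |= x << 16;   // 16 -> 32 bits
++  x |= x << 32;   // 32 -> 64 bits
++  return x;       // the value repeated in every byte lane
++}
++#endif
++  // (The single-store handling of the remaining tail continues below.)
++  // --------------------------------------------------------------------------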
++ // Note that the total length is no less than 8 bytes. ++ if (t == T_BYTE || t == T_SHORT) { ++ Label L_exit1; ++ __ beq(count, R0, L_exit1); ++ __ delayed()->nop(); ++ __ sll(AT, count, shift); ++ __ daddu(to, to, AT); // points to the end ++ __ sd(value, to, -8); // overwrite some elements ++ __ bind(L_exit1); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ // Handle copies less than 8 bytes. ++ Label L_fill_2, L_fill_4, L_exit2; ++ __ bind(L_fill_elements); ++ switch (t) { ++ case T_BYTE: ++ __ andi(AT, count, 1); ++ __ beq(AT, R0, L_fill_2); ++ __ delayed()->nop(); ++ __ sb(value, to, 0); ++ __ daddiu(to, to, 1); ++ __ bind(L_fill_2); ++ __ andi(AT, count, 1 << 1); ++ __ beq(AT, R0, L_fill_4); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(AT, count, 1 << 2); ++ __ beq(AT, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ case T_SHORT: ++ __ andi(AT, count, 1); ++ __ beq(AT, R0, L_fill_4); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(AT, count, 1 << 1); ++ __ beq(AT, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ case T_INT: ++ __ beq(count, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ bind(L_exit2); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11; ++ Label l_debug; ++ ++ __ daddiu(AT, tmp3, -9); //why the number is 9 ? ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ if (!aligned) { ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy ++ __ delayed()->nop(); ++ ++ __ andi(AT, tmp1, 1); ++ __ beq(AT, R0, l_10); //copy 1 enlement if necessary to aligh to 2 bytes ++ __ delayed()->nop(); ++ ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_10); ++ ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 2 elements if necessary to align to 4 bytes. 
++ __ andi(AT, tmp1, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -2); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 4 elements if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sw(AT, tmp2, 0); ++ { // FasterArrayCopy ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ } ++ } ++ ++ __ bind(l_7); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_6); // copy 4 at a time if less than 4 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ // For Loongson, there is 128-bit memory access. TODO ++ __ ld(AT, tmp1, 0); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_8); ++ __ delayed()->nop(); ++ } ++ __ bind(l_6); ++ ++ // copy 4 bytes at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_1); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_3); ++ __ delayed()->nop(); ++ ++ } ++ ++ // do 2 bytes copy ++ __ bind(l_1); ++ { ++ __ daddiu(AT, tmp3, -1); ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(AT, tmp3, -2); ++ __ bgez(AT, l_5); ++ __ delayed()->nop(); ++ } ++ ++ //do 1 element copy--byte ++ __ bind(l_9); ++ __ beq(R0, tmp3, l_4); ++ __ delayed()->nop(); ++ ++ { ++ __ bind(l_11); ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_4); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. 
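++  // --------------------------------------------------------------------------
++  // [Editor's sketch - illustrative only.] The conjoint byte/short stubs below
++  // copy from the high addresses downwards, which is what makes them safe for
++  // overlapping ranges with to > from. Reference behaviour in portable C:
++#if 0
++static void conjoint_bytes_reference(const unsigned char* from,
++                                     unsigned char* to, size_t count) {
++  while (count-- > 0) {
++    to[count] = from[count];   // last element first
++  }
++}
++#endif
++  // The real stubs additionally align both ends and move 4 bytes at a time in
++  // the middle, but the element order is the same.
++  // --------------------------------------------------------------------------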
++ // ++ address generate_conjoint_byte_copy(bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit; ++ Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jbyte_disjoint_arraycopy() : ++ StubRoutines::jbyte_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 0); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ daddu(end_from, from, end_count); ++ __ daddu(end_to, to, end_count); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_byte); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ __ bind(l_from_unaligned); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. 
++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ move(T8, end_count); ++ __ daddiu(AT, end_count, -3); ++ __ blez(AT, l_copy_suffix); ++ __ delayed()->nop(); ++ ++ //__ andi(T8, T8, 3); ++ __ lea(end_from, Address(end_from, -4)); ++ __ lea(end_to, Address(end_to, -4)); ++ ++ __ dsrl(end_count, end_count, 2); ++ __ align(16); ++ __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes ++ __ lw(AT, end_from, 0); ++ __ sw(AT, end_to, 0); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -1); ++ __ bne(end_count, R0, l_copy_4_bytes_loop); ++ __ delayed()->nop(); ++ ++ __ b(l_copy_suffix); ++ __ delayed()->nop(); ++ // copy dwords aligned or not with repeat move ++ // l_copy_suffix ++ // copy suffix (0-3 bytes) ++ __ bind(l_copy_suffix); ++ __ andi(T8, T8, 3); ++ __ beq(T8, R0, l_exit); ++ __ delayed()->nop(); ++ __ addiu(end_from, end_from, 3); ++ __ addiu(end_to, end_to, 3); ++ __ bind(l_copy_suffix_loop); ++ __ lb(AT, end_from, 0); ++ __ sb(AT, end_to, 0); ++ __ addiu(end_from, end_from, -1); ++ __ addiu(end_to, end_to, -1); ++ __ addiu(T8, T8, -1); ++ __ bne(T8, R0, l_copy_suffix_loop); ++ __ delayed()->nop(); ++ ++ __ bind(l_copy_byte); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_byte); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Generate stub for disjoint short copy. If "aligned" is true, the ++ // "from" and "to" addresses are assumed to be heapword aligned. ++ // ++ // Arguments for generated stub: ++ // from: A0 ++ // to: A1 ++ // elm.count: A2 treated as signed ++ // one element: 2 bytes ++ // ++ // Strategy for aligned==true: ++ // ++ // If length <= 9: ++ // 1. copy 1 elements at a time (l_5) ++ // ++ // If length > 9: ++ // 1. copy 4 elements at a time until less than 4 elements are left (l_7) ++ // 2. copy 2 elements at a time until less than 2 elements are left (l_6) ++ // 3. copy last element if one was left in step 2. (l_1) ++ // ++ // ++ // Strategy for aligned==false: ++ // ++ // If length <= 9: same as aligned==true case ++ // ++ // If length > 9: ++ // 1. continue with step 7. if the alignment of from and to mod 4 ++ // is different. ++ // 2. align from and to to 4 bytes by copying 1 element if necessary ++ // 3. at l_2 from and to are 4 byte aligned; continue with ++ // 6. if they cannot be aligned to 8 bytes because they have ++ // got different alignment mod 8. ++ // 4. at this point we know that both, from and to, have the same ++ // alignment mod 8, now copy one element if necessary to get ++ // 8 byte alignment of from and to. ++ // 5. copy 4 elements at a time until less than 4 elements are ++ // left; depending on step 3. all load/stores are aligned. ++ // 6. copy 2 elements at a time until less than 2 elements are ++ // left. (l_6) ++ // 7. copy 1 element at a time. (l_5) ++ // 8. copy last element if one was left in step 6. 
(l_1) ++ ++ address generate_disjoint_short_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T8; ++ Register tmp5 = T9; ++ Register tmp6 = T2; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14; ++ Label l_debug; ++ // don't try anything fancy if arrays don't have many elements ++ __ daddiu(AT, tmp3, -23); ++ __ blez(AT, l_14); ++ __ delayed()->nop(); ++ // move push here ++ __ push(tmp4); ++ __ push(tmp5); ++ __ push(tmp6); ++ ++ if (!aligned) { ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen? ++ __ delayed()->nop(); ++ ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 1 element if necessary to align to 4 bytes. ++ __ andi(AT, A0, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 2-element word if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ }// end of if (!aligned) ++ ++ __ bind(l_7); ++ // At this time the position of both, from and to, are at least 8 byte aligned. ++ // Copy 8 elemnets at a time. ++ // Align to 16 bytes, but only if both from and to have same alignment mod 8. 
++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 15); ++ __ bne(AT, R0, l_9); ++ __ delayed()->nop(); ++ ++ // Copy 4-element word if necessary to align to 16 bytes, ++ __ andi(AT, tmp1, 15); ++ __ beq(AT, R0, l_10); ++ __ delayed()->nop(); ++ ++ __ ld(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ ++ __ bind(l_10); ++ ++ // Copy 8 elements at a time; either the loads or the stores can ++ // be unalligned if aligned == false ++ ++ { // FasterArrayCopy ++ __ bind(l_11); ++ // For loongson the 128-bit memory access instruction is gslq/gssq ++ if (UseLEXT1) { ++ __ gslq(AT, tmp4, tmp1, 0); ++ __ gslq(tmp5, tmp6, tmp1, 16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ gssq(AT, tmp4, tmp2, -32); ++ __ gssq(tmp5, tmp6, tmp2, -16); ++ } else { ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ daddiu(tmp1, tmp1, 32); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2, 16); ++ __ sd(tmp6, tmp2, 24); ++ __ daddiu(tmp2, tmp2, 32); ++ } ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ __ bind(l_9); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -15);// loop unrolling 4 times, so if the elements should not be less than 16 ++ __ blez(AT, l_4); // copy 2 at a time if less than 16 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2,16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_8); ++ __ delayed()->sd(tmp6, tmp2, -8); ++ } ++ __ bind(l_6); ++ ++ // copy 2 element at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ lw(tmp4, tmp1, 4); ++ __ lw(tmp5, tmp1, 8); ++ __ lw(tmp6, tmp1, 12); ++ __ sw(AT, tmp2, 0); ++ __ sw(tmp4, tmp2, 4); ++ __ sw(tmp5, tmp2, 8); ++ __ daddiu(tmp1, tmp1, 16); ++ __ daddiu(tmp2, tmp2, 16); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_3); ++ __ delayed()->sw(tmp6, tmp2, -4); ++ } ++ ++ __ bind(l_1); ++ // do single element copy (8 bit), can this happen? 
++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ lhu(tmp4, tmp1, 2); ++ __ lhu(tmp5, tmp1, 4); ++ __ lhu(tmp6, tmp1, 6); ++ __ sh(AT, tmp2, 0); ++ __ sh(tmp4, tmp2, 2); ++ __ sh(tmp5, tmp2, 4); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_5); ++ __ delayed()->sh(tmp6, tmp2, -2); ++ } ++ // single element ++ __ bind(l_4); ++ ++ __ pop(tmp6); ++ __ pop(tmp5); ++ __ pop(tmp4); ++ ++ __ bind(l_14); ++ { // FasterArrayCopy ++ __ beq(R0, tmp3, l_13); ++ __ delayed()->nop(); ++ ++ __ bind(l_12); ++ __ lhu(AT, tmp1, 0); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_12); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_13); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ bind(l_debug); ++ __ stop("generate_disjoint_short_copy should not reach here"); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ Label l_exit, l_copy_short, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jshort_disjoint_arraycopy() : ++ StubRoutines::jshort_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 1); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ sll(AT, end_count, Address::times_2); ++ __ daddu(end_from, from, AT); ++ __ daddu(end_to, to, AT); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_short); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // Copy 1 element if necessary to align to 4 bytes. 
++ __ bind(l_from_unaligned); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. ++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ daddiu(AT, end_count, -1); ++ __ blez(AT, l_copy_short); ++ __ delayed()->nop(); ++ ++ __ lw(AT, end_from, -4); ++ __ sw(AT, end_to, -4); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -2); ++ __ b(l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // copy 1 element at a time ++ __ bind(l_copy_short); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_short); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
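++  // --------------------------------------------------------------------------
++  // [Editor's note.] "store check code" in the comment above is expressed, in
++  // this JDK 11 port, through the GC interface rather than explicit card
++  // marking: the oop variants build a DecoratorSet (IN_HEAP | IS_ARRAY | ...)
++  // and bracket the element-copy loop with the barrier-set hooks, roughly
++  //   bs->arraycopy_prologue(_masm, decorators, is_oop, dst, count);
++  //   ... copy loop ...
++  //   bs->arraycopy_epilogue(_masm, decorators, is_oop, dst, count, tmp);
++  // as can be seen in the stub bodies below.
++  // --------------------------------------------------------------------------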
++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4, l_5, l_6, l_7; ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ if(!aligned) { ++ __ xorr(AT, T3, T0); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time ++ __ delayed()->nop(); ++ ++ __ andi(AT, T3, 7); ++ __ beq(AT, R0, l_6); //copy 2 elements each time ++ __ delayed()->nop(); ++ ++ __ lw(AT, T3, 0); ++ __ daddiu(T1, T1, -1); ++ __ sw(AT, T0, 0); ++ __ daddiu(T3, T3, 4); ++ __ daddiu(T0, T0, 4); ++ } ++ ++ { ++ __ bind(l_6); ++ __ daddiu(AT, T1, -1); ++ __ blez(AT, l_5); ++ __ delayed()->nop(); ++ ++ __ bind(l_7); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ daddiu(T3, T3, 8); ++ __ daddiu(T0, T0, 8); ++ __ daddiu(T1, T1, -2); ++ __ daddiu(AT, T1, -2); ++ __ bgez(AT, l_7); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_5); ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, 4); ++ __ addiu(T0, T0, 4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jint_disjoint_arraycopy() : ++ StubRoutines::jint_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 2); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ // no registers are destroyed by this call ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -4); ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -4); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, -4); ++ __ addiu(T0, T0, -4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 3); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0, -8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ //FIXME ++ address generate_disjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ ++ address generate_conjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ array_overlap_test(nooverlap_target, 3); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0,-8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ if (UseCompressedOops) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } ++ ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy"); ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy"); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = 
StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } ++ ++ // add a function to implement SafeFetch32 and SafeFetchN ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue); ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); ++ // ++ // arguments: ++ // A0 = adr ++ // A1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); ++ ++ // Load *adr into A1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ lw(A1, A0, 0); ++ break; ++ case 8: ++ // int64_t ++ __ ld(A1, A0, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ addu(V0,A1,R0); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
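++    // Summary of the slot order fixed by the 'layout' enum below: the
++    // thread/last_java_sp argument slot sits at the lowest offset, followed
++    // by the callee-saved registers S7..S0, then the saved FP and return
++    // address at the top.  'framesize' is also the frame size recorded in
++    // the OopMap created after the runtime call.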
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addiu(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ sd(S0, SP, S0_off * wordSize); ++ __ sd(S1, SP, S1_off * wordSize); ++ __ sd(S2, SP, S2_off * wordSize); ++ __ sd(S3, SP, S3_off * wordSize); ++ __ sd(S4, SP, S4_off * wordSize); ++ __ sd(S5, SP, S5_off * wordSize); ++ __ sd(S6, SP, S6_off * wordSize); ++ __ sd(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ sd(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ __ set_last_Java_frame(java_thread, SP, FP, NULL); ++ // Align stack ++ __ set64(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ // Call runtime ++ __ call(runtime_entry); ++ __ delayed()->nop(); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld(S0, SP, S0_off * wordSize); ++ __ ld(S1, SP, S1_off * wordSize); ++ __ ld(S2, SP, S2_off * wordSize); ++ __ ld(S3, SP, S3_off * wordSize); ++ __ ld(S4, SP, S4_off * wordSize); ++ __ ld(S5, SP, S5_off * wordSize); ++ __ ld(S6, SP, S6_off * wordSize); ++ __ ld(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ move(SP, FP); // epilog ++ __ pop(FP); ++ ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), ++ false); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), ++ false); ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. ++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++ // Safefetch stubs. 
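++    // SafeFetch32/SafeFetchN let VM code probe memory that may not be
++    // mapped: if the load at *fault_pc faults, execution continues at
++    // *continuation_pc and the stub returns the caller-supplied errValue
++    // instead of the loaded value (see generate_safefetch above).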
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++ ++#ifdef COMPILER2 ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubRoutines::_montgomeryMultiply ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); ++ } ++ if (UseMontgomerySquareIntrinsic) { ++ StubRoutines::_montgomerySquare ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); ++ } ++#endif ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff --git a/src/hotspot/cpu/mips/stubRoutines_mips.hpp b/src/hotspot/cpu/mips/stubRoutines_mips.hpp +new file mode 100644 +index 0000000000..920c08844e +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubRoutines_mips.hpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++#define CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. ++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == gs2::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 40000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class gs2 { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. 
++ static address _call_stub_compiled_return; ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp +new file mode 100644 +index 0000000000..358d580d52 +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. ++ ++//find the last fp value ++address StubRoutines::gs2::_call_stub_compiled_return = NULL; +diff --git a/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp +new file mode 100644 +index 0000000000..19e2f29c59 +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp +@@ -0,0 +1,2149 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++int TemplateInterpreter::InterpreterCodeSize = 500 * K; ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ daddiu(SP, SP, -10 * wordSize); ++ __ sd(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 10 stack arg0 <--- old sp ++ // 9 float/double identifiers ++ // 8 register arg7 ++ // ... ++ // 2 register arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FP first so we can use T3 as temp ++ __ ld(T3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. ++ for ( int i = 1; i < Argument::n_register_parameters; i++ ) { ++ Register reg = as_Register(i + A0->encoding()); ++ FloatRegister floatreg = as_FloatRegister(i + F12->encoding()); ++ Label isfloatordouble, isdouble, next; ++ ++ __ andi(AT, T3, 1 << (i*2)); // Float or Double? ++ __ bne(AT, R0, isfloatordouble); ++ __ delayed()->nop(); ++ ++ // Do Int register here ++ __ ld(reg, SP, (1 + i) * wordSize); ++ __ b (next); ++ __ delayed()->nop(); ++ ++ __ bind(isfloatordouble); ++ __ andi(AT, T3, 1 << ((i*2)+1)); // Double? 
++ __ bne(AT, R0, isdouble); ++ __ delayed()->nop(); ++ ++ // Do Float Here ++ __ lwc1(floatreg, SP, (1 + i) * wordSize); ++ __ b(next); ++ __ delayed()->nop(); ++ ++ // Do Double here ++ __ bind(isdouble); ++ __ ldc1(floatreg, SP, (1 + i) * wordSize); ++ ++ __ bind(next); ++ } ++ ++ __ ld(RA, SP, 0); ++ __ daddiu(SP, SP, 10 * wordSize); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_update_entry() { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++* Method entry for static (non-native) methods: ++* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++* int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) ++*/ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32CIntrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++// ++// Various method entries ++// ++ ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- sp ++ // [ arg ] ++ // retaddr in ra ++ ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ abs_d(F0, F12); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ sqrt_d(F0, F12); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sin : ++ case Interpreter::java_lang_math_cos : ++ case Interpreter::java_lang_math_tan : ++ case Interpreter::java_lang_math_log : ++ case Interpreter::java_lang_math_log10 : ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ move(SP, Rsender); ++ __ dmtc1(RA, F24); ++ __ dmtc1(SP, F25); ++ __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); ++ generate_transcendental_entry(kind, 1); ++ __ dmfc1(SP, F25); ++ __ dmfc1(RA, F24); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 2 * Interpreter::stackElementSize); ++ __ ldc1(F13, SP, 0); ++ __ move(SP, Rsender); ++ __ dmtc1(RA, F24); ++ __ dmtc1(SP, F25); ++ __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); ++ generate_transcendental_entry(kind, 2); ++ __ dmfc1(SP, F25); ++ __ dmfc1(RA, F24); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 4 * Interpreter::stackElementSize); ++ __ ldc1(F13, SP, 2 * Interpreter::stackElementSize); ++ __ ldc1(F14, SP, 0); ++ __ madd_d(F0, F14, F13, F12); ++ __ move(SP, Rsender); ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ lwc1(F12, SP, 2 * Interpreter::stackElementSize); ++ __ lwc1(F13, SP, Interpreter::stackElementSize); ++ __ lwc1(F14, SP, 0); ++ __ madd_s(F0, F14, F13, F12); ++ __ move(SP, Rsender); ++ } ++ break; ++ default: ++ ; ++ } ++ if (entry_point) { ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ return entry_point; ++} ++ ++ // double trigonometrics and transcendentals ++ // static jdouble dsin(jdouble x); ++ // static jdouble dcos(jdouble x); ++ // static jdouble dtan(jdouble x); ++ // static jdouble dlog(jdouble x); ++ // static jdouble dlog10(jdouble x); ++ // static jdouble dexp(jdouble x); ++ // static jdouble dpow(jdouble x, jdouble y); ++ ++void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { ++ address fn; ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ break; ++ case Interpreter::java_lang_math_cos : ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ break; ++ case Interpreter::java_lang_math_tan : ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ break; ++ case Interpreter::java_lang_math_log : ++ if (StubRoutines::dlog() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, 
SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ break; ++ case Interpreter::java_lang_math_log10 : ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ break; ++ case Interpreter::java_lang_math_exp : ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ break; ++ case Interpreter::java_lang_math_pow : ++ if (StubRoutines::dpow() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ fn = NULL; // unreachable ++ } ++ __ li(T9, fn); ++ __ jalr(T9); ++ __ delayed()->nop(); ++} ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: methodOop ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), Rmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcp_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addiu(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ subu(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bgez(T1, L); // check if frame is complete ++ __ delayed()->nop(); ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ // FIXME: please change the func restore_bcp ++ // S0 is the conventional register for bcp ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ // FIXME: why do not pass parameter thread ? ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // ??? 
convention: expect array in register A1 ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ ++ address entry = __ pc(); ++ ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T9 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T9; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T9; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ lw(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ dsll(AT, flags, Interpreter::logStackElementSize); ++ __ daddu(SP, SP, AT); ++ ++ Register java_thread; ++#ifndef OPT_THREAD ++ java_thread = T9; ++ __ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ ++ __ check_and_handle_popframe(java_thread); ++ __ check_and_handle_earlyret(java_thread); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ if (continuation == NULL) { ++ __ dispatch_next(state, 
step); ++ } else { ++ __ jump_to_entry(continuation); ++ __ delayed()->nop(); ++ } ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. 
++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// prerequisites : method in T0, invocation counter in T3 ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { ++ Label done; ++ const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ ++ __ get_method_counters(Rmethod, FSR, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into methodDataOop ++ __ lw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ __ incrementl(T9, 1); ++ __ sw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ lw(T3, invocation_counter); ++ __ increment(T3, InvocationCounter::count_increment); ++ __ sw(T3, invocation_counter); // save invocation count ++ ++ __ lw(FSR, backedge_counter); // load backedge counter ++ __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits ++ __ andr(FSR, FSR, AT); ++ ++ __ daddu(T3, T3, FSR); // add both counters ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { ++ __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T3, AT); ++ } ++ ++ __ bne_far(AT, R0, *profile_method_continue); ++ __ delayed()->nop(); ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(FSR, *profile_method); ++ } ++ ++ if (Assembler::is_simm16(CompileThreshold)) { ++ __ srl(AT, T3, InvocationCounter::count_shift); ++ __ slti(AT, AT, CompileThreshold); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T3, AT); ++ } ++ ++ __ beq_far(AT, R0, *overflow); ++ __ delayed()->nop(); ++ __ bind(done); ++} ++ ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. jump to entry_point) ++ // Rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ // the bcp is valid if and only if it's not null ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), R0); ++ __ ld(Rmethod, FP, method_offset); ++ // Preserve invariant that S0/S7 contain bcp/locals of sender frame ++ __ b_far(do_continue); ++ __ delayed()->nop(); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. 
++// ++// Args: ++// T2: number of additional locals this frame needs (what we must check) ++// T0: Method* ++// ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // T0: Method* ++ // T2: number of additional locals this frame needs (what we must check) ++ ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) ++ + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. ++ __ move(AT, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ slt(AT, AT, T2); ++ __ beq(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++#ifndef OPT_THREAD ++ Register thread = T1; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // locals + overhead, in bytes ++ __ dsll(T3, T2, Interpreter::logStackElementSize); ++ __ daddiu(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 ++ ++#ifdef ASSERT ++ Label stack_base_okay, stack_size_okay; ++ // verify that thread stack base is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); ++ __ bne(AT, R0, stack_base_okay); ++ __ delayed()->nop(); ++ __ stop("stack base is zero"); ++ __ bind(stack_base_okay); ++ // verify that thread stack size is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); ++ __ bne(AT, R0, stack_size_okay); ++ __ delayed()->nop(); ++ __ stop("stack size is zero"); ++ __ bind(stack_size_okay); ++#endif ++ ++ // Add stack base to locals and subtract stack size ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT ++ __ daddu(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT ++ __ dsubu(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 ++ ++ // Use the bigger size for banging. ++ const int max_bang_size = (int)MAX2(JavaThread::stack_shadow_zone_size(), JavaThread::stack_guard_zone_size()); ++ ++ // add in the redzone and yellow size ++ __ move(AT, max_bang_size); ++ __ addu(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ slt(AT, T3, SP); ++ __ bne(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. 
++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void TemplateInterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ delayed()->nop(); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ delayed()->nop(); ++ __ load_mirror(T0, Rmethod, T9); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ daddiu(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ sd(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ sd(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 0; ++ int frame_size = 10; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ daddiu(SP, SP, (-frame_size) * wordSize); ++ __ sd(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ sd(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ daddiu(FP, SP, (frame_size - 2) * wordSize); ++ __ sd(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ sd(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ sd(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ sd(Rmethod, FP, (-++i) * wordSize); // save Method* ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(T2, Rmethod, T9); ++ __ sd(T2, FP, (-++i) * wordSize); // Mirror ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ sd(AT, FP, (-++i) * wordSize); ++ } else { ++ __ sd(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ sd(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ sd(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ sd(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ sd(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ address entry = __ pc(); ++ Label slow_path; ++ __ b(slow_path); ++ __ delayed()->nop(); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. 
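++// Rough flow of the native entry generated below: size the parameter area
++// and locals pointer, build the fixed frame (the bcp slot is 0 for natives),
++// optionally lock the receiver or class mirror, run the signature handler,
++// switch the thread to _thread_in_native around the JNI call, then on return
++// poll for safepoints, unbox a potential oop result, unlock if needed and
++// dispatch through the saved result handler.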
++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ lhu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ dsll(LVP, V0, Address::times_8); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ lw(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ const Register t = T8; ++ ++ __ get_method(method); ++ { ++ Label L, Lstatic; ++ __ ld(t,method,in_bytes(Method::const_offset())); ++ __ lhu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // MIPS n64 ABI: caller does not reserve space for the register auguments. ++ // A0 and A1(if needed) ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ delayed()->nop(); ++ __ daddiu(t, t, 1); ++ __ bind(Lstatic); ++ __ daddiu(t, t, -7); ++ __ blez(t, L); ++ __ delayed()->nop(); ++ __ dsll(t, t, Address::times_8); ++ __ dsubu(SP, SP, t); ++ __ bind(L); ++ } ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T9, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T9); ++ __ delayed()->nop(); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to mips o32 abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. ++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ sd(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ ++ // get mirror ++ __ load_mirror(t, method, T9); ++ // copy mirror into activation frame ++ __ sd(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ daddiu(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++  // [ argument word 0 ] <--- S7
++
++  // get native function entry point
++  { Label L;
++    __ ld(T9, method, in_bytes(Method::native_function_offset()));
++    __ li(V1, SharedRuntime::native_method_throw_unsatisfied_link_error_entry());
++    __ bne(V1, T9, L);
++    __ delayed()->nop();
++    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method);
++    __ get_method(method);
++    __ ld(T9, method, in_bytes(Method::native_function_offset()));
++    __ bind(L);
++  }
++
++  // pass JNIEnv
++  // native function in T9
++#ifndef OPT_THREAD
++  __ get_thread(thread);
++#endif
++  __ daddiu(t, thread, in_bytes(JavaThread::jni_environment_offset()));
++  __ move(A0, t);
++  // [ jni environment ] <--- sp
++  // [ mthd holder mirror ptr ] ---------------------------->| (only for static method)
++  // [ ] |
++  // ... size of parameters |
++  // [ monitor entry ] |
++  // ... |
++  // [ monitor entry ] |
++  // [ monitor block top ] ( the top monitor entry ) |
++  // [ byte code pointer (0) ] (if native, bcp = 0) |
++  // [ constant pool cache ] |
++  // [ Mirror ] |
++  // [ Method* ] |
++  // [ locals offset ] |
++  // [ sender's sp ] |
++  // [ sender's fp ] |
++  // [ return address ] <--- fp |
++  // [ method holder mirror ] <----------------------------|
++  // [ result type info ]
++  // [ argument word n-1 ] <--- ( sender's sp )
++  // ...
++  // [ argument word 0 ] <--- S7
++
++  // set_last_Java_frame_before_call
++  __ sd(FP, thread, in_bytes(JavaThread::last_Java_fp_offset()));
++  // Change state to native (we save the return address in the thread, since it might not
++  // be pushed on the stack when we do a stack traversal). It is enough that the pc()
++  // points into the right code segment. It does not have to be the correct return pc.
++  __ li(t, __ pc());
++  __ sd(t, thread, in_bytes(JavaThread::last_Java_pc_offset()));
++  __ sd(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
++
++  // change thread state
++#ifdef ASSERT
++  {
++    Label L;
++    __ lw(t, thread, in_bytes(JavaThread::thread_state_offset()));
++    __ daddiu(t, t, (-1) * _thread_in_Java);
++    __ beq(t, R0, L);
++    __ delayed()->nop();
++    __ stop("Wrong thread state in native stub");
++    __ bind(L);
++  }
++#endif
++
++  __ move(t, _thread_in_native);
++  if(os::is_MP()) {
++    __ sync(); // store release
++  }
++  __ sw(t, thread, in_bytes(JavaThread::thread_state_offset()));
++
++  // call native method
++  __ jalr(T9);
++  __ delayed()->nop();
++  // result potentially in V0 or F0
++
++
++  // via _last_native_pc and not via _last_java_sp
++  // NOTE: the order of these pushes is known to frame::interpreter_frame_result.
++  // If the order changes or anything else is added to the stack the code in
++  // interpreter_frame_result will have to be changed.
++  //FIXME, should modify here
++  // save return value to keep the value from being destroyed by other calls
++  __ push(dtos);
++  __ push(ltos);
++
++  // change thread state
++#ifndef OPT_THREAD
++  __ get_thread(thread);
++#endif
++  __ move(t, _thread_in_native_trans);
++  if(os::is_MP()) {
++    __ sync(); // store release
++  }
++  __ sw(t, thread, in_bytes(JavaThread::thread_state_offset()));
++
++  if(os::is_MP()) {
++    if (UseMembar) {
++      // Force this write out before the read below
++      __ sync();
++    } else {
++      // Write serialization page so VM thread can do a pseudo remote membar.
++      // We use the current thread pointer to calculate a thread specific
++      // offset to write to within the page. This minimizes bus traffic
++      // due to cache line collision.
++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ move(t, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ // reset handle block ++ __ ld(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ //FIXME, addi only support 16-bit imeditate ++ __ ld(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ delayed()->nop(); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. ++ __ resolve_jobject(V0, thread, T9); ++ __ sd(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ lw(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ move(AT, (u1)JavaThread::stack_guard_yellow_reserved_disabled); ++ __ bne(t, AT, no_reguard); ++ __ delayed()->nop(); ++ __ pushad(); ++ __ move(S5_heapbase, SP); ++ __ move(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S5_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ ld(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. 
For now this ++ // doesn't work here because the sp is not correctly set at this point. ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ __ delayed()->daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ++ __ ld(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ __ delayed()->nop(); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). ++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ // restore potential result in V0, ++ // call result handler to restore potential result in ST0 & handle result ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ ld(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); ++ __ jalr(t); ++ __ delayed()->nop(); ++ ++ ++ // remove activation ++ __ ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp ++ __ ld(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address ++ __ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifndef CORE ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ // entry_point is the beginning of this ++ // function and checks again for compiled code ++ } ++#endif ++ return entry_point; ++} ++ ++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { ++ // Quick & dirty stack overflow checking: bang the stack & handle trap. ++ // Note that we do the banging after the frame is setup, since the exception ++ // handling code expects to find a valid interpreter frame on the stack. ++ // Doing the banging earlier fails if the caller frame is not an interpreter ++ // frame. ++ // (Also, the exception throwing code expects to unlock any synchronized ++ // method receiever, so do the banging after locking the receiver.) ++ ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. 
++  if (UseStackBanging) {
++    const int page_size = os::vm_page_size();
++    const int n_shadow_pages = ((int)JavaThread::stack_shadow_zone_size()) / page_size;
++    const int start_page = native_call ? n_shadow_pages : 1;
++    BLOCK_COMMENT("bang_stack_shadow_pages:");
++    for (int pages = start_page; pages <= n_shadow_pages; pages++) {
++      __ bang_stack_with_offset(pages*page_size);
++    }
++  }
++}
++
++//
++// Generic interpreted method entry to (asm) interpreter
++//
++// Layout of frame just at the entry
++//
++//   [ argument word n-1 ] <--- sp
++//   ...
++//   [ argument word 0   ]
++// assume Method* in Rmethod before call this method.
++// prerequisites to the generated stub : the callee Method* in Rmethod
++// note you must save the caller bcp before call the generated stub
++//
++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
++  // determine code generation flags
++  bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
++
++  // Rmethod: Method*
++  // Rsender: sender's sp
++  address entry_point = __ pc();
++
++  const Address invocation_counter(Rmethod,
++      in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset()));
++
++  // get parameter size (always needed)
++  __ ld(T3, Rmethod, in_bytes(Method::const_offset()));  //T3 --> Rmethod._constMethod
++  __ lhu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset()));
++
++  // Rmethod: Method*
++  // V0: size of parameters
++  // Rsender: sender's sp, could be different from sp + wordSize if we call via c2i
++  // get size of locals in words to T2
++  __ lhu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset()));
++  // T2 = no. of additional locals, locals include parameters
++  __ dsubu(T2, T2, V0);
++
++  // see if we've got enough room on the stack for locals plus overhead.
++  // Layout of frame at this point
++  //
++  // [ argument word n-1  ] <--- sp
++  //   ...
++  // [ argument word 0    ]
++  generate_stack_overflow_check();
++  // after this function, the layout of frame does not change
++
++  // compute beginning of parameters (LVP)
++  __ dsll(LVP, V0, LogBytesPerWord);
++  __ daddiu(LVP, LVP, (-1) * wordSize);
++  __ daddu(LVP, LVP, SP);
++
++  // T2 - # of additional locals
++  // allocate space for locals
++  // explicitly initialize locals
++  {
++    Label exit, loop;
++    __ beq(T2, R0, exit);
++    __ delayed()->nop();
++
++    __ bind(loop);
++    __ daddiu(SP, SP, (-1) * wordSize);
++    __ daddiu(T2, T2, -1);          // until everything initialized
++    __ bne(T2, R0, loop);
++    __ delayed()->sd(R0, SP, 0);    // initialize local variables
++
++    __ bind(exit);
++  }
++
++  //
++  // [ local var m-1      ] <--- sp
++  //   ...
++  // [ local var 0        ]
++  // [ argument word n-1  ] <--- T0?
++  //   ...
++  // [ argument word 0    ] <--- LVP
++
++  // initialize fixed part of activation frame
++
++  generate_fixed_frame(false);
++
++
++  // after this function, the layout of frame is as following
++  //
++  // [ monitor block top        ] <--- sp ( the top monitor entry )
++  // [ byte code pointer        ] (if native, bcp = 0)
++  // [ constant pool cache      ]
++  // [ Method*                  ]
++  // [ locals offset            ]
++  // [ sender's sp              ]
++  // [ sender's fp              ] <--- fp
++  // [ return address           ]
++  // [ local var m-1            ]
++  //   ...
++  // [ local var 0              ]
++  // [ argument word n-1        ] <--- ( sender's sp )
++  // ...
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T9 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T9, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ get_method(Rmethod); ++ __ b(profile_method_continue); ++ __ delayed()->nop(); ++ } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ sd(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ __ delayed()->nop(); ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. 
++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ lw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ sw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld(A0, FP, frame::return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ __ delayed()->nop(); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld( A1, A1, in_bytes(Method::const_offset())); ++ __ lhu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ dsubu(A2, LVP, A1); ++ __ daddiu(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T9, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. 
++ __ move(T8, SP); ++ __ ld(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ __ set_last_Java_frame(thread, noreg, FP, __ pc()); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ lbu(AT, BCP, 0); ++ __ daddiu(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ __ delayed()->nop(); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. ++ ++ __ get_method(T9); ++ __ ld(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T9, BCP); ++ ++ __ beq(T8, R0, L_done); ++ __ delayed()->nop(); ++ ++ __ sd(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sd(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T9); // jump to exception handler of caller ++ __ delayed()->nop(); ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ get_thread(TREG); 
++#endif
++  __ ld_ptr(T9, TREG, in_bytes(JavaThread::jvmti_thread_state_offset()));
++  const Address cond_addr(T9, in_bytes(JvmtiThreadState::earlyret_state_offset()));
++  // Clear the earlyret state
++  __ move(AT, JvmtiThreadState::earlyret_inactive);
++  __ sw(AT, cond_addr);
++  __ sync();
++
++
++  __ remove_activation(state, T0,
++                       false, /* throw_monitor_exception */
++                       false, /* install_monitor_exception */
++                       true); /* notify_jvmdi */
++  __ sync();
++  __ jr(T0);
++  __ delayed()->nop();
++  return entry;
++} // end of ForceEarlyReturn support
++
++
++//-----------------------------------------------------------------------------
++// Helper for vtos entry point generation
++
++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
++                                                         address& bep,
++                                                         address& cep,
++                                                         address& sep,
++                                                         address& aep,
++                                                         address& iep,
++                                                         address& lep,
++                                                         address& fep,
++                                                         address& dep,
++                                                         address& vep) {
++  assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
++  Label L;
++  fep = __ pc(); __ push(ftos); __ b(L); __ delayed()->nop();
++  dep = __ pc(); __ push(dtos); __ b(L); __ delayed()->nop();
++  lep = __ pc(); __ push(ltos); __ b(L); __ delayed()->nop();
++  aep = __ pc(); __ push(atos); __ b(L); __ delayed()->nop();
++  bep = cep = sep =
++  iep = __ pc(); __ push(itos);
++  vep = __ pc();
++  __ bind(L);
++  generate_and_dispatch(t);
++}
++
++
++/*
++//-----------------------------------------------------------------------------
++// Generation of individual instructions
++
++// helpers for generate_and_dispatch
++
++
++InterpreterGenerator::InterpreterGenerator(StubQueue* code)
++ : TemplateInterpreterGenerator(code) {
++   generate_all(); // down here so it can be "virtual"
++}
++*/
++
++//-----------------------------------------------------------------------------
++
++// Non-product code
++#ifndef PRODUCT
++address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
++  address entry = __ pc();
++
++  // prepare expression stack
++  __ push(state);       // save tosca
++
++  // tos & tos2
++  // trace_bytecode actually needs 4 args, the last two being tos & tos2.
++  // This works fine for x86, but the MIPS o32 calling convention stores A2-A3
++  // to the stack positions it thinks hold tos & tos2, so an error would occur
++  // when the expression stack holds fewer than 2 entries.
++ __ ld(A2, SP, 0); ++ __ ld(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ lw(T9, T8, 0); ++ __ dsrl(T9, T9, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T9, T9, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ sw(T9, T8, 0); ++ __ dsll(T9, T9, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ daddu(T8, T8, T9); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ ++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ lw(T8, T8, 0); ++ __ move(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ delayed()->nop(); ++ __ brk(5); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/mips/templateTable_mips.hpp b/src/hotspot/cpu/mips/templateTable_mips.hpp +new file mode 100644 +index 0000000000..46a88aba26 +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateTable_mips.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++#define CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. ++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/templateTable_mips_64.cpp b/src/hotspot/cpu/mips/templateTable_mips_64.cpp +new file mode 100644 +index 0000000000..5265483830 +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateTable_mips_64.cpp +@@ -0,0 +1,4688 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#ifndef CC_INTERP ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No mips specific initialization ++} ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). It ++// isn't for category 1 objects. ++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. 
++// If val == noreg this means store a NULL ++ ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators = 0) { ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, T9, T1, decorators); ++} ++ ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators = 0) { ++ __ load_heap_oop(dst, src, T9, T1, decorators); ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ daddiu(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ __ delayed()->nop(); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. 
++ if (load_bc_into_bc_reg) { ++ __ move(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ __ delayed()->nop(); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ delayed()->nop(); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ delayed()->nop(); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ delayed()->nop(); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ sb(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ mtc1(R0, FSF); return; ++ case 1: __ addiu(AT, R0, 1); break; ++ case 2: __ addiu(AT, R0, 2); break; ++ default: ShouldNotReachHere(); ++ } ++ __ mtc1(AT, FSF); ++ __ cvt_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ dmtc1(R0, FSF); ++ return; ++ case 1: __ daddiu(AT, R0, 1); ++ __ dmtc1(AT, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ lb(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ lb(FSR, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ lbu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ if (UseLEXT1 && Assembler::is_simm(sizeof(tags_offset), 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. 
++ __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ daddiu(AT, T1, - JVM_CONSTANT_Class); ++ __ bne(AT, R0, notClass); ++ __ delayed()->dsll(T2, T2, Address::times_8); ++ ++ __ bind(call_ldc); ++ __ move(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->sd(FSR, SP, 0); // added for performance issue ++ ++ __ bind(notClass); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ lwc1(FSF, AT, base_offset); ++ } ++ //__ push_f(); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->swc1(FSF, SP, 0); ++ ++ __ bind(notFloat); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Integer); ++ __ bne(AT, R0, notInt); ++ __ delayed()->nop(); ++ // itos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(T0, T3, T2); ++ __ lw(FSR, T0, base_offset); ++ } ++ __ push(itos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // assume the tag is for condy; if not, the VM runtime will tell us ++ __ bind(notInt); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++void TemplateTable::condy_helper(Label& Done) { ++ const Register obj = FSR; ++ const Register off = SSR; ++ const Register flags = T3; ++ const Register rarg = A1; ++ __ move(rarg, (int)bytecode()); ++ __ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); ++ __ get_vm_result_2(flags, TREG); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ andi(off, flags, ConstantPoolCacheEntry::field_index_mask); ++ __ daddu(obj, off, obj); ++ const Address field(obj, 0 * wordSize); ++ ++ // What sort of thing are we loading? 
++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ ++ switch (bytecode()) { ++ case Bytecodes::_ldc: ++ case Bytecodes::_ldc_w: ++ { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ daddiu(AT, flags, -itos); ++ __ bne(AT, R0, notInt); ++ __ delayed()->nop(); ++ // itos ++ __ ld(obj, field); ++ __ push(itos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ daddiu(AT, flags, -ftos); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ __ lwc1(FSF, field); ++ __ push(ftos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ daddiu(AT, flags, -stos); ++ __ bne(AT, R0, notShort); ++ __ delayed()->nop(); ++ // stos ++ __ lh(obj, field); ++ __ push(stos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ daddiu(AT, flags, -btos); ++ __ bne(AT, R0, notByte); ++ __ delayed()->nop(); ++ // btos ++ __ lb(obj, field); ++ __ push(btos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notByte); ++ __ daddiu(AT, flags, -ctos); ++ __ bne(AT, R0, notChar); ++ __ delayed()->nop(); ++ // ctos ++ __ lhu(obj, field); ++ __ push(ctos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ daddiu(AT, flags, -ztos); ++ __ bne(AT, R0, notBool); ++ __ delayed()->nop(); ++ // ztos ++ __ lbu(obj, field); ++ __ push(ztos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notBool); ++ break; ++ } ++ ++ case Bytecodes::_ldc2_w: ++ { ++ Label notLong, notDouble; ++ __ daddiu(AT, flags, -ltos); ++ __ bne(AT, R0, notLong); ++ __ delayed()->nop(); ++ // ltos ++ __ ld(obj, field); ++ __ push(ltos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ daddiu(AT, flags, -dtos); ++ __ bne(AT, R0, notDouble); ++ __ delayed()->nop(); ++ // dtos ++ __ ldc1(FSF, field); ++ __ push(dtos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ stop("bad ldc/condy"); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ Register rarg = A1; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp, T9); ++ __ bne(result, R0, resolved); ++ __ delayed()->nop(); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ move(rarg, i); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. 
++ Label notNull; ++ __ set64(rarg, (long)Universe::the_null_sentinel_addr()); ++ __ ld_ptr(tmp, Address(rarg)); ++ __ bne(tmp, result, notNull); ++ __ delayed()->nop(); ++ __ xorr(result, result, result); // NULL object reference ++ __ bind(notNull); ++ } ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++ ++// used register: T2, T3, T1 ++// T2 : index ++// T3 : cpool ++// T1 : tag ++void TemplateTable::ldc2_w() { ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ ++ // get index in cpool ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type in T1 ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ ++ __ daddiu(AT, T1, -JVM_CONSTANT_Double); ++ __ bne(AT, R0, notDouble); ++ __ delayed()->nop(); ++ ++ // dtos ++ __ dsll(T2, T2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { ++ __ gsldxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ ldc1(FSF, AT, base_offset); ++ } ++ __ push(dtos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Long); ++ __ bne(AT, R0, notLong); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ dsll(T2, T2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { ++ __ gsldx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ ld(FSR, AT, base_offset); ++ } ++ __ push(ltos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++// we compute the actual local variable address here ++// the x86 dont do so for it has scaled index memory access model, we dont have, so do here ++void TemplateTable::locals_index(Register reg, int offset) { ++ __ lbu(reg, at_bcp(offset)); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++void TemplateTable::iload() { ++ iload_internal(); ++} ++ ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); ++} ++ ++// this method will do bytecode folding of the two form: ++// iload iload iload caload ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::iload_internal(RewriteControl rc) { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ move(AT, Bytecodes::_iload); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ __ move(T3, Bytecodes::_fast_iload2); ++ __ move(AT, Bytecodes::_fast_iload); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _caload, rewrite to fast_icaload ++ __ move(T3, Bytecodes::_fast_icaload); ++ __ move(AT, Bytecodes::_caload); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // rewrite so iload doesn't check again. 
++ __ move(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
++// we dont use our tge 29 now, for later optimization ++void TemplateTable::index_check(Register array, Register index) { ++ // Pop ptr into array ++ __ pop_ptr(array); ++ index_check_without_pop(array, index); ++} ++ ++void TemplateTable::index_check_without_pop(Register array, Register index) { ++ // destroys A2 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ ++ // sign extend since tos (index) might contain garbage in upper bits ++ __ sll(index, index, 0); ++ ++ // check index ++ Label ok; ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++#ifndef OPT_RANGECHECK ++ __ sltu(AT, index, AT); ++ __ bne(AT, R0, ok); ++ __ delayed()->nop(); ++ ++ //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 ++ if (A1 != array) __ move(A1, array); ++ if (A2 != index) __ move(A2, index); ++ __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ delayed()->nop(); ++ __ bind(ok); ++#else ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++ __ move(A2, index); ++ __ tgeu(A2, AT, 29); ++#endif ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ warn("iaload Unimplemented yet"); ++ __ gslwle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ warn("laload Unimplemented yet"); ++ __ gsldle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, Address::times_8); ++ __ daddu(T9, SSR, AT); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T9, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ shl(AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ warn("faload Unimplemented yet"); ++ __ gslwlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::daload() { ++ transition(itos, dtos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 3); ++ __ daddu(FSR, SSR, FSR); ++ 
__ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 3); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ warn("daload Unimplemented yet"); ++ __ gsldlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, 3); ++ __ daddu(T9, SSR, AT); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T9, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ //add for compressedoops ++ do_oop_load(_masm, ++ Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), ++ FSR, ++ IS_ARRAY); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR:index ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ warn("baload Unimplemented yet"); ++ __ gslble(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++// iload followed by caload frequent pair ++// used register : T2 ++// T2 : index ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 1); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ warn("saload Unimplemented yet"); ++ __ gslhle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ lw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ld(FSR, laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ lwc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ ldc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ 
transition(vtos, atos); ++ __ ld(FSR, aaddress(n)); ++} ++ ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} ++ ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); ++} ++ ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. ++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // do actual aload_0 ++ aload(0); ++ ++ // if _getfield then wait with rewrite ++ __ move(AT, Bytecodes::_getfield); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ // if _igetfield then reqrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_iaccess_0); ++ __ move(AT, Bytecodes::_fast_igetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _agetfield then reqrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aaccess_0); ++ __ move(AT, Bytecodes::_fast_agetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _fgetfield then reqrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_faccess_0); ++ __ move(AT, Bytecodes::_fast_fgetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, T3, T2, false); ++ ++ __ bind(done); ++ } else { ++ aload(0); ++ } ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ swc1(FSF, T2, 0); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ sdc1(FSF, T2, -wordSize); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); 
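++  // The wide_* templates decode a 16-bit local index. Per the JVM spec the
++  // encoding handled here is (istore shown as an example):
++  //
++  //   0xC4 (wide)  0x36 (istore)  indexbyte1  indexbyte2
++  //
++  // and locals_index_wide() turns (indexbyte1 << 8) | indexbyte2 into the
++  // address of that local slot.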
++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::wide_fstore() { ++ wide_istore(); ++} ++ ++void TemplateTable::wide_dstore() { ++ wide_lstore(); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); //bound ++ ++ __ warn("iastore Unimplemented yet"); ++ __ gsswle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); // prefer index in SSR ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(T2, T2, SSR); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); ++ } ++} ++ ++ ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); //bound ++ ++ __ warn("lastore Unimplemented yet"); ++ __ gssdle(FSR, T2, AT); ++ } else { ++ index_check(T3, T2); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T3, T3, T2); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); ++ } ++} ++ ++// used register T2 ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); //bound ++ ++ __ warn("fastore Unimplemented yet"); ++ __ gsswlec1(FSF, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(T2, T2, SSR); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); ++ } ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); //bound ++ ++ __ warn("dastore Unimplemented yet"); ++ __ gssdlec1(FSF, T2, AT); ++ } else { ++ index_check(T3, 
T2); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T3, T3, T2); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); ++ } ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld(FSR, at_tos()); // Value ++ __ lw(SSR, at_tos_p1()); // Index ++ __ ld(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ __ daddiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ __ delayed()->nop(); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, IS_ARRAY); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T9); ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, IS_ARRAY); ++ ++ __ bind(done); ++ __ daddiu(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ guarantee(false, "unimplemented yet!"); ++ __ pop_ptr(T2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ warn("bastore Unimplemented yet"); ++ __ gssble(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
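++    // Sketch of the check emitted below (the exact layout_helper encoding
++    // is taken on trust from Klass::layout_helper_boolean_diffbit()):
++    //
++    //   jint lh = array->klass()->layout_helper();
++    //   if (lh & Klass::layout_helper_boolean_diffbit())
++    //     value &= 1;          // T_BOOLEAN array: keep only the low bit
++    //   // T_BYTE array: the low 8 bits are stored unchanged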
++ __ load_klass(T9, T2); ++ __ lw(T9, T9, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ move(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T9, AT); ++ __ beq(AT, R0, L_skip); ++ __ delayed()->nop(); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ __ daddu(SSR, T2, SSR); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); ++ } ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR)); //bound ++ ++ __ warn("castore Unimplemented yet"); ++ __ gsshle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); ++ } ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ sw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ sd(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ swc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ sdc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ sd(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ 
// stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++ ++ // stack: ..., c, d, a, b, c, d ++} ++ ++// blows FSR ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ switch (op) { ++ case add : __ addu32(FSR, SSR, FSR); break; ++ case sub : __ subu32(FSR, SSR, FSR); break; ++ case mul : __ mul(FSR, SSR, FSR); break; ++ case _and : __ andr(FSR, SSR, FSR); break; ++ case _or : __ orr(FSR, SSR, FSR); break; ++ case _xor : __ xorr(FSR, SSR, FSR); break; ++ case shl : __ sllv(FSR, SSR, FSR); break; ++ case shr : __ srav(FSR, SSR, FSR); break; ++ case ushr : __ srlv(FSR, SSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// the result stored in FSR, SSR, ++// used registers : T2, T3 ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ daddu(FSR, T2, FSR); break; ++ case sub : __ dsubu(FSR, T2, FSR); break; ++ case _and: __ andr(FSR, T2, FSR); break; ++ case _or : __ orr(FSR, T2, FSR); break; ++ case _xor: __ xorr(FSR, T2, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, ++// the result is 0x80000000 ++// the godson2 cpu do the same, so we need not handle this specially like x86 ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ __ bind(not_zero); ++ ++ __ pop_i(SSR); ++ if (UseLEXT1) { ++ __ gsdiv(FSR, SSR, FSR); ++ } else { ++ __ div(SSR, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ __ div(SSR, FSR); ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ //__ brk(7); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(not_zero); ++ __ mfhi(FSR); ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ if (UseLEXT1) { ++ __ gsdmult(FSR, T2, FSR); ++ } else { ++ __ dmult(T2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ //__ brk(7); //generate FPE ++ __ 
jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ if (UseLEXT1) { ++ __ gsddiv(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ ++ if (UseLEXT1) { ++ __ gsdmod(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mfhi(FSR); ++ } ++} ++ ++// result in FSR ++// used registers : T0 ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsllv(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrav(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lushr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrlv(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ lwc1(FTF, at_sp()); ++ __ add_s(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ lwc1(FTF, at_sp()); ++ __ sub_s(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ lwc1(FTF, at_sp()); ++ __ mul_s(FSF, FTF, FSF); ++ break; ++ case div: ++ __ lwc1(FTF, at_sp()); ++ __ div_s(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_s(F13, FSF); ++ __ lwc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ ldc1(FTF, at_sp()); ++ __ add_d(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ ldc1(FTF, at_sp()); ++ __ sub_d(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ ldc1(FTF, at_sp()); ++ __ mul_d(FSF, FTF, FSF); ++ break; ++ case div: ++ __ ldc1(FTF, at_sp()); ++ __ div_d(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_d(F13, FSF); ++ __ ldc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ subu32(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ dsubu(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ neg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ neg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ lb(AT, at_bcp(2)); // get constant ++ __ daddu(FSR, FSR, AT); ++ __ sw(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ hswap(FSR); ++ __ lw(AT, T2, 0); ++ __ daddu(FSR, AT, FSR); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // 
fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ mtc1(FSR, FSF); ++ __ cvt_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ mtc1(FSR, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ seb(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ andi(FSR, FSR, 0xFFFF); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ seh(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ dmtc1(FSR, FSF); ++ __ cvt_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ dmtc1(FSR, FSF); ++ __ cvt_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ { ++ Label L; ++ ++ __ trunc_w_s(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2l: ++ { ++ Label L; ++ ++ __ trunc_l_s(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2d: ++ __ cvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ { ++ Label L; ++ ++ __ trunc_w_d(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? ++ __ subu32(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2l: ++ { ++ Label L; ++ ++ __ trunc_l_d(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? 
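++        // The f2i/f2l/d2i/d2l sequences implement the Java narrowing rules
++        // (JLS 5.1.3); a rough reference version of what this block computes:
++        //
++        //   jlong d2l(jdouble d) {
++        //     if (d != d)                  return 0;          // NaN
++        //     if (d >= (jdouble)max_jlong) return max_jlong;  // saturate
++        //     if (d <= (jdouble)min_jlong) return min_jlong;  // saturate
++        //     return (jlong)d;
++        //   }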
++ __ subu(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2f: ++ __ cvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ __ pop(T0); ++ __ pop(R0); ++ ++ __ slt(AT, T0, FSR); ++ __ slt(FSR, FSR, T0); ++ __ subu(FSR, FSR, AT); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ __ ori(FSR, R0, 1); ++ __ ori(AT, R0, 1); ++ ++ if (is_float) { ++ __ lwc1(FTF, at_sp()); ++ __ daddiu(SP, SP, 1 * wordSize); ++ if (unordered_result < 0) { ++ __ c_olt_s(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_ult_s(FTF, FSF); ++ } else { ++ __ c_ult_s(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_olt_s(FTF, FSF); ++ } ++ } else { ++ __ ldc1(FTF, at_sp()); ++ __ daddiu(SP, SP, 2 * wordSize); ++ if (unordered_result < 0) { ++ __ c_olt_d(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_ult_d(FTF, FSF); ++ } else { ++ __ c_ult_d(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_olt_d(FTF, FSF); ++ } ++ } ++ ++ __ movf(AT, R0); ++ __ subu(FSR, FSR, AT); ++} ++ ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ lb(A7, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, AT, 1); ++ __ swap(A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. 
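++  // Sketch of the jsr case below: the "return address" pushed by jsr is a
++  // bytecode index rather than a machine pc (accessor names here are only
++  // illustrative):
++  //
++  //   ret_bci = (bcp + (is_wide ? 5 : 3)) - method->code_base();
++  //   push_int(ret_bci);
++  //   bcp += displacement;      // enter the subroutine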
++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ daddu(AT, BCP, A7); ++ __ lbu(Rnext, AT, 0); ++ ++ // compute return address as bci in FSR ++ __ daddiu(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld(AT, T3, in_bytes(Method::const_offset())); ++ __ dsubu(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ daddu(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ daddu(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ bgtz(A7, dispatch); // check if forward or backward branch ++ __ delayed()->nop(); ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ delayed()->nop(); ++ __ push(T3); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop(T3); ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ delayed()->nop(); ++ __ bind(has_counters); ++ ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
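++        // (i.e. has a MethodData* already been allocated for this method?)
++        //
++        // Both increment_mask_and_jump() uses below boil down to roughly:
++        //
++        //   counter += InvocationCounter::count_increment;
++        //   if ((counter & mask) == 0)         // notification period reached
++        //     goto backedge_counter_overflow;  // may request an OSR compile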
++ __ ld(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ __ delayed()->nop(); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ __ beq(R0, R0, dispatch); ++ __ delayed()->nop(); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(T0, Address(T3, Method::method_counters_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ if (!UseOnStackReplacement) { ++ __ bind(backedge_counter_overflow); ++ } ++ } else { ++ // increment back edge counter ++ __ ld(T1, T3, in_bytes(Method::method_counters_offset())); ++ __ lw(T0, T1, in_bytes(be_offset)); ++ __ increment(T0, InvocationCounter::count_increment); ++ __ sw(T0, T1, in_bytes(be_offset)); ++ ++ // load invocation counter ++ __ lw(T1, T1, in_bytes(inv_offset)); ++ // buffer bit added, mask no needed ++ ++ // dadd backedge counter & invocation counter ++ __ daddu(T1, T1, T0); ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ // T1 : backedge counter & invocation counter ++ if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { ++ __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T1, AT); ++ } ++ ++ __ bne(AT, R0, dispatch); ++ __ delayed()->nop(); ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(T1, profile_method); ++ ++ if (UseOnStackReplacement) { ++ if (Assembler::is_simm16(InvocationCounter::InterpreterBackwardBranchLimit)) { ++ __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T2, AT); ++ } ++ ++ __ bne(AT, R0, dispatch); ++ __ delayed()->nop(); ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the methodDataOop, which value does not get reset on ++ // the call to frequency_counter_overflow(). ++ // To avoid excessive calls to the overflow routine while ++ // the method is being compiled, dadd a second test to make ++ // sure the overflow function is called only once every ++ // overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(AT, T2, overflow_frequency-1); ++ __ beq(AT, R0, backedge_counter_overflow); ++ __ delayed()->nop(); ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against AT, which is the sum of the counters ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T1, AT); ++ __ beq(AT, R0, backedge_counter_overflow); ++ __ delayed()->nop(); ++ } ++ } ++ } ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ lbu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos, true); ++ ++ if (UseLoopCounter) { ++ if (ProfileInterpreter) { ++ // Out-of-line code to allocate method data oop. 
++ __ bind(profile_method); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ b(dispatch); ++ __ delayed()->nop(); ++ } ++ ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ subu(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ __ delayed()->nop(); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lb(T3, V0, nmethod::state_offset()); ++ __ move(AT, nmethod::in_use); ++ __ bne(AT, T3, dispatch); ++ __ delayed()->nop(); ++ ++ // We have the address of an on stack replacement routine in rax. ++ // In preparation of invoking it, first we must migrate the locals ++ // and monitors from off the interpreter frame on the stack. ++ // Ensure to save the osr nmethod over the migration call, ++ // it will be preserved in Rnext. ++ __ move(Rnext, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_mips.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ //FIXME, shall we keep the return address on the stack? ++ __ leave(); // remove frame anchor ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP , SP , AT); ++ ++ // push the (possibly adjusted) return address ++ //refer to osr_entry in c1_LIRAssembler_mips.cpp ++ __ ld(AT, Rnext, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bgez(FSR, not_taken); ++ break; ++ case less_equal: ++ __ bgtz(FSR, not_taken); ++ break; ++ case greater: ++ __ blez(FSR, not_taken); ++ break; ++ case greater_equal: ++ __ bltz(FSR, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ slt(AT, SSR, FSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case less_equal: ++ __ slt(AT, FSR, SSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ case greater: ++ __ slt(AT, FSR, SSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case greater_equal: ++ __ slt(AT, SSR, FSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, 
vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ lw(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ lw(T3, T2, 1 * BytesPerInt); ++ __ swap(T3); ++ __ lw(A7, T2, 2 * BytesPerInt); ++ __ swap(A7); ++ ++ // check against lo & hi ++ __ slt(AT, FSR, T3); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ __ slt(AT, A7, FSR); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ dsubu(FSR, FSR, T3); ++ __ dsll(AT, FSR, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T9, T3); ++ ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ // swap FSR so we can avoid swapping the table 
entries ++ __ swap(FSR); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ lw(T3, T2, BytesPerInt); ++ __ swap(T3); ++ __ b(loop_entry); ++ __ delayed()->nop(); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(AT, AT, 2 * BytesPerInt); ++ ++ // found? ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ ++ __ bind(loop_entry); ++ __ bgtz(T3, loop); ++ __ delayed()->daddiu(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. ++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ daddiu(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ lw(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ swap(j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ __ delayed()->nop(); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ daddu(h, i, j); ++ __ dsrl(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ dsll(AT, h, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ ++ __ slt(AT, key, temp); ++ __ movz(i, h, AT); ++ __ movn(j, h, AT); ++ ++ // while (i+1 < j) ++ __ bind(entry); ++ __ daddiu(h, i, 1); ++ __ slt(AT, h, j); ++ __ bne(AT, R0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // end of binary search, result index is i (must check again!) 
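++  // For reference, the (4-byte aligned) operand layout being searched, per
++  // the JVM spec; every entry is a big-endian 32-bit int, which is why the
++  // values are swap()ed and pairs are addressed with an 8-byte stride:
++  //
++  //   array[-2] : default offset
++  //   array[-1] : npairs
++  //   array[ 0] : match[0]     array[ 1] : offset[0]
++  //   array[ 2] : match[1]     array[ 3] : offset[1]
++  //   ...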
++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ __ bne(key, temp, default_case); ++ __ delayed()->nop(); ++ ++ // entry found -> j = offset ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(j, AT, 1 * BytesPerInt); ++ __ profile_switch_case(i, key, array); ++ __ swap(j); ++ ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ lw(j, array, - 2 * BytesPerInt); ++ __ swap(j); ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++void TemplateTable::_return(TosState state) { ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ __ ld(T1, aaddress(0)); ++ __ load_klass(LVP, T1); ++ __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset())); ++ __ move(AT, JVM_ACC_HAS_FINALIZER); ++ __ andr(AT, AT, LVP); ++ Label skip_register_finalizer; ++ __ beq(AT, R0, skip_register_finalizer); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::register_finalizer), T1); ++ __ bind(skip_register_finalizer); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) { ++ Label no_safepoint; ++ NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); ++ __ lb(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ andi(AT, AT, SafepointMechanism::poll_bit()); ++ __ beq(AT, R0, no_safepoint); ++ __ delayed()->nop(); ++ __ push(state); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::at_safepoint)); ++ __ pop(state); ++ __ bind(no_safepoint); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T9); ++ __ sync(); ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// ---------------------------------------------------------------------------- ++// Volatile variables demand their effects be made known to all CPU's ++// in order. Store buffers on most chips allow reads & writes to ++// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode ++// without some kind of memory barrier (i.e., it's not sufficient that ++// the interpreter does not reorder volatile references, the hardware ++// also must not reorder them). ++// ++// According to the new Java Memory Model (JMM): ++// (1) All volatiles are serialized wrt to each other. ALSO reads & ++// writes act as aquire & release, so: ++// (2) A read cannot let unrelated NON-volatile memory refs that ++// happen after the read float up to before the read. It's OK for ++// non-volatile memory refs that happen before the volatile read to ++// float down below it. ++// (3) Similar a volatile write cannot let unrelated NON-volatile ++// memory refs that happen BEFORE the write float down to after the ++// write. 
It's OK for non-volatile memory refs that happen after the ++// volatile write to float up before it. ++// ++// We only put in barriers around volatile refs (they are expensive), ++// not _between_ memory refs (that would require us to track the ++// flavor of the previous memory refs). Requirements (2) and (3) ++// require some barriers before volatile stores and after volatile ++// loads. These nearly cover requirement (1) but miss the ++// volatile-store-volatile-load case. This final case is placed after ++// volatile-stores although it could just as well go before ++// volatile-loads. ++void TemplateTable::volatile_barrier() { ++ if(os::is_MP()) __ sync(); ++} ++ ++// we dont shift left 2 bits in get_cache_and_index_at_bcp ++// for we always need shift the index we use it. the ConstantPoolCacheEntry ++// is 16-byte long, index is the index in ++// ConstantPoolCache, so cache + base_offset() + index * 16 is ++// the corresponding ConstantPoolCacheEntry ++// used registers : T2 ++// NOTE : the returned index need also shift left 4 to get the address! ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ const Register temp = A1; ++ assert_different_registers(Rcache, index); ++ ++ Label resolved; ++ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } ++ ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ // is resolved? ++ int i = (int)code; ++ __ addiu(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ __ delayed()->nop(); ++ ++ // resolve first time through ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ ++ __ move(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++} ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld(obj, Address(obj, mirror_offset)); ++ ++ __ resolve_oop_handle(obj, T9); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ 
assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ //assert(wordSize == 8, "adjust code below"); ++ // note we shift 4 not 2, for we get is the true inde ++ // of ConstantPoolCacheEntry, not the shifted 2-bit index as x86 version ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld(itable_index, AT, index_offset); ++ } ++ __ ld(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ shl(tmp3, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp3); ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. 
++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ daddu(index, obj, off); ++ ++ const Address field(index, 0); ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ // atos ++ //add for compressedoops ++ do_oop_load(_masm, Address(index, 0), FSR, IN_HEAP); ++ __ push(atos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ctos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, field, 
noreg, noreg); ++ __ push(stos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ // FIXME : the load/store should be atomic, we have no simple method to do this in mips32 ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ __ push(dtos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. 
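++      // Expression stack picture being decoded here (one slot per value
++      // word; long/double values occupy two slots), sketched:
++      //
++      //   category-1 value:               category-2 value (long/double):
++      //     SP + 0 slots : value            SP + 0 slots : value
++      //     SP + 1 slot  : object           SP + 2 slots : object
++      //
++      // hence expr_offset_in_bytes(2) for ltos/dtos and (1) otherwise.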
++ Label two_word, valsize_known; ++ __ dsll(AT, tmp4, Address::times_8); ++ __ daddu(AT, tmp2, AT); ++ __ ld(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ move(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ move(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ b(valsize_known); ++ __ delayed()->daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ ++ __ bind(two_word); ++ __ daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld(tmp1, tmp1, 0*wordSize); ++ } ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ shl(tmp4, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp4); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // itos ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ __ pop(itos); ++ if (!is_static) { ++ 
pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_INT, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // atos ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ __ pop(atos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // stos ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_LONG, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T9), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T9), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 
++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L2; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ delayed()->nop(); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ default: break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // test for volatile with T1 ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ // field address ++ __ daddu(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ 
__ access_store_at(T_BOOLEAN, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. ++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ __ delayed()->nop(); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // replace index with field offset from cache entry ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ daddu(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case 
Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_agetfield: ++ //add for compressedoops ++ do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ ++ // get receiver ++ __ ld(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ daddiu(BCP, BCP, 1); ++ __ null_check(T1); ++ __ daddu(T1, T1, T2); ++ ++ if (state == itos) { ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(T1), noreg, noreg); ++ } else if (state == atos) { ++ do_oop_load(_masm, Address(T1, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(T1), noreg, noreg); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ daddiu(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++void TemplateTable::count_calls(Register method, Register temp) { ++ // implemented elsewhere ++ ShouldNotReachHere(); ++} ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. 
++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ __ delayed()->nop(); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, tmp, recv); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ move(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address
++ const int receiver_is_at_end = -1; // back off one slot to get receiver
++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
++ __ ld(recv, recv_addr);
++ __ verify_oop(recv);
++ }
++ if(save_flags) {
++ __ move(BCP, flags);
++ }
++
++ // compute return type
++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
++ __ andi(flags, flags, 0xf);
++
++ // Make sure we don't need to mask flags for tos_state_shift after the above shift
++ ConstantPoolCacheEntry::verify_tos_state_shift();
++ // load return address
++ {
++ const address table = (address) Interpreter::invoke_return_entry_table_for(code);
++ __ li(AT, (long)table);
++ __ dsll(flags, flags, LogBytesPerWord);
++ __ daddu(AT, AT, flags);
++ __ ld(RA, AT, 0);
++ }
++
++ if (save_flags) {
++ __ move(flags, BCP);
++ __ restore_bcp();
++ }
++}
++
++// used registers : T0, T3, T1, T2
++// T3 : recv, these two registers follow the convention set by prepare_invoke
++// T1 : flags, klass
++// Rmethod : method, index must be Rmethod
++void TemplateTable::invokevirtual_helper(Register index,
++ Register recv,
++ Register flags) {
++
++ assert_different_registers(index, recv, flags, T2);
++
++ // Test for an invoke of a final method
++ Label notFinal;
++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
++ __ andr(AT, flags, AT);
++ __ beq(AT, R0, notFinal);
++ __ delayed()->nop();
++
++ Register method = index; // method must be Rmethod
++ assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention");
++
++ // do the call - the index is actually the method to call
++ // the index is indeed methodOop, for this is vfinal,
++ // see ConstantPoolCacheEntry::set_method for more info
++
++
++ // It's final, need a null check here!
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T9, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target methodOop & entry point ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ __ dsll(AT, index, Address::times_ptr); ++ // T2: receiver ++ __ daddu(AT, T2, AT); ++ //this is a ualign read ++ __ ld(method, AT, base + vtableEntry::method_offset_in_bytes()); ++ __ profile_arguments_type(T2, method, T9, true); ++ __ jump_from_interpreted(method, T2); ++ ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T9 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on mips64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass (from f1) if interface method ++ // Rmethod: method (from f2) ++ // T3: receiver ++ // T1: flags ++ ++ // First check for Object case, then private interface method, ++ // then regular interface method. ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details. 
++ Label notObjectMethod; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notObjectMethod); ++ __ delayed()->nop(); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ // no return from above ++ __ bind(notObjectMethod); ++ ++ Label no_such_interface; // for receiver subtype check ++ Register recvKlass; // used for exception processing ++ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notVFinal); ++ __ delayed()->nop(); ++ ++ // Get receiver klass into FSR - also a null check ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(FSR, T3); ++ ++ Label subtype; ++ __ check_klass_subtype(FSR, T2, T0, subtype); ++ // If we get here the typecheck failed ++ recvKlass = T1; ++ __ move(recvKlass, FSR); ++ __ b(no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bind(subtype); ++ ++ // do the call - rbx is actually the method to call ++ ++ __ profile_final_call(T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ __ jump_from_interpreted(Rmethod, T1); ++ // no return from above ++ __ bind(notVFinal); ++ ++ // Get receiver klass into T1 - also a null check ++ __ restore_locals(); ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T1, T3); ++ ++ Label no_such_method; ++ ++ // Preserve method for throw_AbstractMethodErrorVerbose. ++ __ move(T3, Rmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ ++ // profile this call ++ __ restore_bcp(); ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); ++ __ lw(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addiu(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ subu32(Rmethod, R0, Rmethod); ++ ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose. ++ __ move(FSR, T1); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ FSR, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ __ delayed()->nop(); ++ ++ __ profile_called_method(Rmethod, T0, T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. 
++ recvKlass = A1; ++ Register method = A2; ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ if (method != T3) { __ move(method, T3); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, T2); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T9: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T9 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T9); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T9, true); ++ ++ __ jump_from_interpreted(T2_method, T9); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ //const Register Rmethod = T2; ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ __ get_cpool_and_tags(A1, T1); ++ ++ // make sure the class we're about to instantiate has been resolved. 
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(AT, T1, A2, tags_offset); ++ } else { ++ __ daddu(T1, T1, A2); ++ __ lb(AT, T1, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // get InstanceKlass ++ __ load_resolved_klass_at_index(A1, A2, T3); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // has_finalizer ++ __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case); ++ __ delayed()->nop(); ++ __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ __ delayed()->nop(); ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label done, retry; ++ Address heap_top(T1); ++ __ set64(T1, (long)Universe::heap()->top_addr()); ++ __ ld(FSR, heap_top); ++ ++ __ bind(retry); ++ __ set64(AT, (long)Universe::heap()->end_addr()); ++ __ ld(AT, AT, 0); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); ++ ++ __ bind(done); ++ ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. 
++ __ bind(initialize_object); ++ __ set64(AT, - sizeof(oopDesc)); ++ __ daddu(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ __ delayed()->nop(); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ daddu(T1, FSR, T0); ++ __ daddiu(T1, T1, -oopSize); ++ ++ __ bind(loop); ++ __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize); ++ __ bne(T1, FSR, loop); //dont clear header ++ __ delayed()->daddiu(T1, T1, -oopSize); ++ } ++ ++ //klass in T3, ++ // initialize object header only. ++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ set64(AT, (long)markOopDesc::prototype()); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ __ delayed()->nop(); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(A1); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ sync(); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ lbu(A1, at_bcp(1)); ++ //type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_2_byte_integer_at_bcp(A2, AT, 1); ++ __ huswap(A2); ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. 
++ //
++ __ push(atos);
++ const Register thread = TREG;
++#ifndef OPT_THREAD
++ __ get_thread(thread);
++#endif
++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
++ __ get_vm_result_2(T3, thread);
++ __ pop_ptr(FSR);
++ __ b(resolved);
++ __ delayed()->nop();
++
++ // klass already in cp, get superklass in T3
++ __ bind(quicked);
++ __ load_resolved_klass_at_index(T3, T2, T3);
++
++ __ bind(resolved);
++
++ // get subklass in T2
++ //add for compressedoops
++ __ load_klass(T2, FSR);
++ // Superklass in T3. Subklass in T2.
++ __ gen_subtype_check(T3, T2, ok_is_subtype);
++
++ // Come here on failure
++ // object is at FSR
++ __ jmp(Interpreter::_throw_ClassCastException_entry);
++ __ delayed()->nop();
++
++ // Come here on success
++ __ bind(ok_is_subtype);
++
++ // Collect counts on whether this check-cast sees NULLs a lot or not.
++ if (ProfileInterpreter) {
++ __ b(done);
++ __ delayed()->nop();
++ __ bind(is_null);
++ __ profile_null_seen(T3);
++ } else {
++ __ bind(is_null);
++ }
++ __ bind(done);
++}
++
++// I use T3 as cpool, T1 as tags, T2 as index
++// object always in FSR, superklass in T3, subklass in T2
++void TemplateTable::instanceof() {
++ transition(atos, itos);
++ Label done, is_null, ok_is_subtype, quicked, resolved;
++
++ __ beq(FSR, R0, is_null);
++ __ delayed()->nop();
++
++ // Get cpool & tags index
++ __ get_cpool_and_tags(T3, T1);
++ // get index
++ __ get_2_byte_integer_at_bcp(T2, AT, 1);
++ __ huswap(T2);
++
++ // See if bytecode has already been quicked
++ // quicked
++ __ daddu(AT, T1, T2);
++ __ lb(AT, AT, Array<u1>::base_offset_in_bytes());
++ if(os::is_MP()) {
++ __ sync(); // load acquire
++ }
++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
++ __ beq(AT, R0, quicked);
++ __ delayed()->nop();
++
++ __ push(atos);
++ const Register thread = TREG;
++#ifndef OPT_THREAD
++ __ get_thread(thread);
++#endif
++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
++ __ get_vm_result_2(T3, thread);
++ __ pop_ptr(FSR);
++ __ b(resolved);
++ __ delayed()->nop();
++
++ // get superklass in T3, subklass in T2
++ __ bind(quicked);
++ __ load_resolved_klass_at_index(T3, T2, T3);
++
++ __ bind(resolved);
++ // get subklass in T2
++ //add for compressedoops
++ __ load_klass(T2, FSR);
++
++ // Superklass in T3. Subklass in T2.
++ __ gen_subtype_check(T3, T2, ok_is_subtype);
++ // Come here on failure
++ __ b(done);
++ __ delayed(); __ move(FSR, R0);
++
++ // Come here on success
++ __ bind(ok_is_subtype);
++ __ move(FSR, 1);
++
++ // Collect counts on whether this test sees NULLs a lot or not.
++ if (ProfileInterpreter) {
++ __ beq(R0, R0, done);
++ __ delayed()->nop();
++ __ bind(is_null);
++ __ profile_null_seen(T3);
++ } else {
++ __ bind(is_null); // same as 'done'
++ }
++ __ bind(done);
++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass
++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass
++}
++
++//--------------------------------------------------------
++//--------------------------------------------
++// Breakpoints
++void TemplateTable::_breakpoint() {
++ // Note: We get here even if we are single stepping.
++ // jbug insists on setting breakpoints at every bytecode
++ // even if we are in single step mode.
++
++ transition(vtos, vtos);
++
++ // get the unpatched byte code
++ __ get_method(A1);
++ __ call_VM(NOREG,
++ CAST_FROM_FN_PTR(address,
++ InterpreterRuntime::get_original_bytecode_at),
++ A1, BCP);
++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal
++
++ // post the breakpoint event
++ __ get_method(A1);
++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP);
++
++ // complete the execution of original bytecode
++ __ dispatch_only_normal(vtos);
++}
++
++//-----------------------------------------------------------------------------
++// Exceptions
++
++void TemplateTable::athrow() {
++ transition(atos, vtos);
++ __ null_check(FSR);
++ __ jmp(Interpreter::throw_exception_entry());
++ __ delayed()->nop();
++}
++
++//-----------------------------------------------------------------------------
++// Synchronization
++//
++// Note: monitorenter & exit are symmetric routines, which is reflected
++// in the assembly code structure as well
++//
++// Stack layout:
++//
++// [expressions ] <--- SP = expression stack top
++// ..
++// [expressions ]
++// [monitor entry] <--- monitor block top = expression stack bot
++// ..
++// [monitor entry]
++// [frame data ] <--- monitor block bot
++// ...
++// [return addr ] <--- FP
++
++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer
++// object always in FSR
++void TemplateTable::monitorenter() {
++ transition(atos, vtos);
++
++ // check for NULL object
++ __ null_check(FSR);
++
++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset
++ * wordSize);
++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize);
++ Label allocated;
++
++ // initialize entry pointer
++ __ move(c_rarg0, R0);
++
++ // find a free slot in the monitor block (result in c_rarg0)
++ {
++ Label entry, loop, exit;
++ __ ld(T2, monitor_block_top);
++ __ b(entry);
++ __ delayed()->daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize);
++
++ // free slot?
++ __ bind(loop);
++ __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes());
++ __ movz(c_rarg0, T2, AT);
++
++ __ beq(FSR, AT, exit);
++ __ delayed()->nop();
++ __ daddiu(T2, T2, entry_size);
++
++ __ bind(entry);
++ __ bne(T3, T2, loop);
++ __ delayed()->nop();
++ __ bind(exit);
++ }
++
++ __ bne(c_rarg0, R0, allocated);
++ __ delayed()->nop();
++
++ // allocate one if there's no free slot
++ {
++ Label entry, loop;
++ // 1. compute new pointers // SP: old expression stack top
++ __ ld(c_rarg0, monitor_block_top);
++ __ daddiu(SP, SP, - entry_size);
++ __ daddiu(c_rarg0, c_rarg0, - entry_size);
++ __ sd(c_rarg0, monitor_block_top);
++ __ b(entry);
++ __ delayed(); __ move(T3, SP);
++
++ // 2. move expression stack contents
++ __ bind(loop);
++ __ ld(AT, T3, entry_size);
++ __ sd(AT, T3, 0);
++ __ daddiu(T3, T3, wordSize);
++ __ bind(entry);
++ __ bne(T3, c_rarg0, loop);
++ __ delayed()->nop();
++ }
++
++ __ bind(allocated);
++ // Increment bcp to point to the next bytecode,
++ // so exception handling for async. exceptions work correctly.
++ // The object has already been popped from the stack, so the
++ // expression stack looks correct.
++ __ daddiu(BCP, BCP, 1);
++ __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes());
++ __ lock_object(c_rarg0);
++ // check to make sure this monitor doesn't cause stack overflow after locking
++ __ save_bcp(); // in case of exception
++ __ generate_stack_overflow_check(0);
++ // The bcp has already been incremented.
Just need to dispatch to next instruction. ++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ b(entry); ++ __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ __ bind(loop); ++ __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ __ daddiu(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // error handling. Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ lbu(Rnext, at_bcp(1)); ++ __ dsll(T9, Rnext, Address::times_8); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ lbu(A1, at_bcp(3)); // dimension ++ __ daddiu(A1, A1, -1); ++ __ dsll(A1, A1, Address::times_8); ++ __ daddu(A1, SP, A1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ lbu(AT, at_bcp(3)); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(SP, SP, AT); ++ __ sync(); ++} ++#endif // !CC_INTERP +diff --git a/src/hotspot/cpu/mips/vmStructs_mips.hpp b/src/hotspot/cpu/mips/vmStructs_mips.hpp +new file mode 100644 +index 0000000000..6939914356 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmStructs_mips.hpp +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++#define CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* JavaCallWrapper */ \ ++ /******************************/ \ ++ /******************************/ \ ++ /* JavaFrameAnchor */ \ ++ /******************************/ \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vm_version_ext_mips.cpp b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp +new file mode 100644 +index 0000000000..ac2a43edce +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_mips.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ if (is_loongson()) { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Loongson MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Loongson MIPS %s", cpu_features()); ++ } else { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "MIPS %s", cpu_features()); ++ } ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/mips/vm_version_ext_mips.hpp b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp +new file mode 100644 +index 0000000000..ffdcff0677 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vm_version_mips.cpp b/src/hotspot/cpu/mips/vm_version_mips.cpp +new file mode 100644 +index 0000000000..2e7b61390e +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_mips.cpp +@@ -0,0 +1,516 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/vm_version.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++int VM_Version::_cpuFeatures; ++const char* VM_Version::_features_str = ""; ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++volatile bool VM_Version::_is_determine_cpucfg_supported_running = false; ++bool VM_Version::_is_cpucfg_instruction_supported = true; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(V0); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ pop(V0); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++# undef __ ++ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MMI != 0) ++ result |= CPU_MMI; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA1 != 0) ++ result |= CPU_MSA1_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA2 != 0) ++ result |= CPU_MSA2_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.CGP != 0) ++ result |= CPU_CGP; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX1 != 0) ++ result |= CPU_LSX1; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX2 != 0) ++ result |= CPU_LSX2; ++ if (_cpuid_info.cpucfg_info_id1.bits.LASX != 0) ++ result |= CPU_LASX; ++ if (_cpuid_info.cpucfg_info_id1.bits.LLSYNC != 0) ++ result |= CPU_LLSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.TGTSYNC != 0) ++ result |= CPU_TGTSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.MUALP != 0) ++ result |= CPU_MUALP; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT1 != 0) ++ result |= CPU_LEXT1; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT2 != 0) ++ result |= CPU_LEXT2; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT3 != 0) ++ result |= CPU_LEXT3; ++ if (_cpuid_info.cpucfg_info_id2.bits.LAMO != 0) ++ result |= CPU_LAMO; ++ if (_cpuid_info.cpucfg_info_id2.bits.LPIXU != 0) ++ result |= CPU_LPIXU; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void read_cpu_info(const char *path, char *result) { ++ FILE *ptr; ++ char buf[1024]; ++ int i = 0; ++ if((ptr=fopen(path, "r")) != NULL) { ++ while(fgets(buf, 1024, ptr)!=NULL) { ++ strcat(result,buf); ++ i++; ++ if (i == 10) break; ++ } ++ fclose(ptr); ++ } else { ++ warning("Can't 
detect CPU info - cannot open %s", path); ++ } ++} ++ ++void strlwr(char *str) { ++ for (; *str!='\0'; str++) ++ *str = tolower(*str); ++} ++ ++int VM_Version::get_feature_flags_by_cpuinfo(int features) { ++ assert(!cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ ++ char res[10240]; ++ int i; ++ memset(res, '\0', 10240 * sizeof(char)); ++ read_cpu_info("/proc/cpuinfo", res); ++ // res is converted to lower case ++ strlwr(res); ++ ++ if (strstr(res, "loongson")) { ++ // Loongson CPU ++ features |= CPU_LOONGSON; ++ ++ const struct Loongson_Cpuinfo loongson_cpuinfo[] = { ++ {L_3A1000, "3a1000"}, ++ {L_3B1500, "3b1500"}, ++ {L_3A2000, "3a2000"}, ++ {L_3B2000, "3b2000"}, ++ {L_3A3000, "3a3000"}, ++ {L_3B3000, "3b3000"}, ++ {L_2K1000, "2k1000"}, ++ {L_UNKNOWN, "unknown"} ++ }; ++ ++ // Loongson Family ++ int detected = 0; ++ for (i = 0; i <= L_UNKNOWN; i++) { ++ switch (i) { ++ // 3A1000 and 3B1500 may use an old kernel and further comparsion is needed ++ // test PRID REV in /proc/cpuinfo ++ // 3A1000: V0.5, model name: ICT Loongson-3A V0.5 FPU V0.1 ++ // 3B1500: V0.7, model name: ICT Loongson-3B V0.7 FPU V0.1 ++ case L_3A1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3a v0.5")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3A1000 platform"); ++ } ++ break; ++ case L_3B1500: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3b v0.7")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3B1500 platform"); ++ } ++ break; ++ case L_3A2000: ++ case L_3B2000: ++ case L_3A3000: ++ case L_3B3000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS464E; ++ detected++; ++ //tty->print_cr("3A2000/3A3000/3B2000/3B3000 platform"); ++ } ++ break; ++ case L_2K1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS264; ++ detected++; ++ //tty->print_cr("2K1000 platform"); ++ } ++ break; ++ case L_UNKNOWN: ++ if (detected == 0) { ++ detected++; ++ //tty->print_cr("unknown Loongson platform"); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ assert (detected == 1, "one and only one of LOONGSON_CPU_FAMILY should be detected"); ++ } else { // not Loongson ++ // Not Loongson CPU ++ //tty->print_cr("MIPS platform"); ++ } ++ ++ if (features & CPU_LOONGSON_GS264) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ features |= CPU_MSA1_0; ++ features |= CPU_LSX1; ++ } else if (features & CPU_LOONGSON_GS464) { ++ features |= CPU_LEXT1; ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ } else if (features & CPU_LOONGSON_GS464E) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_LEXT3; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } else if (features & CPU_LOONGSON) { ++ // unknow loongson ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } ++ VM_Version::_cpu_info_is_initialized = true; ++ ++ return features; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ // test if cpucfg instruction is supported ++ VM_Version::_is_determine_cpucfg_supported_running = true; ++ __asm__ __volatile__( ++ ".insn \n\t" ++ ".word (0xc8080118)\n\t" // cpucfg zero, zero ++ : ++ : ++ : ++ ); ++ VM_Version::_is_determine_cpucfg_supported_running = false; ++ ++ if (supports_cpucfg()) { ++ get_cpu_info_stub(&_cpuid_info); ++ _cpuFeatures = get_feature_flags_by_cpucfg(); 
++ // Only Loongson CPUs support cpucfg ++ _cpuFeatures |= CPU_LOONGSON; ++ } else { ++ _cpuFeatures = get_feature_flags_by_cpuinfo(0); ++ } ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_CMDLINE(uintx, MaxGCPauseMillis, 650); ++ } ++ ++#ifdef COMPILER2 ++ if (MaxVectorSize > 0) { ++ if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2"); ++ MaxVectorSize = 8; ++ } ++ if (MaxVectorSize > 0 && supports_ps()) { ++ MaxVectorSize = 8; ++ } else { ++ MaxVectorSize = 0; ++ } ++ } ++ // ++ // Vector optimization of MIPS works in most cases, but cannot pass hotspot/test/compiler/6340864/TestFloatVect.java. ++ // Vector optimization was closed by default. ++ // The reasons: ++ // 1. The kernel does not have emulation of PS instructions yet, so the emulation of PS instructions must be done in JVM, see JVM_handle_linux_signal. ++ // 2. It seems the gcc4.4.7 had some bug related to ucontext_t, which is used in signal handler to emulate PS instructions. ++ // ++ if (FLAG_IS_DEFAULT(MaxVectorSize)) { ++ MaxVectorSize = 0; ++ } ++ ++#endif ++ ++ if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 1000); ++ } ++ } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 2000); ++ } ++ } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 3000); ++ } ++ } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 4000); ++ } ++ } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } else { ++ assert(false, "Should Not Reach Here, what is the cpu type?"); ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } ++ ++ if (supports_lext1()) { ++ if (FLAG_IS_DEFAULT(UseLEXT1)) { ++ FLAG_SET_DEFAULT(UseLEXT1, true); ++ } ++ } else if (UseLEXT1) { ++ warning("LEXT1 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT1, false); ++ } ++ ++ if (supports_lext2()) { ++ if (FLAG_IS_DEFAULT(UseLEXT2)) { ++ FLAG_SET_DEFAULT(UseLEXT2, true); ++ } ++ } else if (UseLEXT2) { ++ warning("LEXT2 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT2, false); ++ } ++ ++ if (supports_lext3()) { ++ if (FLAG_IS_DEFAULT(UseLEXT3)) { ++ FLAG_SET_DEFAULT(UseLEXT3, true); ++ } ++ } else if (UseLEXT3) { ++ warning("LEXT3 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT3, false); ++ } ++ ++ if (UseLEXT2) { ++ if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) { ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 1); ++ } ++ } else if (UseCountTrailingZerosInstructionMIPS64) { ++ if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) ++ warning("ctz/dctz instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 0); ++ } ++ ++ if (TieredCompilation) { ++ if (!FLAG_IS_DEFAULT(TieredCompilation)) ++ warning("TieredCompilation not supported"); ++ FLAG_SET_DEFAULT(TieredCompilation, false); ++ } ++ ++ char buf[256]; ++ bool is_unknown_loongson_cpu = is_loongson() && !is_gs464() && !is_gs464e() && !is_gs264() && !supports_cpucfg(); ++ ++ // A note on the 
_features_string format: ++ // There are jtreg tests checking the _features_string for various properties. ++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, usesynclevel:%d", ++ (is_loongson() ? "mips-compatible loongson cpu" : "mips cpu"), ++ (is_gs464() ? ", gs464 (3a1000/3b1500)" : ""), ++ (is_gs464e() ? ", gs464e (3a2000/3a3000/3b2000/3b3000)" : ""), ++ (is_gs264() ? ", gs264 (2k1000)" : ""), ++ (is_unknown_loongson_cpu ? ", unknown loongson cpu" : ""), ++ (supports_dsp() ? ", dsp" : ""), ++ (supports_ps() ? ", ps" : ""), ++ (supports_3d() ? ", 3d" : ""), ++ (supports_mmi() ? ", mmi" : ""), ++ (supports_msa1_0() ? ", msa1_0" : ""), ++ (supports_msa2_0() ? ", msa2_0" : ""), ++ (supports_lsx1() ? ", lsx1" : ""), ++ (supports_lsx2() ? ", lsx2" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_lext1() ? ", lext1" : ""), ++ (supports_lext2() ? ", lext2" : ""), ++ (supports_lext3() ? ", lext3" : ""), ++ (supports_cgp() ? ", aes, crc, sha1, sha256, sha512" : ""), ++ (supports_lamo() ? ", lamo" : ""), ++ (supports_lpixu() ? ", lpixu" : ""), ++ (needs_llsync() ? ", llsync" : ""), ++ (needs_tgtsync() ? ", tgtsync": ""), ++ (needs_ulsync() ? ", ulsync": ""), ++ (supports_mualp() ? ", mualp" : ""), ++ UseSyncLevel); ++ _features_str = strdup(buf); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { ++ warning("SHA intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) { ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ } ++ ++ if (UseCRC32Intrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32Intrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); ++ } ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32CIntrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); ++ } ++ } ++ ++ if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ } ++ ++#ifdef COMPILER2 ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ 
UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++#endif ++ ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff --git a/src/hotspot/cpu/mips/vm_version_mips.hpp b/src/hotspot/cpu/mips/vm_version_mips.hpp +new file mode 100644 +index 0000000000..733a0af295 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_mips.hpp +@@ -0,0 +1,221 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++ ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" ++ ++class VM_Version: public Abstract_VM_Version { ++public: ++ ++ union Loongson_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t FP_CFG : 1, ++ FPREV : 3, ++ MMI : 1, ++ MSA1 : 1, ++ MSA2 : 1, ++ CGP : 1, ++ WRP : 1, ++ LSX1 : 1, ++ LSX2 : 1, ++ LASX : 1, ++ R6FXP : 1, ++ R6CRCP : 1, ++ R6FPP : 1, ++ CNT64 : 1, ++ LSLDR0 : 1, ++ LSPREF : 1, ++ LSPREFX : 1, ++ LSSYNCI : 1, ++ LSUCA : 1, ++ LLSYNC : 1, ++ TGTSYNC : 1, ++ LLEXC : 1, ++ SCRAND : 1, ++ MUALP : 1, ++ KMUALEn : 1, ++ ITLBT : 1, ++ LSUPERF : 1, ++ SFBP : 1, ++ CDMAP : 1, ++ : 1; ++ } bits; ++ }; ++ ++ union Loongson_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t LEXT1 : 1, ++ LEXT2 : 1, ++ LEXT3 : 1, ++ LSPW : 1, ++ LBT1 : 1, ++ LBT2 : 1, ++ LBT3 : 1, ++ LBTMMU : 1, ++ LPMP : 1, ++ LPMRev : 3, ++ LAMO : 1, ++ LPIXU : 1, ++ LPIXNU : 1, ++ LVZP : 1, ++ LVZRev : 3, ++ LGFTP : 1, ++ LGFTRev : 3, ++ LLFTP : 1, ++ LLFTRev : 3, ++ LCSRP : 1, ++ DISBLKLY : 1, ++ : 3; ++ } bits; ++ }; ++ ++protected: ++ ++ enum { ++ CPU_LOONGSON = (1 << 1), ++ CPU_LOONGSON_GS464 = (1 << 2), ++ CPU_LOONGSON_GS464E = (1 << 3), ++ CPU_LOONGSON_GS264 = (1 << 4), ++ CPU_MMI = (1 << 11), ++ CPU_MSA1_0 = (1 << 12), ++ CPU_MSA2_0 = (1 << 13), ++ CPU_CGP = (1 << 14), ++ CPU_LSX1 = (1 << 15), ++ CPU_LSX2 = (1 << 16), ++ CPU_LASX = (1 << 17), ++ CPU_LEXT1 = (1 << 18), ++ CPU_LEXT2 = (1 << 19), ++ CPU_LEXT3 = (1 << 20), ++ CPU_LAMO = (1 << 21), ++ CPU_LPIXU = (1 << 22), ++ CPU_LLSYNC = (1 << 23), ++ CPU_TGTSYNC = (1 << 24), ++ CPU_ULSYNC = (1 << 25), ++ CPU_MUALP = (1 << 26), ++ ++ //////////////////////add some other feature here////////////////// ++ } cpuFeatureFlags; ++ ++ enum Loongson_Family { ++ L_3A1000 = 0, ++ L_3B1500 = 1, ++ L_3A2000 = 2, ++ L_3B2000 = 3, ++ L_3A3000 = 4, ++ L_3B3000 = 5, ++ L_2K1000 = 6, ++ L_UNKNOWN = 7 ++ }; ++ ++ struct Loongson_Cpuinfo { ++ Loongson_Family id; ++ const char* const match_str; ++ }; ++ ++ static int _cpuFeatures; ++ static const char* _features_str; ++ static volatile bool _is_determine_cpucfg_supported_running; ++ static bool _is_cpucfg_instruction_supported; ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ uint32_t cpucfg_info_id0; ++ Loongson_Cpucfg_Id1 cpucfg_info_id1; ++ Loongson_Cpucfg_Id2 cpucfg_info_id2; ++ uint32_t cpucfg_info_id3; ++ uint32_t cpucfg_info_id4; ++ uint32_t cpucfg_info_id5; ++ uint32_t cpucfg_info_id6; ++ uint32_t cpucfg_info_id8; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static int get_feature_flags_by_cpuinfo(int features); ++ static void get_processor_features(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize 
Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ static ByteSize Loongson_Cpucfg_id8_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id8); } ++ ++ static bool is_determine_features_test_running() { return _is_determine_cpucfg_supported_running; } ++ ++ static void clean_cpuFeatures() { _cpuFeatures = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool supports_cpucfg() { return _is_cpucfg_instruction_supported; } ++ static bool set_supports_cpucfg(bool value) { return _is_cpucfg_instruction_supported = value; } ++ ++ static bool is_loongson() { return _cpuFeatures & CPU_LOONGSON; } ++ static bool is_gs264() { return _cpuFeatures & CPU_LOONGSON_GS264; } ++ static bool is_gs464() { return _cpuFeatures & CPU_LOONGSON_GS464; } ++ static bool is_gs464e() { return _cpuFeatures & CPU_LOONGSON_GS464E; } ++ static bool supports_dsp() { return 0; /*not supported yet*/} ++ static bool supports_ps() { return 0; /*not supported yet*/} ++ static bool supports_3d() { return 0; /*not supported yet*/} ++ static bool supports_msa1_0() { return _cpuFeatures & CPU_MSA1_0; } ++ static bool supports_msa2_0() { return _cpuFeatures & CPU_MSA2_0; } ++ static bool supports_cgp() { return _cpuFeatures & CPU_CGP; } ++ static bool supports_mmi() { return _cpuFeatures & CPU_MMI; } ++ static bool supports_lsx1() { return _cpuFeatures & CPU_LSX1; } ++ static bool supports_lsx2() { return _cpuFeatures & CPU_LSX2; } ++ static bool supports_lasx() { return _cpuFeatures & CPU_LASX; } ++ static bool supports_lext1() { return _cpuFeatures & CPU_LEXT1; } ++ static bool supports_lext2() { return _cpuFeatures & CPU_LEXT2; } ++ static bool supports_lext3() { return _cpuFeatures & CPU_LEXT3; } ++ static bool supports_lamo() { return _cpuFeatures & CPU_LAMO; } ++ static bool supports_lpixu() { return _cpuFeatures & CPU_LPIXU; } ++ static bool needs_llsync() { return _cpuFeatures & CPU_LLSYNC; } ++ static bool needs_tgtsync() { return _cpuFeatures & CPU_TGTSYNC; } ++ static bool needs_ulsync() { return _cpuFeatures & CPU_ULSYNC; } ++ static bool supports_mualp() { return _cpuFeatures & CPU_MUALP; } ++ ++ //mips has no such instructions, use ll/sc instead ++ static bool supports_compare_and_exchange() { return false; } ++ ++ static const char* cpu_features() { return _features_str; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vmreg_mips.cpp b/src/hotspot/cpu/mips/vmreg_mips.cpp +new file mode 100644 +index 0000000000..86bd74d430 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.cpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ regName[i++] = reg->name(); ++ regName[i++] = reg->name(); ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ regName[i++] = freg->name(); ++ regName[i++] = freg->name(); ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} +diff --git a/src/hotspot/cpu/mips/vmreg_mips.hpp b/src/hotspot/cpu/mips/vmreg_mips.hpp +new file mode 100644 +index 0000000000..8ccc8c513c +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.hpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_HPP ++ ++inline Register as_Register() { ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() >> 1); ++} ++ ++inline FloatRegister as_FloatRegister() { ++ assert( is_FloatRegister(), "must be" ); ++ assert( is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); ++} ++ ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if(is_Register()) return true; ++ if(is_FloatRegister()) return true; ++ assert(false, "what register?"); ++ return false; ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vmreg_mips.inline.hpp b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp +new file mode 100644 +index 0000000000..12ad7361aa +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() << 1 ); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp +new file mode 100644 +index 0000000000..75c23e8088 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp +@@ -0,0 +1,340 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.hpp"
++#include "code/vtableStubs.hpp"
++#include "interp_masm_mips.hpp"
++#include "memory/resourceArea.hpp"
++#include "oops/compiledICHolder.hpp"
++#include "oops/klass.inline.hpp"
++#include "oops/klassVtable.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "vmreg_mips.inline.hpp"
++#ifdef COMPILER2
++#include "opto/runtime.hpp"
++#endif
++
++
++// machine-dependent part of VtableStubs: create VtableStub of correct size and
++// initialize its code
++
++#define __ masm->
++
++#define T0 RT0
++#define T1 RT1
++#define T2 RT2
++#define T3 RT3
++#define T8 RT8
++#define T9 RT9
++
++#ifndef PRODUCT
++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
++#endif
++
++// used by compiler only; receiver in T0.
++// used registers:
++// Rmethod : receiver klass & method
++// NOTE: If this code is used by the C1, the receiver_location is always 0.
++// when we reach here, receiver is in T0, klass is in T8
++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
++  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
++  const int stub_code_length = code_size_limit(true);
++  VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
++  // Can be NULL if there is no free space in the code cache.
++  if (s == NULL) {
++    return NULL;
++  }
++
++  // Count unused bytes in instruction sequences of variable size.
++  // We add them to the computed buffer size in order to avoid
++  // overflow in subsequently generated stubs.
++  address start_pc;
++  int slop_bytes = 0;
++  int slop_delta = 0;
++  int load_const_maxLen = 6*BytesPerInstWord;  // load_const generates 6 instructions. Assume that as max size for li
++  // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation.
++ const int index_dependent_slop = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ lw(t1, AT , 0); ++ __ addiu(t1, t1, 1); ++ __ sw(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ //add for compressedoops ++ __ load_klass(t1, T0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ lw(t2, t1, in_bytes(Klass::vtable_length_offset())); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ move(AT, vtable_index*vtableEntry::size()); ++ __ slt(AT, AT, t2); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ move(A2, vtable_index); ++ __ move(A1, A0); ++ ++ // VTABLE TODO: find upper bound for call_VM length. ++ start_pc = __ pc(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ const ptrdiff_t estimate = 512; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ const Register method = Rmethod; ++ ++ // load methodOop and target address ++ start_pc = __ pc(); ++ // lookup_virtual_method generates 18 instructions (worst case) ++ __ lookup_virtual_method(t1, vtable_index, method); ++ slop_delta = 18*BytesPerInstWord - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: methodOop ++ // T9: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ masm->flush(); ++ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); ++ ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. 
++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 6*BytesPerInstWord; // load_const generates 6 instructions. Assume that as max size for li ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler *masm = new MacroAssembler(&cb); ++ ++ // we T8,T9 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ lw(T8, AT, 0); ++ __ addiu(T8, T8,1); ++ __ sw(T8, AT, 0); ++ } ++#endif // PRODUCT ++ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ ++ const Register icholder_reg = T1; ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ Label L_no_such_interface; ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ { ++ // x86 use lookup_interface_method, but lookup_interface_method does not work on MIPS. ++ // No dynamic code size variance here, so slop_bytes is not needed. ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ daddiu(t2, t1, base); ++ __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ __ bind(entry); ++ ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, R0, L_no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bne(AT, resolved_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ } ++ ++ // add for compressedoops ++ __ load_klass(t1, T0); ++ // compute itable entry offset (in words) ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ __ daddiu(t2, t1, base); ++ __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ __ bind(entry); ++ ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. 
++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, R0, L_no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bne(AT, holder_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ // We found a hit, move offset into T9 ++ __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); ++ ++ // Compute itableMethodEntry. ++ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + ++ itableMethodEntry::method_offset_in_bytes(); ++ ++ // Get methodOop and entrypoint for compiler ++ const Register method = Rmethod; ++ __ dsll(AT, t2, Address::times_1); ++ __ addu(AT, AT, t1 ); ++ start_pc = __ pc(); ++ __ set64(t1, method_offset); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ addu(AT, AT, t1 ); ++ __ ld_ptr(method, AT, 0); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L1; ++ __ beq(method, R0, L1); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L1); ++ __ delayed()->nop(); ++ __ stop("methodOop is null"); ++ __ bind(L1); ++ } ++#endif // ASSERT ++ ++ // Rmethod: methodOop ++ // T0: receiver ++ // T9: entry point ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. ++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. 
++  start_pc = __ pc();
++  __ set64(T9, (long)SharedRuntime::get_handle_wrong_method_stub());
++  slop_delta  = load_const_maxLen - (__ pc() - start_pc);
++  slop_bytes += slop_delta;
++  assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
++  __ jr(T9);
++  __ delayed()->nop();
++
++  masm->flush();
++  bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
++
++  return s;
++}
++
++// NOTE: whenever you change the code above, don't forget to change the const here
++int VtableStub::pd_code_alignment() {
++  const unsigned int icache_line_size = wordSize;
++  return icache_line_size;
++}
+diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
+index 847f7d61d2..f570946090 100644
+--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
++++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
+@@ -488,6 +488,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
+   }
+ }
+ 
++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) {
++  ShouldNotReachHere();
++}
+ 
+ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
+   Bytecodes::Code code = op->bytecode();
+@@ -1608,6 +1611,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L
+   __ bind(skip);
+ }
+ 
++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) {
++  ShouldNotReachHere();
++}
++
+ 
+ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest,
+                              CodeEmitInfo* info, bool pop_fpu_stack) {
+diff --git a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp
+index d34ea45c0b..f6b6dbdee3 100644
+--- a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp
++++ b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp
+@@ -273,21 +273,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) {
+   __ move(temp, addr);
+ }
+ 
+-
+-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) {
++template <class T>
++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) {
+   LIR_Opr tmp = FrameMap::R0_opr;
+   __ load(new LIR_Address(base, disp, T_INT), tmp, info);
+-  __ cmp(condition, tmp, c);
++  __ cmp_branch(condition, tmp, c, T_INT, tgt);
+ }
+ 
++// Explicit instantiation for all supported types.
++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*);
++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*);
++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*);
+ 
+-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base,
+-                               int disp, BasicType type, CodeEmitInfo* info) {
++template <class T>
++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) {
+   LIR_Opr tmp = FrameMap::R0_opr;
+   __ load(new LIR_Address(base, disp, type), tmp, info);
+-  __ cmp(condition, reg, tmp);
++  __ cmp_branch(condition, reg, tmp, type, tgt);
+ }
+ 
++// Explicit instantiation for all supported types.
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); +diff --git a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp +index ef9b0833d3..c6b25bf10e 100644 +--- a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp ++++ b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp +@@ -62,3 +62,24 @@ void LIR_Address::verify() const { + #endif + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +index 897be2209e..0c27cc20f3 100644 +--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp ++++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +@@ -379,6 +379,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); +@@ -1503,6 +1506,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + } + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, + CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); +diff --git a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp +index ae297ac635..c786803e0f 100644 +--- a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp ++++ b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp +@@ -213,16 +213,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, 
int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr scratch = FrameMap::Z_R1_opr; + __ load(new LIR_Address(base, disp, T_INT), scratch, info); +- __ cmp(condition, scratch, c); ++ __ cmp_branch(condition, scratch, c, T_INT, tgt); + } + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); ++ __ branch(condition, type, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); ++ + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (tmp->is_valid()) { + if (is_power_of_2(c + 1)) { +diff --git a/src/hotspot/cpu/s390/c1_LIR_s390.cpp b/src/hotspot/cpu/s390/c1_LIR_s390.cpp +index 9507ca0856..2116e9af2b 100644 +--- a/src/hotspot/cpu/s390/c1_LIR_s390.cpp ++++ b/src/hotspot/cpu/s390/c1_LIR_s390.cpp +@@ -56,3 +56,23 @@ void LIR_Address::verify() const { + } + #endif // PRODUCT + ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp +index e503159eb7..2e5609fec8 100644 +--- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp ++++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp +@@ -599,6 +599,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + // The peephole pass fills the delay slot + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + Bytecodes::Code code = op->bytecode(); +@@ -1638,6 +1641,9 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + __ bind(skip); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "unused on this code path"); +diff --git a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp +index a09a159722..a02ffafc77 100644 +--- a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp ++++ b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp +@@ -267,19 +267,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ move(temp, addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr o7opr = FrameMap::O7_opr; + __ load(new LIR_Address(base, disp, T_INT), o7opr, info); +- __ cmp(condition, o7opr, c); ++ __ cmp_branch(condition, o7opr, c, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr o7opr = FrameMap::O7_opr; + __ load(new LIR_Address(base, disp, type), o7opr, info); +- __ cmp(condition, reg, o7opr); ++ __ cmp_branch(condition, reg, o7opr, type, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); +diff --git a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp +index c21d2c1d9a..9cebb387e2 100644 +--- a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp ++++ b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp +@@ -54,3 +54,24 @@ void LIR_Address::verify() const { + "wrong type for addresses"); + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +index cee3140f4f..7b76eb0b9e 100644 +--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp ++++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +@@ -1442,6 +1442,10 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); +@@ -2030,6 +2034,9 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + } + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); +diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +index 905708a9fa..1c6774e1d6 100644 +--- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp ++++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +@@ -255,15 +255,27 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + __ cmp_mem_int(condition, base, disp, c, info); ++ __ branch(condition, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); ++ __ branch(condition, type, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (tmp->is_valid() && c > 0 && c < max_jint) { +diff --git a/src/hotspot/cpu/x86/c1_LIR_x86.cpp b/src/hotspot/cpu/x86/c1_LIR_x86.cpp +index 92277ee063..20e283e302 100644 +--- a/src/hotspot/cpu/x86/c1_LIR_x86.cpp ++++ b/src/hotspot/cpu/x86/c1_LIR_x86.cpp +@@ -72,3 +72,24 @@ void LIR_Address::verify() const { + #endif + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp +index 95d7e51501..8d7b623ee7 100644 +--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp ++++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp +@@ -263,7 +263,8 @@ void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, + #define __ ce->masm()-> + + void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const { ++ LIR_Opr ref, ++ LIR_Opr res) const { + __ testptr(ref->as_register(), address_bad_mask_from_thread(r15_thread)); + } + +diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +index 3687754e71..791e4ed43f 100644 +--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp ++++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +@@ -77,7 +77,8 @@ public: + + #ifdef COMPILER1 + void generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const; ++ LIR_Opr ref, ++ LIR_Opr res) const; + + void generate_c1_load_barrier_stub(LIR_Assembler* ce, + ZLoadBarrierStubC1* stub) const; +diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index f89a8c360b..68a2a3e4fa 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2021 Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + // no precompiled headers + #include "jvm.h" + #include "classfile/classLoader.hpp" +@@ -3977,6 +3983,8 @@ size_t os::Linux::find_large_page_size() { + IA64_ONLY(256 * M) + PPC_ONLY(4 * M) + S390_ONLY(1 * M) ++ MIPS64_ONLY(4 * M) ++ LOONGARCH64_ONLY(4 * M); //In MIPS _large_page_size is seted 4*M. // TODO: LA + SPARC_ONLY(4 * M); + #endif // ZERO + +diff --git a/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp +new file mode 100644 +index 0000000000..30719a0340 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58006,12 +104733,1566 @@ index 0000000000..30719a0340 + */ diff --git a/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp new file mode 100644 -index 0000000000..6c71de772e +index 0000000000..86f8c963f5 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP ++ ++#include "runtime/vm_version.hpp" ++ ++// Implementation of class atomic ++ ++template ++struct Atomic::PlatformAdd ++ : Atomic::AddAndFetch > ++{ ++ template ++ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { ++ //Unimplemented(); ++ return __sync_add_and_fetch(dest, add_value); ++ } ++}; ++ ++template<> ++template ++inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ T __ret, __tmp; ++ ++ STATIC_ASSERT(4 == sizeof(T)); ++ __asm__ __volatile__ ( ++ "1: ll.w %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc.w %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++template<> ++template ++inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __ret; ++ jlong __tmp; ++ __asm__ __volatile__ ( ++ "1: ll.d %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc.d %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++#if 0 ++template<> ++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == sizeof(T)); ++} ++ ++#else ++// No direct support for cmpxchg of bytes; emulate using int. 
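++// CmpxchgByteUsingInt is the shared fallback from atomic.hpp: it performs the
++// exchange on the aligned 4-byte word containing the byte, splicing the new
++// byte into that word and retrying if a neighbouring byte changes underneath,
++// so the LL.W/SC.W sequence above only ever sees naturally aligned words.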
++template<> ++struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; ++#endif ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(4 == sizeof(T)); ++ T __prev; ++ jint __cmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.w %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.w %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __prev; ++ jlong __cmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.d %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.d %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ return __prev; ++} ++ ++ ++#endif // OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..c9f675baca +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP ++ ++#include ++ ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. 
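++// LoongArch64 is little-endian while Java class-file data is stored
++// big-endian, so these helpers just wrap the glibc bswap_16/32/64 macros
++// from byteswap.h.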
++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..826c1fe39a +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void 
pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp +new file mode 100644 +index 0000000000..0b5247aa0b +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. 
++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 2048); ++ ++define_pd_global(intx, CompilerThreadStackSize, 2048); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s +new file mode 100644 +index 0000000000..ebd73af0c5 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s +@@ -0,0 +1,25 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ +diff --git a/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp +new file mode 100644 +index 0000000000..295d20e19e +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP ++ ++#include "runtime/os.hpp" ++ ++// Included in orderAccess.hpp header file. ++ ++// Implementation of class OrderAccess. ++#define inlasm_sync() if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("dbar 0" : : : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(); } ++inline void OrderAccess::storestore() { inlasm_sync(); } ++inline void OrderAccess::loadstore() { inlasm_sync(); } ++inline void OrderAccess::storeload() { inlasm_sync(); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(); } ++inline void OrderAccess::release() { inlasm_sync(); } ++inline void OrderAccess::fence() { inlasm_sync(); } ++ ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp +new file mode 100644 +index 0000000000..cf5fff0d04 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp +@@ -0,0 +1,710 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "compiler/disassembler.hpp" ++ ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 3 ++#define REG_FP 22 ++ ++NOINLINE address os::current_stack_pointer() { ++ register void *sp __asm__ ("$r3"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.__pc; ++} ++ ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.__pc = (intptr_t)pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
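++// sp and fp are recovered from uc_mcontext via the ucontext_get_sp()/
++// ucontext_get_fp() accessors above ($r3 and $r22 on LoongArch64).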
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ return frame(sp, fp, epc.pc()); ++} ++ ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before LR ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.__gregs[1]); ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; ++} ++ ++// By default, gcc always save frame pointer on stack. 
It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++frame os::current_frame() { ++ intptr_t *fp = ((intptr_t **)__builtin_frame_address(0))[frame::native_frame_link_offset]; ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++extern "C" int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. ++ ++ if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ if (PrintMiscellaneous && (WizardMode || Verbose)) { ++ warning("Ignoring SIGPIPE - see bug 4229104"); ++ } ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ handle_assert_poison_fault(ucVoid, info->si_addr); ++ return 1; ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a java thread"); ++#endif ++ thread = (JavaThread*)t; ++ } ++ else if(t->is_VM_thread()){ ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a VM thread\n"); ++#endif ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults: ++ if (uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++ ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (thread->on_local_stack(addr)) { ++ 
// stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in yellow zone\n"); ++#endif ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in java\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in vm or native codes and return\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in red zone\n"); ++#endif ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is neither in yellow zone nor in the red one\n"); ++#endif ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } ++ } // sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). 
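++      // When a method is made not entrant its verified entry point is patched
++      // with an illegal instruction, so the resulting SIGILL is redirected to
++      // the handle-wrong-method stub and the caller re-resolves the call.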
++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ if (nm != NULL && nm->has_unsafe_access()) { ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to LA code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ // In LA, div_by_zero exception can only be triggered by explicit 'trap'. ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. 
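++    // Legacy UseMembar=false support: the VM forces a global memory barrier
++    // by write-protecting the shared serialization page; the faulting store
++    // is simply retried once block_on_serialize_page_trap() returns.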
++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("write protecting the memory serialiazation page\n"); ++#endif ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ os::Linux::ucontext_set_pc(uc, stub); ++ return true; ++ } ++ ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("signal chaining\n"); ++#endif ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("abort becauce of unrecognized\n"); ++#endif ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("VMError in signal handler\n"); ++#endif ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler ++} ++ ++void os::Linux::init_thread_fpu_state(void) { ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ return 0; // mute compiler ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes, NULL); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++// Minimum usable stack sizes required to get to user code. Space for ++// HotSpot guard pages is added later. ++size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 64 * K; ++ ++// Return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // Default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 
2 * M : 512 * K); ++ return s; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("ZERO=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print("TP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print("T4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print("T5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print("T6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print("T7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print("RX=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print("S7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print("S8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ ++} ++ ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ st->print_cr("Registers:"); ++ st->print( "ZERO=" 
INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print(", TP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print( "T4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print(", T5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print(", T6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print(", T7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print(", RX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print(", S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print( "S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print(", S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print( "S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print(", S8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ st->cr(); ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ print_hex_dump(st, (address)(sp - 32), (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::Linux::ucontext_get_pc(uc); ++ st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); ++ print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); ++ Disassembler::decode(pc - 80, pc + 80, st); ++} ++ ++void os::setup_fpu() { ++ // no use for LA ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ // LA does not require the additional stack bang. 
++ return 0; ++} ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp +new file mode 100644 +index 0000000000..fa02f8ba2f --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp -@@ -0,0 +1,160 @@ ++++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp +@@ -0,0 +1,38 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..cf3a596387 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++// According to previous and present SPECjbb2015 score, ++// comment prefetch is better than if (interval >= 0) prefetch branch. ++// So choose comment prefetch as the base line. ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 0, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++// Ditto ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 8, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp +new file mode 100644 +index 0000000000..a1a9f181bd +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp +@@ -0,0 +1,116 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "memory/metaspaceShared.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ // C2 and JVMCI use ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif // COMPILER2_OR_JVMCI ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp +new file mode 100644 +index 0000000000..a3ac28ebd3 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame(); ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp +new file mode 100644 +index 0000000000..a39cb79bb1 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp +new file mode 100644 +index 0000000000..30719a0340 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -58034,9 +106315,39 @@ index 0000000000..6c71de772e + * questions. + * + */ +diff --git a/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp +new file mode 100644 +index 0000000000..cd7cecad63 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp +@@ -0,0 +1,191 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP + +#include "runtime/vm_version.hpp" + @@ -58062,13 +106373,21 @@ index 0000000000..6c71de772e + + STATIC_ASSERT(4 == sizeof(T)); + __asm__ __volatile__ ( -+ "1: ll.w %[__ret], %[__dest] \n\t" ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " ll %[__ret], %[__dest] \n\t" + " move %[__tmp], %[__val] \n\t" -+ " sc.w %[__tmp], %[__dest] \n\t" ++ " sc %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) -+ : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (exchange_value) + : "memory" + ); + @@ -58084,16 +106403,23 @@ index 0000000000..6c71de772e + T __ret; + jlong __tmp; + __asm__ __volatile__ ( -+ "1: ll.d %[__ret], %[__dest] \n\t" ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " lld %[__ret], %[__dest] \n\t" + " move %[__tmp], %[__val] \n\t" -+ " sc.d %[__tmp], %[__dest] \n\t" ++ " scd %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) -+ : [__dest] "ZC" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : [__dest] "m" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) + : "memory" + ); -+ + return __ret; +} + @@ -58124,17 +106450,25 @@ index 0000000000..6c71de772e + jint __cmp; + + __asm__ __volatile__ ( -+ "1: ll.w %[__prev], %[__dest] \n\t" -+ " bne %[__prev], %[__old], 2f \n\t" -+ " move %[__cmp], $r0 \n\t" -+ " move %[__cmp], %[__new] \n\t" -+ " sc.w %[__cmp], %[__dest] \n\t" -+ " beqz %[__cmp], 1b \n\t" ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " ll %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" + "2: \n\t" -+ " dbar 0 \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) -+ : [__dest] "ZC" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : [__dest] "m" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) + : "memory" + ); + @@ -58152,33 +106486,41 @@ index 0000000000..6c71de772e + jlong __cmp; + + __asm__ __volatile__ ( -+ "1: ll.d %[__prev], %[__dest] \n\t" -+ " bne %[__prev], %[__old], 2f \n\t" -+ " move %[__cmp], $r0 \n\t" -+ " move %[__cmp], %[__new] \n\t" -+ " sc.d %[__cmp], %[__dest] \n\t" -+ " beqz %[__cmp], 1b \n\t" ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " lld %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " scd %[__cmp], %[__dest] \n\t" ++ " 
beqz %[__cmp], 1b \n\t" ++ " nop \n\t" + "2: \n\t" -+ " dbar 0 \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) -+ : [__dest] "ZC" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : [__dest] "m" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) + : "memory" + ); + return __prev; +} + + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_HPP -diff --git a/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp ++#endif // OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp new file mode 100644 -index 0000000000..874ef835ea +index 0000000000..5b5cd10aa5 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp ++++ b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58201,8 +106543,8 @@ index 0000000000..874ef835ea + * + */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP + +#include + @@ -58212,16 +106554,16 @@ index 0000000000..874ef835ea +inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } +inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP -diff --git a/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp ++#endif // OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp new file mode 100644 -index 0000000000..b1a2de2587 +index 0000000000..3fd6ef7b36 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp ++++ b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -58244,8 +106586,8 @@ index 0000000000..b1a2de2587 + * + */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP + +static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); @@ -58343,16 +106685,16 @@ index 0000000000..b1a2de2587 + pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); +} + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP -diff --git a/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp ++#endif // OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp new file mode 100644 -index 0000000000..f8546270e6 +index 0000000000..f1599ac5f1 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp -@@ -0,0 +1,43 @@ ++++ b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp +@@ -0,0 +1,51 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58375,33 +106717,41 @@ index 0000000000..f8546270e6 + * + */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, DontYieldALot, false); -+define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default -+define_pd_global(intx, VMThreadStackSize, 2048); ++#ifdef MIPS64 ++define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 1024); ++#else ++// ThreadStackSize 320 allows a couple of test cases to run while ++// keeping the number of threads that can be created high. System ++// default ThreadStackSize appears to be 512 which is too big. 
++define_pd_global(intx, ThreadStackSize, 320); ++define_pd_global(intx, VMThreadStackSize, 512); ++#endif // MIPS64 + -+define_pd_global(intx, CompilerThreadStackSize, 2048); ++define_pd_global(intx, CompilerThreadStackSize, 0); + +define_pd_global(uintx,JVMInvokeMethodSlack, 8192); + +// Used on 64 bit platforms for UseCompressedOops base address +define_pd_global(uintx,HeapBaseMinAddress, 2*G); + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP -diff --git a/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s ++#endif // OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/linux_mips.s b/src/hotspot/os_cpu/linux_mips/linux_mips.s new file mode 100644 -index 0000000000..ebd73af0c5 +index 0000000000..36c8d810c3 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s ++++ b/src/hotspot/os_cpu/linux_mips/linux_mips.s @@ -0,0 +1,25 @@ +# +# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. -+# Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++# Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it @@ -58424,11 +106774,11 @@ index 0000000000..ebd73af0c5 +# + + -diff --git a/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp +diff --git a/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp new file mode 100644 -index 0000000000..c7becbb96f +index 0000000000..bf9d679730 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp ++++ b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. @@ -58455,8 +106805,8 @@ index 0000000000..c7becbb96f + * + */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP + +#include "runtime/os.hpp" + @@ -58466,7 +106816,7 @@ index 0000000000..c7becbb96f +#define inlasm_sync() if (os::is_ActiveCoresMP()) \ + __asm__ __volatile__ ("nop" : : : "memory"); \ + else \ -+ __asm__ __volatile__ ("dbar 0" : : : "memory"); ++ __asm__ __volatile__ ("sync" : : : "memory"); + +inline void OrderAccess::loadload() { inlasm_sync(); } +inline void OrderAccess::storestore() { inlasm_sync(); } @@ -58480,16 +106830,16 @@ index 0000000000..c7becbb96f + +#undef inlasm_sync + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_HPP -diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp ++#endif // OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp new file mode 100644 -index 0000000000..2eee2eb549 +index 0000000000..d035d8edbb --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp -@@ -0,0 +1,886 @@ ++++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp +@@ -0,0 +1,1020 @@ +/* + * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. 
-+ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58562,11 +106912,11 @@ index 0000000000..2eee2eb549 +# include +# include + -+#define REG_SP 3 -+#define REG_FP 22 ++#define REG_SP 29 ++#define REG_FP 30 + -+NOINLINE address os::current_stack_pointer() { -+ register void *sp __asm__ ("$r3"); ++address os::current_stack_pointer() { ++ register void *sp __asm__ ("$29"); + return (address) sp; +} + @@ -58579,20 +106929,19 @@ index 0000000000..2eee2eb549 +} + +address os::Linux::ucontext_get_pc(const ucontext_t * uc) { -+ //return (address)uc->uc_mcontext.pc; -+ return (address)uc->uc_mcontext.__pc; ++ return (address)uc->uc_mcontext.pc; +} + +void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { -+ uc->uc_mcontext.__pc = (intptr_t)pc; ++ uc->uc_mcontext.pc = (intptr_t)pc; +} + +intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { -+ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_SP]; +} + +intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { -+ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_FP]; +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread @@ -58663,7 +107012,7 @@ index 0000000000..2eee2eb549 + // belong to the caller. + intptr_t* fp = os::Linux::ucontext_get_fp(uc); + intptr_t* sp = os::Linux::ucontext_get_sp(uc); -+ address pc = (address)(uc->uc_mcontext.__gregs[1]); ++ address pc = (address)(uc->uc_mcontext.gregs[31]); + *fr = frame(sp, fp, pc); + if (!fr->is_java_frame()) { + assert(fr->safe_for_sender(thread), "Safety check"); @@ -58682,8 +107031,38 @@ index 0000000000..2eee2eb549 + return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); +} + ++//intptr_t* _get_previous_fp() { ++intptr_t* __attribute__((noinline)) os::get_previous_fp() { ++ int *pc; ++ intptr_t sp; ++ int *pc_limit = (int*)(void*)&os::get_previous_fp; ++ int insn; ++ ++ { ++ l_pc:; ++ pc = (int*)&&l_pc; ++ __asm__ __volatile__ ("move %0, $sp" : "=r" (sp)); ++ } ++ ++ do { ++ insn = *pc; ++ switch(bitfield(insn, 16, 16)) { ++ case 0x27bd: /* addiu $sp,$sp,-i */ ++ case 0x67bd: /* daddiu $sp,$sp,-i */ ++ assert ((short)bitfield(insn, 0, 16)<0, "bad frame"); ++ sp -= (short)bitfield(insn, 0, 16); ++ return (intptr_t*)sp; ++ } ++ --pc; ++ } while (pc>=pc_limit); // The initial value of pc may be equal to pc_limit, because of GCC optimization. ++ ++ ShouldNotReachHere(); ++ return NULL; // mute compiler ++} ++ ++ +frame os::current_frame() { -+ intptr_t *fp = ((intptr_t **)__builtin_frame_address(0))[frame::native_frame_link_offset]; ++ intptr_t* fp = (intptr_t*)get_previous_fp(); + frame myframe((intptr_t*)os::current_stack_pointer(), + (intptr_t*)fp, + CAST_FROM_FN_PTR(address, os::current_frame)); @@ -58881,7 +107260,7 @@ index 0000000000..2eee2eb549 +#endif + + // Handle signal from NativeJump::patch_verified_entry(). -+ if (sig == SIGILL & nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); +#endif @@ -58908,14 +107287,16 @@ index 0000000000..2eee2eb549 + // HACK: si_code does not work on linux 2.2.12-20!!! 
+ int op = pc[0] & 0x3f; + int op1 = pc[3] & 0x3f; -+ //FIXME, Must port to LA code!! ++ //FIXME, Must port to mips code!! + switch (op) { + case 0x1e: //ddiv + case 0x1f: //ddivu + case 0x1a: //div + case 0x1b: //divu + case 0x34: //trap -+ // In LA, div_by_zero exception can only be triggered by explicit 'trap'. ++ /* In MIPS, div_by_zero exception can only be triggered by explicit 'trap'. ++ * Ref: [c1_LIRAssembler_mips.cpp] arithmetic_idiv() ++ */ + stub = SharedRuntime::continuation_for_implicit_exception(thread, + pc, + SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); @@ -58936,7 +107317,110 @@ index 0000000000..2eee2eb549 +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); +#endif -+ } ++ } else if (/*thread->thread_state() == _thread_in_Java && */sig == SIGILL) { ++ //Since kernel does not have emulation of PS instructions yet, the emulation must be handled here. ++ //The method is to trigger kernel emulation of float emulation. ++ int inst = *(int*)pc; ++ int ops = (inst >> 26) & 0x3f; ++ int ops_fmt = (inst >> 21) & 0x1f; ++ int op = inst & 0x3f; ++ if (ops == Assembler::cop1_op && ops_fmt == Assembler::ps_fmt) { ++ int ft, fs, fd; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = (inst >> 6) & 0x1f; ++ float ft_upper, ft_lower, fs_upper, fs_lower, fd_upper, fd_lower; ++ double ft_value, fs_value, fd_value; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pl %0, %4\n\t" ++ "cvt.s.pu %1, %4\n\t" ++ "cvt.s.pl %2, %5\n\t" ++ "cvt.s.pu %3, %5\n\t" ++ : "=f" (fs_lower), "=f" (fs_upper), "=f" (ft_lower), "=f" (ft_upper) ++ : "f" (fs_value), "f" (ft_value) ++ ); ++ ++ switch (op) { ++ case Assembler::fadd_op: ++ __asm__ __volatile__ ( ++ "add.s %1, %3, %5\n\t" ++ "add.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fsub_op: ++ //fd = fs - ft ++ __asm__ __volatile__ ( ++ "sub.s %1, %3, %5\n\t" ++ "sub.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fmul_op: ++ __asm__ __volatile__ ( ++ "mul.s %1, %3, %5\n\t" ++ "mul.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1 opcode 0x%x with SIGILL.", op); ++ } ++ } else if (ops == Assembler::cop1x_op /*&& op == Assembler::nmadd_ps_op*/) { ++ // madd.ps is not used, the code below were not tested ++ int fr, ft, fs, fd; ++ float fr_upper, fr_lower, fs_upper, fs_lower, ft_upper, ft_lower, fd_upper, fd_lower; ++ double fr_value, ft_value, fs_value, fd_value; ++ switch (op) { ++ case Assembler::madd_ps_op: ++ // fd = (fs * ft) + fr ++ fr = (inst >> 21) & 0x1f; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = (inst >> 6) & 0x1f; ++ fr_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fr]; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = 
uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pu %3, %9\n\t" ++ "cvt.s.pl %4, %9\n\t" ++ "cvt.s.pu %5, %10\n\t" ++ "cvt.s.pl %6, %10\n\t" ++ "cvt.s.pu %7, %11\n\t" ++ "cvt.s.pl %8, %11\n\t" ++ "madd.s %1, %3, %5, %7\n\t" ++ "madd.s %2, %4, %6, %8\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower), "=f" (fr_upper), "=f" (fr_lower), "=f" (fs_upper), "=f" (fs_lower), "=f" (ft_upper), "=f" (ft_lower) ++ : "f" (fr_value)/*9*/, "f" (fs_value)/*10*/, "f" (ft_value)/*11*/ ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1x opcode 0x%x with SIGILL.", op); ++ } ++ } ++ } //SIGILL ++ } else if (sig == SIGILL && VM_Version::is_determine_features_test_running()) { ++ // thread->thread_state() != _thread_in_Java ++ // SIGILL must be caused by VM_Version::determine_features(). ++ VM_Version::set_supports_cpucfg(false); ++ stub = pc + 4; // continue with next instruction. + } else if (thread->thread_state() == _thread_in_vm && + sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ + thread->doing_unsafe_access()) { @@ -58994,19 +107478,13 @@ index 0000000000..2eee2eb549 +#ifdef OPT_RANGECHECK + || sig == SIGSYS +#endif -+ ) -+#if 0 -+ // LoongArch doesn't have hi1 -+ && ++ ) && + //(uc->uc_mcontext.cause == 2 || uc->uc_mcontext.cause == 3)) { + (uc->uc_mcontext.hi1 == 2 || uc->uc_mcontext.hi1 == 3)) { -+#endif -+ ) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("execution protection violation\n"); +#endif + -+ guarantee(0, "LA not implemented yet"); + int page_size = os::vm_page_size(); + address addr = (address) info->si_addr; + address pc = os::Linux::ucontext_get_pc(uc); @@ -59145,30 +107623,23 @@ index 0000000000..2eee2eb549 +} + +int os::Linux::get_fpu_control_word(void) { -+ guarantee(0, "LA not implemented yet"); -+/* + int fcsr; + __asm__ __volatile__ ( + ".set noat;" -+ "addi.d %0, $r0, 0;" -+ "movfcsr2gr %0, $r1;" ++ "daddiu %0, $0, 0;" ++ "cfc1 %0, $31;" + : "=r" (fcsr) + ); + return fcsr; -+*/ -+ return 0; // mute compiler +} + +void os::Linux::set_fpu_control_word(int fpu_control) { -+ guarantee(0, "LA not implemented yet"); -+/* + __asm__ __volatile__ ( + ".set noat;" -+ "movgr2fcsr %0, $r1;" ++ "ctc1 %0, $31;" + : + : "r" (fpu_control) + ); -+*/ +} + +bool os::is_allocatable(size_t bytes) { @@ -59251,45 +107722,45 @@ index 0000000000..2eee2eb549 + // // we can't just iterate through the gregs area + // + // // this is only for the "general purpose" registers -+ st->print("ZERO=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[0]); -+ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[1]); -+ st->print("TP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[2]); -+ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->print("R0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print("AT=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print("V0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print("V1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[3]); + st->cr(); -+ st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[4]); -+ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[5]); -+ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[6]); -+ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->print("A0=" ); 
print_location(st, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[7]); + st->cr(); -+ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[8]); -+ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[9]); -+ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[10]); -+ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[11]); + st->cr(); -+ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[12]); -+ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[13]); -+ st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[14]); -+ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[15]); + st->cr(); -+ st->print("T4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[16]); -+ st->print("T5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[17]); -+ st->print("T6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[18]); -+ st->print("T7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[19]); + st->cr(); -+ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[20]); -+ st->print("RX=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[21]); -+ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[22]); -+ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print("S7=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[23]); + st->cr(); -+ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[24]); -+ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[25]); -+ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[26]); -+ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print("T9=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print("K0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print("K1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[27]); + 
st->cr(); -+ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[28]); -+ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[29]); -+ st->print("S7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[30]); -+ st->print("S8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->print("GP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[31]); + st->cr(); + +} @@ -59299,45 +107770,45 @@ index 0000000000..2eee2eb549 + + const ucontext_t *uc = (const ucontext_t*)context; + st->print_cr("Registers:"); -+ st->print( "ZERO=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[0]); -+ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[1]); -+ st->print(", TP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[2]); -+ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->print( "R0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print(", AT=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print(", V0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print(", V1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[3]); + st->cr(); -+ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[4]); -+ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[5]); -+ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[6]); -+ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[7]); + st->cr(); -+ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[8]); -+ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[9]); -+ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[10]); -+ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[11]); + st->cr(); -+ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[12]); -+ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[13]); -+ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[14]); -+ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[15]); + st->cr(); -+ st->print( "T4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[16]); -+ st->print(", T5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[17]); -+ st->print(", T6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[18]); -+ st->print(", T7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->print( "S0=" 
INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print(", S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[19]); + st->cr(); -+ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[20]); -+ st->print(", RX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[21]); -+ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[22]); -+ st->print(", S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->print( "S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print(", S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[23]); + st->cr(); -+ st->print( "S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[24]); -+ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[25]); -+ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[26]); -+ st->print(", S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print(", T9=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print(", K0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print(", K1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[27]); + st->cr(); -+ st->print( "S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[28]); -+ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[29]); -+ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[30]); -+ st->print(", S8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->print( "GP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[31]); + st->cr(); + st->cr(); + @@ -59356,7 +107827,20 @@ index 0000000000..2eee2eb549 +} + +void os::setup_fpu() { -+ // no use for LA ++ /* ++ //no use for MIPS ++ int fcsr; ++ address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std(); ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "cfc1 %0, $31;" ++ "sw %0, 0(%1);" ++ : "=r" (fcsr) ++ : "r" (fpu_cntrl) ++ : "memory" ++ ); ++ printf("fpu_cntrl: %lx\n", fpu_cntrl); ++ */ +} + +#ifndef PRODUCT @@ -59366,19 +107850,19 @@ index 0000000000..2eee2eb549 +#endif + +int os::extra_bang_size_in_bytes() { -+ // LA does not require the additional stack bang. ++ // MIPS does not require the additional stack bang. + return 0; +} + +bool os::is_ActiveCoresMP() { + return UseActiveCoresMP && _initial_active_processor_count == 1; +} -diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp +diff --git a/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp new file mode 100644 -index 0000000000..93ed1ae033 +index 0000000000..c07d08156f --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp -@@ -0,0 +1,38 @@ ++++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp +@@ -0,0 +1,39 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
@@ -59404,11 +107888,12 @@ index 0000000000..93ed1ae033 + * + */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP + + static void setup_fpu(); + static bool is_allocatable(size_t bytes); ++ static intptr_t *get_previous_fp(); + + // Used to register dynamic code cache area with the OS + // Note: Currently only used in 64 bit Windows implementations @@ -59416,16 +107901,16 @@ index 0000000000..93ed1ae033 + + static bool is_ActiveCoresMP(); + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP -diff --git a/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp ++#endif // OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp new file mode 100644 -index 0000000000..a1cedcd8cf +index 0000000000..93490345f0 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp -@@ -0,0 +1,56 @@ ++++ b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp +@@ -0,0 +1,58 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -59448,46 +107933,48 @@ index 0000000000..a1cedcd8cf + * + */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP + + +inline void Prefetch::read (void *loc, intx interval) { -+// According to previous and present SPECjbb2015 score, -+// comment prefetch is better than if (interval >= 0) prefetch branch. -+// So choose comment prefetch as the base line. 
-+#if 0 -+ __asm__ __volatile__ ( -+ " preld 0, %[__loc] \n" ++ // 'pref' is implemented as NOP in Loongson 3A ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 0, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) + : -+ : [__loc] "m"( *((address)loc + interval) ) + : "memory" + ); -+#endif +} + +inline void Prefetch::write(void *loc, intx interval) { -+// Ditto -+#if 0 -+ __asm__ __volatile__ ( -+ " preld 8, %[__loc] \n" ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 1, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) + : -+ : [__loc] "m"( *((address)loc + interval) ) + : "memory" + ); -+#endif ++ +} + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP -diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp ++#endif // OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp new file mode 100644 -index 0000000000..0f8992735a +index 0000000000..dbe8efe164 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp -@@ -0,0 +1,106 @@ ++++ b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp +@@ -0,0 +1,117 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -59511,6 +107998,7 @@ index 0000000000..0f8992735a + */ + +#include "precompiled.hpp" ++#include "compiler/compileBroker.hpp" +#include "memory/metaspaceShared.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/thread.inline.hpp" @@ -59521,6 +108009,16 @@ index 0000000000..0f8992735a + _anchor.clear(); +} + ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++} ++ +// For Forte Analyzer AsyncGetCallTrace profiling support - thread is +// currently interrupted by SIGPROF +bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, @@ -59569,7 +108067,7 @@ index 0000000000..0f8992735a + + frame ret_frame(ret_sp, ret_fp, addr.pc()); + if (!ret_frame.safe_for_sender(jt)) { -+#if COMPILER2 ++#ifdef COMPILER2 + // C2 and JVMCI use ebp as a general register see if NULL fp helps + frame ret_frame2(ret_sp, NULL, addr.pc()); + if (!ret_frame2.safe_for_sender(jt)) { @@ -59591,15 +108089,15 @@ index 0000000000..0f8992735a +} + +void JavaThread::cache_global_variables() { } -diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp +diff --git a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp new file mode 100644 -index 0000000000..c67e0f80d8 +index 0000000000..8b8dbe219c --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp -@@ -0,0 +1,74 @@ ++++ 
b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp +@@ -0,0 +1,66 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -59622,21 +108120,13 @@ index 0000000000..c67e0f80d8 + * + */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP + + private: + void pd_initialize(); + -+ frame pd_last_frame() { -+ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); -+ if (_anchor.last_Java_pc() != NULL) { -+ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); -+ } else { -+ // This will pick up pc from sp -+ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); -+ } -+ } ++ frame pd_last_frame(); + + public: + // Mutators are highly dangerous.... @@ -59670,16 +108160,16 @@ index 0000000000..c67e0f80d8 + static void enable_register_stack_guard() {} + static void disable_register_stack_guard() {} + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP -diff --git a/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp ++#endif // OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp new file mode 100644 -index 0000000000..5ff935c1b7 +index 0000000000..b7454bf045 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp ++++ b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -59702,8 +108192,8 @@ index 0000000000..5ff935c1b7 + * + */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP + +// These are the OS and CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is @@ -59731,7 +108221,41 @@ index 0000000000..5ff935c1b7 + +#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP ++#endif // OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp +new file mode 100644 +index 0000000000..93e4bea04c +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" diff --git a/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp b/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp index 2b0fa83c1a..270e0bc180 100644 --- a/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp @@ -59744,38 +108268,48 @@ index 2b0fa83c1a..270e0bc180 100644 + #endif // OS_CPU_LINUX_X86_ZGLOBALS_LINUX_X86_HPP diff --git a/src/hotspot/share/asm/codeBuffer.cpp b/src/hotspot/share/asm/codeBuffer.cpp -index 22704aa7ea..69258dfcdc 100644 +index 4912f88056..a420f7807b 100644 --- a/src/hotspot/share/asm/codeBuffer.cpp +++ b/src/hotspot/share/asm/codeBuffer.cpp -@@ -352,6 +352,9 @@ void CodeSection::relocate(address at, RelocationHolder const& spec, int format) +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023. These ++ * modifications are Copyright (c) 2018, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "asm/codeBuffer.hpp" + #include "compiler/disassembler.hpp" +@@ -351,6 +357,7 @@ void CodeSection::relocate(address at, RelocationHolder const& spec, int format) assert(rtype == relocInfo::none || rtype == relocInfo::runtime_call_type || rtype == relocInfo::internal_word_type|| -+#ifdef MIPS -+ rtype == relocInfo::internal_pc_type || -+#endif ++ NOT_ZERO(MIPS64_ONLY(rtype == relocInfo::internal_pc_type ||)) rtype == relocInfo::section_word_type || rtype == relocInfo::external_word_type, "code needs relocation information"); -diff --git a/src/hotspot/share/asm/codeBuffer.hpp b/src/hotspot/share/asm/codeBuffer.hpp -index 2f6b2ed4f1..f672acc52f 100644 ---- a/src/hotspot/share/asm/codeBuffer.hpp -+++ b/src/hotspot/share/asm/codeBuffer.hpp -@@ -402,6 +402,9 @@ class CodeBuffer: public StackObj { - _last_insn = NULL; - #if INCLUDE_AOT - _immutable_PIC = false; -+#endif -+#if defined(MIPS) && !defined(ZERO) -+ _continuous_load_instuctions_count = 0; - #endif - } - diff --git a/src/hotspot/share/c1/c1_Compiler.cpp b/src/hotspot/share/c1/c1_Compiler.cpp -index aff12954b3..d2d09238e7 100644 +index aff12954b3..caa93fc804 100644 --- a/src/hotspot/share/c1/c1_Compiler.cpp +++ b/src/hotspot/share/c1/c1_Compiler.cpp -@@ -211,7 +211,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) { +@@ -44,6 +44,12 @@ + #include "utilities/bitMap.inline.hpp" + #include "utilities/macros.hpp" + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + + Compiler::Compiler() : AbstractCompiler(compiler_c1) { + } +@@ -211,7 +217,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) { case vmIntrinsics::_updateCRC32: case vmIntrinsics::_updateBytesCRC32: case vmIntrinsics::_updateByteBufferCRC32: @@ -59785,7 +108319,7 @@ index aff12954b3..d2d09238e7 100644 case vmIntrinsics::_updateDirectByteBufferCRC32C: #endif diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp -index e30d39f73d..417771d535 100644 +index e30d39f73d..7461b7449a 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -250,6 +250,18 @@ void LIR_Op2::verify() const { @@ -60012,7 +108546,7 @@ index e30d39f73d..417771d535 100644 void LIR_Op::print_condition(outputStream* out, LIR_Condition cond) { switch(cond) { case lir_cond_equal: out->print("[EQ]"); break; -@@ -1876,12 +2011,7 @@ void LIR_OpConvert::print_instr(outputStream* out) const { +@@ -1876,12 +2011,9 @@ void LIR_OpConvert::print_instr(outputStream* out) const { print_bytecode(out, bytecode()); in_opr()->print(out); out->print(" "); result_opr()->print(out); out->print(" "); @@ -60020,13 +108554,14 @@ index e30d39f73d..417771d535 100644 - if(tmp1()->is_valid()) { - tmp1()->print(out); out->print(" "); - tmp2()->print(out); out->print(" "); -- } ++ if(tmp()->is_valid()) { ++ tmp()->print(out); out->print(" "); + } -#endif -+ if(tmp()->is_valid()) tmp()->print(out); out->print(" "); } void LIR_OpConvert::print_bytecode(outputStream* out, Bytecodes::Code code) { -@@ -1979,6 +2109,19 @@ void LIR_Op3::print_instr(outputStream* out) const { +@@ -1979,6 +2111,19 @@ void LIR_Op3::print_instr(outputStream* out) const { } @@ -60047,7 +108582,7 @@ index e30d39f73d..417771d535 100644 hdr_opr()->print(out); out->print(" "); obj_opr()->print(out); out->print(" "); diff --git a/src/hotspot/share/c1/c1_LIR.hpp 
b/src/hotspot/share/c1/c1_LIR.hpp -index 3912b41d3f..fe7cfefe4e 100644 +index 3234ca018b..1f46e44c77 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -864,9 +864,11 @@ class LIR_OpConvert; @@ -60667,10 +109202,23 @@ index 3ad325d759..f377b27859 100644 ciMethod *method, LIR_Opr step, int frequency, int bci, bool backedge, bool notify); diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp -index c28055fd99..b6f7685779 100644 +index c28055fd99..4e7df88102 100644 --- a/src/hotspot/share/c1/c1_LinearScan.cpp +++ b/src/hotspot/share/c1/c1_LinearScan.cpp -@@ -1258,6 +1258,23 @@ void LinearScan::add_register_hints(LIR_Op* op) { +@@ -35,6 +35,12 @@ + #include "runtime/timerTrace.hpp" + #include "utilities/bitMap.inline.hpp" + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef PRODUCT + + static LinearScanStatistic _stat_before_alloc; +@@ -1258,6 +1264,23 @@ void LinearScan::add_register_hints(LIR_Op* op) { } break; } @@ -60694,7 +109242,7 @@ index c28055fd99..b6f7685779 100644 default: break; } -@@ -3342,7 +3359,9 @@ void LinearScan::verify_no_oops_in_fixed_intervals() { +@@ -3342,7 +3365,9 @@ void LinearScan::verify_no_oops_in_fixed_intervals() { check_live = (move->patch_code() == lir_patch_none); } LIR_OpBranch* branch = op->as_OpBranch(); @@ -60705,7 +109253,7 @@ index c28055fd99..b6f7685779 100644 // Don't bother checking the stub in this case since the // exception stub will never return to normal control flow. check_live = false; -@@ -6192,6 +6211,16 @@ void ControlFlowOptimizer::substitute_branch_target(BlockBegin* block, BlockBegi +@@ -6192,6 +6217,16 @@ void ControlFlowOptimizer::substitute_branch_target(BlockBegin* block, BlockBegi assert(op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch"); LIR_OpBranch* branch = (LIR_OpBranch*)op; @@ -60722,7 +109270,7 @@ index c28055fd99..b6f7685779 100644 if (branch->block() == target_from) { branch->change_block(target_to); } -@@ -6320,6 +6349,20 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { +@@ -6320,6 +6355,20 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { } } } @@ -60743,7 +109291,7 @@ index c28055fd99..b6f7685779 100644 } } } -@@ -6395,6 +6438,13 @@ void ControlFlowOptimizer::verify(BlockList* code) { +@@ -6395,6 +6444,13 @@ void ControlFlowOptimizer::verify(BlockList* code) { assert(op_branch->block() == NULL || code->find(op_branch->block()) != -1, "branch target not valid"); assert(op_branch->ublock() == NULL || code->find(op_branch->ublock()) != -1, "branch target not valid"); } @@ -60757,7 +109305,7 @@ index c28055fd99..b6f7685779 100644 } for (j = 0; j < block->number_of_sux() - 1; j++) { -@@ -6639,6 +6689,24 @@ void LinearScanStatistic::collect(LinearScan* allocator) { +@@ -6639,6 +6695,24 @@ void LinearScanStatistic::collect(LinearScan* allocator) { break; } @@ -60975,6 +109523,31 @@ index 4289e5e5c4..9502463bd5 100644 __ branch_destination(slow->continuation()); } +diff --git a/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp b/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp +index 98a2fe7f1c..b43a441066 100644 +--- a/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp ++++ b/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 
2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_GC_G1_G1MARKSTACK_INLINE_HPP + #define SHARE_VM_GC_G1_G1MARKSTACK_INLINE_HPP + +@@ -71,6 +77,7 @@ template inline void G1FullGCMarker::mark_and_push(T* p) { + _oop_stack.push(obj); + assert(_bitmap->is_marked(obj), "Must be marked now - map self"); + } else { ++ DEBUG_ONLY(OrderAccess::loadload()); + assert(_bitmap->is_marked(obj) || G1ArchiveAllocator::is_closed_archive_object(obj), + "Must be marked by other or closed archive object"); + } diff --git a/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp b/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp index 1ef900783d..b30456429d 100644 --- a/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp @@ -61274,11 +109847,64 @@ index e01a242a57..0661f3b9d1 100644 return false; #else #warning "Unconfigured platform" +diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +index 8927063330..b5bb5c2887 100644 +--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp ++++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "code/codeBlob.hpp" + #include "compiler/abstractCompiler.hpp" +@@ -715,6 +721,35 @@ + #endif + + ++#ifdef LOONGARCH64 ++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ declare_constant(VM_Version::CPU_LA32) \ ++ declare_constant(VM_Version::CPU_LA64) \ ++ declare_constant(VM_Version::CPU_LLEXC) \ ++ declare_constant(VM_Version::CPU_SCDLY) \ ++ declare_constant(VM_Version::CPU_LLDBAR) \ ++ declare_constant(VM_Version::CPU_LBT_X86) \ ++ declare_constant(VM_Version::CPU_LBT_ARM) \ ++ declare_constant(VM_Version::CPU_LBT_MIPS) \ ++ declare_constant(VM_Version::CPU_CCDMA) \ ++ declare_constant(VM_Version::CPU_COMPLEX) \ ++ declare_constant(VM_Version::CPU_FP) \ ++ declare_constant(VM_Version::CPU_CRYPTO) \ ++ declare_constant(VM_Version::CPU_LSX) \ ++ declare_constant(VM_Version::CPU_LASX) \ ++ declare_constant(VM_Version::CPU_LAM) \ ++ declare_constant(VM_Version::CPU_LLSYNC) \ ++ declare_constant(VM_Version::CPU_TGTSYNC) \ ++ declare_constant(VM_Version::CPU_ULSYNC) \ ++ declare_constant(VM_Version::CPU_UAL) ++ ++#endif ++ ++ + #ifdef X86 + + #define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ diff --git a/src/hotspot/share/memory/metaspace.cpp b/src/hotspot/share/memory/metaspace.cpp -index b37fec5829..707521abfa 100644 +index 80958b0469..08d13a4189 100644 --- a/src/hotspot/share/memory/metaspace.cpp +++ b/src/hotspot/share/memory/metaspace.cpp -@@ -1055,12 +1055,12 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a +@@ -1082,12 
+1082,12 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a // Don't use large pages for the class space. bool large_pages = false; @@ -61293,7 +109919,7 @@ index b37fec5829..707521abfa 100644 ReservedSpace metaspace_rs; -@@ -1086,7 +1086,8 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a +@@ -1113,7 +1113,8 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a // below 32g to get a zerobased CCS. For simplicity we reuse the search // strategy for AARCH64. @@ -61303,7 +109929,7 @@ index b37fec5829..707521abfa 100644 for (char *a = align_up(requested_addr, increment); a < (char*)(1024*G); a += increment) { -@@ -1117,7 +1118,7 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a +@@ -1144,7 +1145,7 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a } } @@ -61312,11 +109938,37 @@ index b37fec5829..707521abfa 100644 if (!metaspace_rs.is_reserved()) { #if INCLUDE_CDS +diff --git a/src/hotspot/share/oops/oop.inline.hpp b/src/hotspot/share/oops/oop.inline.hpp +index 6c631f5458..9865106720 100644 +--- a/src/hotspot/share/oops/oop.inline.hpp ++++ b/src/hotspot/share/oops/oop.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OOPS_OOP_INLINE_HPP + #define SHARE_VM_OOPS_OOP_INLINE_HPP + +@@ -389,7 +395,7 @@ oop oopDesc::forward_to_atomic(oop p, atomic_memory_order order) { + // forwarding pointer. + oldMark = curMark; + } +- return forwardee(); ++ return (oop)oldMark->decode_pointer(); + } + + // Note that the forwardee is not the same thing as the displaced_mark. diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp -index 69e210b66b..b796c07d8f 100644 +index 569fbc6d69..c1f1b82ffa 100644 --- a/src/hotspot/share/opto/compile.hpp +++ b/src/hotspot/share/opto/compile.hpp -@@ -1186,7 +1186,7 @@ class Compile : public Phase { +@@ -1204,7 +1204,7 @@ class Compile : public Phase { bool in_scratch_emit_size() const { return _in_scratch_emit_size; } enum ScratchBufferBlob { @@ -61326,7 +109978,7 @@ index 69e210b66b..b796c07d8f 100644 #else MAX_inst_size = 1024, diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp -index b6540e06a3..b1578a4442 100644 +index b6540e06a3..52d1fc9fb9 100644 --- a/src/hotspot/share/opto/output.cpp +++ b/src/hotspot/share/opto/output.cpp @@ -22,6 +22,12 @@ @@ -61370,7 +110022,7 @@ index b6540e06a3..b1578a4442 100644 debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map); } else { mcall = mach->as_MachCall(); -@@ -1393,6 +1420,31 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { +@@ -1393,6 +1420,22 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { DEBUG_ONLY(uint instr_offset = cb->insts_size()); n->emit(*cb, _regalloc); current_offset = cb->insts_size(); @@ -61387,49 +110039,14 @@ index b6540e06a3..b1578a4442 100644 + adjust += 4; + inst = (NativeInstruction*) (cb->insts()->end() - 8); + } -+#ifdef MIPS64 -+ if (PatchContinuousLoad) { -+ // if PatchContinuousLoad is true, a nop may be inserted after a load instruction and -+ // the adjust would be 2 instructions. 
-+ if (inst->is_nop()) { -+ adjust += 4; -+ } -+ } -+#endif + previous_offset = current_offset - adjust; + } +#endif // Above we only verified that there is enough space in the instruction section. // However, the instruction may emit stubs that cause code buffer expansion. -@@ -1402,7 +1454,9 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { - } - - #ifdef ASSERT -- uint n_size = n->size(_regalloc); -+ // adjust: one node may be inserted one and only one nop. -+ int adjust = MIPS64_ONLY(PatchContinuousLoad ? block->number_of_nodes() * 4 :) 0; -+ uint n_size = n->size(regalloc()) + adjust; - if (n_size < (current_offset-instr_offset)) { - MachNode* mach = n->as_Mach(); - n->dump(); -@@ -1488,7 +1542,13 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { - } - // Verify that the distance for generated before forward - // short branches is still valid. -- guarantee((int)(blk_starts[i+1] - blk_starts[i]) >= (current_offset - blk_offset), "shouldn't increase block size"); -+ // adjust: one node may be inserted one and only one nop. -+ int adjust = MIPS64_ONLY(PatchContinuousLoad ? block->number_of_nodes() * 4 :) 0; -+#ifndef PRODUCT -+ if ((int)(blk_starts[i+1] - blk_starts[i] + adjust) < (current_offset - blk_offset)) -+ tty->print_cr("%s:%d blk_starts[i+1]:%d, blk_starts[i]:%d, adjust: %d, current_offset:%d, blk_offset:%d", __func__, __LINE__, blk_starts[i+1], blk_starts[i], adjust, current_offset, blk_offset); -+#endif -+ guarantee((int)(blk_starts[i+1] - blk_starts[i] + adjust) >= (current_offset - blk_offset), "shouldn't increase block size"); - - // Save new block start offset - blk_starts[i] = blk_offset; diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp -index 223b7a1c66..01ca28e7ab 100644 +index 7d767c47c9..23ec34e5e2 100644 --- a/src/hotspot/share/opto/type.cpp +++ b/src/hotspot/share/opto/type.cpp @@ -22,6 +22,12 @@ @@ -61458,8 +110075,20 @@ index 223b7a1c66..01ca28e7ab 100644 #else // all other { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD +diff --git a/src/hotspot/share/runtime/java.cpp b/src/hotspot/share/runtime/java.cpp +index 84123b29ec..77fbacf2d8 100644 +--- a/src/hotspot/share/runtime/java.cpp ++++ b/src/hotspot/share/runtime/java.cpp +@@ -68,6 +68,7 @@ + #include "runtime/thread.inline.hpp" + #include "runtime/timer.hpp" + #include "runtime/vmOperations.hpp" ++#include "runtime/vmThread.hpp" + #include "services/memTracker.hpp" + #include "utilities/dtrace.hpp" + #include "utilities/globalDefinitions.hpp" diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp -index 389a1e95f8..2f9b990456 100644 +index e0f4a2af1f..09cc4b1ba5 100644 --- a/src/hotspot/share/runtime/os.cpp +++ b/src/hotspot/share/runtime/os.cpp @@ -22,6 +22,12 @@ @@ -61467,104 +110096,24 @@ index 389a1e95f8..2f9b990456 100644 */ +/* -+ * This file has been modified by Loongson Technology in 2021, These -+ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #include "precompiled.hpp" #include "jvm.h" #include "classfile/classLoader.hpp" -@@ -1237,7 +1243,8 @@ bool os::is_first_C_frame(frame* fr) { +@@ -1242,7 +1248,8 @@ bool os::is_first_C_frame(frame* fr) { + if ((uintptr_t)fr->sender_sp() == (uintptr_t)-1 || is_pointer_bad(fr->sender_sp())) return true; - uintptr_t old_fp = (uintptr_t)fr->link(); - if ((old_fp & fp_align_mask) != 0) return true; -- if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp) return true; + uintptr_t old_fp = (uintptr_t)fr->link_or_null(); +- if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp || + // The check for old_fp and ufp is harmful on LoongArch and MIPS due to their special ABIs. -+ if (old_fp == 0 || old_fp == (uintptr_t)-1 NOT_LOONGARCH64_AND_MIPS64(|| old_fp == ufp)) return true; ++ if (old_fp == 0 || old_fp == (uintptr_t)-1 NOT_LOONGARCH64_AND_MIPS64(|| old_fp == ufp) || + is_pointer_bad(fr->link_or_null())) return true; // stack grows downwards; if old_fp is below current fp or if the stack - // frame is too large, either the stack is corrupted or fp is not saved -diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp -index 595ff7495a..2d6d63b00b 100644 ---- a/src/hotspot/share/runtime/sharedRuntime.cpp -+++ b/src/hotspot/share/runtime/sharedRuntime.cpp -@@ -22,6 +22,12 @@ - * - */ - -+/* -+ * This file has been modified by Loongson Technology in 2018, These -+ * modifications are Copyright (c) 2018, Loongson Technology, and are made -+ * available on the same license terms set forth above. -+ */ -+ - #include "precompiled.hpp" - #include "jvm.h" - #include "aot/aotLoader.hpp" -@@ -3215,3 +3221,31 @@ void SharedRuntime::on_slowpath_allocation_exit(JavaThread* thread) { - BarrierSet *bs = BarrierSet::barrier_set(); - bs->on_slowpath_allocation_exit(thread, new_obj); - } -+ -+void SharedRuntime::print_long(long long i) { -+ tty->print("%llx", i); -+} -+ -+void SharedRuntime::print_int(int i) { -+ tty->print("%x", i); -+} -+ -+void SharedRuntime::print_float(float f) { -+ //tty->print("ld:%ld ", f); -+ //tty->print("lx:%lx ", f); -+ tty->print("lf:%g ", f); -+} -+ -+void SharedRuntime::print_double(double f) { -+ //tty->print("%ld ", f); -+ //tty->print("0x%lx ", f); -+ tty->print("%g ", f); -+} -+ -+void SharedRuntime::print_str(char *str) { -+ tty->print("%s", str); -+} -+ -+void SharedRuntime::print_reg_with_pc(char *reg_name, long i, long pc) { -+ tty->print_cr("%s: %lx pc: %lx", reg_name, i, pc); -+} -diff --git a/src/hotspot/share/runtime/sharedRuntime.hpp b/src/hotspot/share/runtime/sharedRuntime.hpp -index a110098376..c05f7411f7 100644 ---- a/src/hotspot/share/runtime/sharedRuntime.hpp -+++ b/src/hotspot/share/runtime/sharedRuntime.hpp -@@ -22,6 +22,12 @@ - * - */ - -+/* -+ * This file has been modified by Loongson Technology in 2018, These -+ * modifications are Copyright (c) 2018, Loongson Technology, and are made -+ * available on the same license terms set forth above. 
-+ */ -+ - #ifndef SHARE_VM_RUNTIME_SHAREDRUNTIME_HPP - #define SHARE_VM_RUNTIME_SHAREDRUNTIME_HPP - -@@ -596,6 +602,13 @@ class SharedRuntime: AllStatic { - static void print_ic_miss_histogram(); - - #endif // PRODUCT -+ static void print_long(long long i); -+ static void print_int(int i); -+ static void print_float(float i); -+ static void print_double(double i); -+ static void print_str(char *str); -+ -+ static void print_reg_with_pc(char *reg_name, long i, long pc); - }; - - diff --git a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp index e086f794cd..f480195775 100644 --- a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp @@ -61599,10 +110148,10 @@ index e086f794cd..f480195775 100644 S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ diff --git a/src/hotspot/share/utilities/globalDefinitions.hpp b/src/hotspot/share/utilities/globalDefinitions.hpp -index aef662ec15..9a907de8b1 100644 +index c758fc5743..a8c4638f6a 100644 --- a/src/hotspot/share/utilities/globalDefinitions.hpp +++ b/src/hotspot/share/utilities/globalDefinitions.hpp -@@ -1154,6 +1154,15 @@ inline int exact_log2_long(jlong x) { +@@ -1161,6 +1161,15 @@ inline int exact_log2_long(jlong x) { return log2_long(x); } @@ -61709,28 +110258,8 @@ index cf80253868..f611daf36d 100644 // basename.hpp / basename.inline.hpp #define COMPILER_HEADER(basename) XSTR(COMPILER_HEADER_STEM(basename).hpp) #define COMPILER_HEADER_INLINE(basename) XSTR(COMPILER_HEADER_STEM(basename).inline.hpp) -diff --git a/src/java.base/unix/native/libnio/fs/UnixNativeDispatcher.c b/src/java.base/unix/native/libnio/fs/UnixNativeDispatcher.c -index 5a83e747f7..bf4bb4bc70 100644 ---- a/src/java.base/unix/native/libnio/fs/UnixNativeDispatcher.c -+++ b/src/java.base/unix/native/libnio/fs/UnixNativeDispatcher.c -@@ -613,7 +613,15 @@ Java_sun_nio_fs_UnixNativeDispatcher_fstatat0(JNIEnv* env, jclass this, jint dfd - JNU_ThrowInternalError(env, "should not reach here"); - return; - } -+ -+#ifdef __mips__ -+ // __NR_newfstatat is incorrect on Loongnix -+ // workaround it using glibc's fstatat64 -+ RESTARTABLE(fstatat64((int)dfd, path, &buf, (int)flag), err); -+#else - RESTARTABLE((*my_fstatat64_func)((int)dfd, path, &buf, (int)flag), err); -+#endif -+ - if (err == -1) { - throwUnixException(env, errno); - } else { diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -index 0d834302c5..38de59100a 100644 +index 0d834302c5..6afafea095 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c @@ -22,6 +22,13 @@ @@ -61738,8 +110267,8 @@ index 0d834302c5..38de59100a 100644 */ +/* -+ * This file has been modified by Loongson Technology in 2021. These -+ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ * + */ @@ -61801,8 +110330,8 @@ index 0d834302c5..38de59100a 100644 + { + int i; + for (i = 0; i < 31; i++) -+ regs[i] = gregs.gpr[i]; -+ regs[REG_INDEX(PC)] = gregs.pc; ++ regs[i] = gregs.regs[i]; ++ regs[REG_INDEX(PC)] = gregs.csr_era; + } +#endif /* loongarch64 */ + @@ -61856,10 +110385,32 @@ index 0d834302c5..38de59100a 100644 return array; } diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -index 8318e8e021..e4546370f8 100644 +index 8318e8e021..07064e76ee 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -@@ -44,6 +44,10 @@ +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + #ifndef _LIBPROC_H_ + #define _LIBPROC_H_ + +@@ -37,13 +44,17 @@ + #include + #define user_regs_struct pt_regs + #endif +-#if defined(aarch64) || defined(arm64) ++#if defined(aarch64) || defined(arm64) || defined(loongarch64) + #include + #define user_regs_struct user_pt_regs + #elif defined(arm) #include #define user_regs_struct pt_regs #endif @@ -61871,7 +110422,7 @@ index 8318e8e021..e4546370f8 100644 // This C bool type must be int for compatibility with Linux calls and // it would be a mistake to equivalence it to C++ bool on many platforms diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index 0b4e8e4e35..c4a67d7a95 100644 +index de5254d859..eefe55959c 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c @@ -22,6 +22,12 @@ @@ -62028,7 +110579,7 @@ index 0000000000..1b49efd201 + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -index efde22ef76..025c4af761 100644 +index 5e5a6bb714..7d7f6424e6 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java @@ -23,6 +23,12 @@ @@ -65507,17 +114058,1077 @@ index 0000000000..65d88016ea + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } -+ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +new file mode 100644 +index 0000000000..dfe3066af0 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public MIPS64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +new file mode 100644 +index 0000000000..f2da760af4 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public MIPS64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected MIPS64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ MIPS64RegisterMap retval = new MIPS64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +index 7d7a6107ca..06d79318d9 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + package sun.jvm.hotspot.utilities; + + /** Provides canonicalized OS and CPU information for the rest of the +@@ -54,7 +61,7 @@ public class PlatformInfo { + + public static boolean knownCPU(String cpu) { + final String[] KNOWN = +- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; ++ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "mips64", "mips64el", "loongarch64"}; + + for(String s : KNOWN) { + if(s.equals(cpu)) +@@ -101,6 +108,12 @@ public class PlatformInfo { + if (cpu.equals("ppc64le")) + return "ppc64"; + ++ if (cpu.equals("mips64el")) ++ return "mips64"; ++ ++ if (cpu.equals("loongarch64")) ++ return "loongarch64"; ++ + return cpu; + + } +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java +new file mode 100644 +index 0000000000..0d3953ddff +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java +@@ -0,0 +1,220 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import static java.util.Collections.emptyMap; ++import static jdk.vm.ci.common.InitTimer.timer; ++ ++import java.util.EnumSet; ++import java.util.Map; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.loongarch64.LoongArch64.CPUFeature; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.stack.StackIntrospection; ++import jdk.vm.ci.common.InitTimer; ++import jdk.vm.ci.hotspot.HotSpotCodeCacheProvider; ++import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider; ++import jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory; ++import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; ++import jdk.vm.ci.hotspot.HotSpotMetaAccessProvider; ++import jdk.vm.ci.hotspot.HotSpotStackIntrospection; ++import jdk.vm.ci.meta.ConstantReflectionProvider; ++import jdk.vm.ci.runtime.JVMCIBackend; ++ ++public class LoongArch64HotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFactory { ++ ++ protected EnumSet computeFeatures(@SuppressWarnings("unused") LoongArch64HotSpotVMConfig config) { ++ // Configure the feature set using the HotSpot flag settings. ++ EnumSet features = EnumSet.noneOf(LoongArch64.CPUFeature.class); ++ ++ if ((config.vmVersionFeatures & config.loongarch64LA32) != 0) { ++ features.add(LoongArch64.CPUFeature.LA32); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LA64) != 0) { ++ features.add(LoongArch64.CPUFeature.LA64); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LLEXC) != 0) { ++ features.add(LoongArch64.CPUFeature.LLEXC); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64SCDLY) != 0) { ++ features.add(LoongArch64.CPUFeature.SCDLY); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LLDBAR) != 0) { ++ features.add(LoongArch64.CPUFeature.LLDBAR); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LBT_X86) != 0) { ++ features.add(LoongArch64.CPUFeature.LBT_X86); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LBT_ARM) != 0) { ++ features.add(LoongArch64.CPUFeature.LBT_ARM); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LBT_MIPS) != 0) { ++ features.add(LoongArch64.CPUFeature.LBT_MIPS); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64CCDMA) != 0) { ++ features.add(LoongArch64.CPUFeature.CCDMA); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64COMPLEX) != 0) { ++ features.add(LoongArch64.CPUFeature.COMPLEX); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64FP) != 0) { ++ features.add(LoongArch64.CPUFeature.FP); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64CRYPTO) != 0) { ++ features.add(LoongArch64.CPUFeature.CRYPTO); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LSX) != 0) { ++ features.add(LoongArch64.CPUFeature.LSX); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LASX) != 0) { ++ features.add(LoongArch64.CPUFeature.LASX); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LAM) != 0) { ++ features.add(LoongArch64.CPUFeature.LAM); ++ } ++ ++ if ((config.vmVersionFeatures & 
config.loongarch64LLSYNC) != 0) { ++ features.add(LoongArch64.CPUFeature.LLSYNC); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64TGTSYNC) != 0) { ++ features.add(LoongArch64.CPUFeature.TGTSYNC); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64ULSYNC) != 0) { ++ features.add(LoongArch64.CPUFeature.ULSYNC); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64UAL) != 0) { ++ features.add(LoongArch64.CPUFeature.UAL); ++ } ++ ++ return features; ++ } ++ ++ protected EnumSet computeFlags(@SuppressWarnings("unused") LoongArch64HotSpotVMConfig config) { ++ EnumSet flags = EnumSet.noneOf(LoongArch64.Flag.class); ++ ++ if (config.useLSX) { ++ flags.add(LoongArch64.Flag.useLSX); ++ } ++ ++ if (config.useLASX) { ++ flags.add(LoongArch64.Flag.useLASX); ++ } ++ ++ return flags; ++ } ++ ++ protected TargetDescription createTarget(LoongArch64HotSpotVMConfig config) { ++ final int stackFrameAlignment = 16; ++ final int implicitNullCheckLimit = 4096; ++ final boolean inlineObjects = true; ++ Architecture arch = new LoongArch64(computeFeatures(config), computeFlags(config)); ++ return new TargetDescription(arch, true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects); ++ } ++ ++ protected HotSpotConstantReflectionProvider createConstantReflection(HotSpotJVMCIRuntime runtime) { ++ return new HotSpotConstantReflectionProvider(runtime); ++ } ++ ++ protected RegisterConfig createRegisterConfig(LoongArch64HotSpotVMConfig config, TargetDescription target) { ++ return new LoongArch64HotSpotRegisterConfig(target, config.useCompressedOops); ++ } ++ ++ protected HotSpotCodeCacheProvider createCodeCache(HotSpotJVMCIRuntime runtime, TargetDescription target, RegisterConfig regConfig) { ++ return new HotSpotCodeCacheProvider(runtime, runtime.getConfig(), target, regConfig); ++ } ++ ++ protected HotSpotMetaAccessProvider createMetaAccess(HotSpotJVMCIRuntime runtime) { ++ return new HotSpotMetaAccessProvider(runtime); ++ } ++ ++ @Override ++ public String getArchitecture() { ++ return "loongarch64"; ++ } ++ ++ @Override ++ public String toString() { ++ return "JVMCIBackend:" + getArchitecture(); ++ } ++ ++ @Override ++ @SuppressWarnings("try") ++ public JVMCIBackend createJVMCIBackend(HotSpotJVMCIRuntime runtime, JVMCIBackend host) { ++ ++ assert host == null; ++ LoongArch64HotSpotVMConfig config = new LoongArch64HotSpotVMConfig(runtime.getConfigStore()); ++ TargetDescription target = createTarget(config); ++ ++ RegisterConfig regConfig; ++ HotSpotCodeCacheProvider codeCache; ++ ConstantReflectionProvider constantReflection; ++ HotSpotMetaAccessProvider metaAccess; ++ StackIntrospection stackIntrospection; ++ try (InitTimer t = timer("create providers")) { ++ try (InitTimer rt = timer("create MetaAccess provider")) { ++ metaAccess = createMetaAccess(runtime); ++ } ++ try (InitTimer rt = timer("create RegisterConfig")) { ++ regConfig = createRegisterConfig(config, target); ++ } ++ try (InitTimer rt = timer("create CodeCache provider")) { ++ codeCache = createCodeCache(runtime, target, regConfig); ++ } ++ try (InitTimer rt = timer("create ConstantReflection provider")) { ++ constantReflection = createConstantReflection(runtime); ++ } ++ try (InitTimer rt = timer("create StackIntrospection provider")) { ++ stackIntrospection = new HotSpotStackIntrospection(runtime); ++ } ++ } ++ try (InitTimer rt = timer("instantiate backend")) { ++ return createBackend(metaAccess, codeCache, constantReflection, stackIntrospection); ++ } ++ } ++ ++ protected JVMCIBackend 
createBackend(HotSpotMetaAccessProvider metaAccess, HotSpotCodeCacheProvider codeCache, ConstantReflectionProvider constantReflection, ++ StackIntrospection stackIntrospection) { ++ return new JVMCIBackend(metaAccess, codeCache, constantReflection, stackIntrospection); ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java +new file mode 100644 +index 0000000000..2ee6a4b847 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java +@@ -0,0 +1,297 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import static jdk.vm.ci.loongarch64.LoongArch64.ra; ++import static jdk.vm.ci.loongarch64.LoongArch64.a0; ++import static jdk.vm.ci.loongarch64.LoongArch64.a1; ++import static jdk.vm.ci.loongarch64.LoongArch64.a2; ++import static jdk.vm.ci.loongarch64.LoongArch64.a3; ++import static jdk.vm.ci.loongarch64.LoongArch64.a4; ++import static jdk.vm.ci.loongarch64.LoongArch64.a5; ++import static jdk.vm.ci.loongarch64.LoongArch64.a6; ++import static jdk.vm.ci.loongarch64.LoongArch64.a7; ++import static jdk.vm.ci.loongarch64.LoongArch64.SCR1; ++import static jdk.vm.ci.loongarch64.LoongArch64.SCR2; ++import static jdk.vm.ci.loongarch64.LoongArch64.t0; ++import static jdk.vm.ci.loongarch64.LoongArch64.v0; ++import static jdk.vm.ci.loongarch64.LoongArch64.s5; ++import static jdk.vm.ci.loongarch64.LoongArch64.s6; ++import static jdk.vm.ci.loongarch64.LoongArch64.sp; ++import static jdk.vm.ci.loongarch64.LoongArch64.fp; ++import static jdk.vm.ci.loongarch64.LoongArch64.tp; ++import static jdk.vm.ci.loongarch64.LoongArch64.rx; ++import static jdk.vm.ci.loongarch64.LoongArch64.f0; ++import static jdk.vm.ci.loongarch64.LoongArch64.f1; ++import static jdk.vm.ci.loongarch64.LoongArch64.f2; ++import static jdk.vm.ci.loongarch64.LoongArch64.f3; ++import static jdk.vm.ci.loongarch64.LoongArch64.f4; ++import static jdk.vm.ci.loongarch64.LoongArch64.f5; ++import static jdk.vm.ci.loongarch64.LoongArch64.f6; ++import static jdk.vm.ci.loongarch64.LoongArch64.f7; ++import static jdk.vm.ci.loongarch64.LoongArch64.fv0; ++import static jdk.vm.ci.loongarch64.LoongArch64.zero; ++ ++import java.util.ArrayList; ++import java.util.HashSet; ++import java.util.List; ++import java.util.Set; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CallingConvention.Type; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterAttributes; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.ValueKindFactory; ++import jdk.vm.ci.common.JVMCIError; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.JavaType; ++import jdk.vm.ci.meta.PlatformKind; ++import jdk.vm.ci.meta.Value; ++import jdk.vm.ci.meta.ValueKind; ++ ++public class LoongArch64HotSpotRegisterConfig implements RegisterConfig { ++ ++ private final TargetDescription target; ++ ++ private final RegisterArray allocatable; ++ ++ /** ++ * The caller saved registers always include all parameter registers. 
++ */ ++ private final RegisterArray callerSaved; ++ ++ private final boolean allAllocatableAreCallerSaved; ++ ++ private final RegisterAttributes[] attributesMap; ++ ++ @Override ++ public RegisterArray getAllocatableRegisters() { ++ return allocatable; ++ } ++ ++ @Override ++ public RegisterArray filterAllocatableRegisters(PlatformKind kind, RegisterArray registers) { ++ ArrayList list = new ArrayList<>(); ++ for (Register reg : registers) { ++ if (target.arch.canStoreValue(reg.getRegisterCategory(), kind)) { ++ list.add(reg); ++ } ++ } ++ ++ return new RegisterArray(list); ++ } ++ ++ @Override ++ public RegisterAttributes[] getAttributesMap() { ++ return attributesMap.clone(); ++ } ++ ++ private final RegisterArray javaGeneralParameterRegisters = new RegisterArray(t0, a0, a1, a2, a3, a4, a5, a6, a7); ++ private final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(a0, a1, a2, a3, a4, a5, a6, a7); ++ private final RegisterArray floatParameterRegisters = new RegisterArray(f0, f1, f2, f3, f4, f5, f6, f7); ++ ++ public static final Register heapBaseRegister = s5; ++ public static final Register TREG = s6; ++ ++ private static final RegisterArray reservedRegisters = new RegisterArray(fp, ra, zero, sp, tp, rx, SCR1, SCR2, TREG); ++ ++ private static RegisterArray initAllocatable(Architecture arch, boolean reserveForHeapBase) { ++ RegisterArray allRegisters = arch.getAvailableValueRegisters(); ++ Register[] registers = new Register[allRegisters.size() - reservedRegisters.size() - (reserveForHeapBase ? 1 : 0)]; ++ List reservedRegistersList = reservedRegisters.asList(); ++ ++ int idx = 0; ++ for (Register reg : allRegisters) { ++ if (reservedRegistersList.contains(reg)) { ++ // skip reserved registers ++ continue; ++ } ++ if (reserveForHeapBase && reg.equals(heapBaseRegister)) { ++ // skip heap base register ++ continue; ++ } ++ ++ registers[idx++] = reg; ++ } ++ ++ assert idx == registers.length; ++ return new RegisterArray(registers); ++ } ++ ++ public LoongArch64HotSpotRegisterConfig(TargetDescription target, boolean useCompressedOops) { ++ this(target, initAllocatable(target.arch, useCompressedOops)); ++ assert callerSaved.size() >= allocatable.size(); ++ } ++ ++ public LoongArch64HotSpotRegisterConfig(TargetDescription target, RegisterArray allocatable) { ++ this.target = target; ++ ++ this.allocatable = allocatable; ++ Set callerSaveSet = new HashSet<>(); ++ allocatable.addTo(callerSaveSet); ++ floatParameterRegisters.addTo(callerSaveSet); ++ javaGeneralParameterRegisters.addTo(callerSaveSet); ++ nativeGeneralParameterRegisters.addTo(callerSaveSet); ++ callerSaved = new RegisterArray(callerSaveSet); ++ ++ allAllocatableAreCallerSaved = true; ++ attributesMap = RegisterAttributes.createMap(this, LoongArch64.allRegisters); ++ } ++ ++ @Override ++ public RegisterArray getCallerSaveRegisters() { ++ return callerSaved; ++ } ++ ++ @Override ++ public RegisterArray getCalleeSaveRegisters() { ++ return null; ++ } ++ ++ @Override ++ public boolean areAllAllocatableRegistersCallerSaved() { ++ return allAllocatableAreCallerSaved; ++ } ++ ++ @Override ++ public CallingConvention getCallingConvention(Type type, JavaType returnType, JavaType[] parameterTypes, ValueKindFactory valueKindFactory) { ++ HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type; ++ if (type == HotSpotCallingConventionType.NativeCall) { ++ return callingConvention(nativeGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory); ++ } ++ // On x64, parameter 
locations are the same whether viewed ++ // from the caller or callee perspective ++ return callingConvention(javaGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory); ++ } ++ ++ @Override ++ public RegisterArray getCallingConventionRegisters(Type type, JavaKind kind) { ++ HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type; ++ switch (kind) { ++ case Boolean: ++ case Byte: ++ case Short: ++ case Char: ++ case Int: ++ case Long: ++ case Object: ++ return hotspotType == HotSpotCallingConventionType.NativeCall ? nativeGeneralParameterRegisters : javaGeneralParameterRegisters; ++ case Float: ++ case Double: ++ return floatParameterRegisters; ++ default: ++ throw JVMCIError.shouldNotReachHere(); ++ } ++ } ++ ++ private CallingConvention callingConvention(RegisterArray generalParameterRegisters, JavaType returnType, JavaType[] parameterTypes, HotSpotCallingConventionType type, ++ ValueKindFactory valueKindFactory) { ++ AllocatableValue[] locations = new AllocatableValue[parameterTypes.length]; ++ ++ int currentGeneral = 0; ++ int currentFloat = 0; ++ int currentStackOffset = 0; ++ ++ for (int i = 0; i < parameterTypes.length; i++) { ++ final JavaKind kind = parameterTypes[i].getJavaKind().getStackKind(); ++ ++ switch (kind) { ++ case Byte: ++ case Boolean: ++ case Short: ++ case Char: ++ case Int: ++ case Long: ++ case Object: ++ if (currentGeneral < generalParameterRegisters.size()) { ++ Register register = generalParameterRegisters.get(currentGeneral++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } ++ break; ++ case Float: ++ case Double: ++ if (currentFloat < floatParameterRegisters.size()) { ++ Register register = floatParameterRegisters.get(currentFloat++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } else if (currentGeneral < generalParameterRegisters.size()) { ++ Register register = generalParameterRegisters.get(currentGeneral++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } ++ break; ++ default: ++ throw JVMCIError.shouldNotReachHere(); ++ } ++ ++ if (locations[i] == null) { ++ ValueKind valueKind = valueKindFactory.getValueKind(kind); ++ locations[i] = StackSlot.get(valueKind, currentStackOffset, !type.out); ++ currentStackOffset += Math.max(valueKind.getPlatformKind().getSizeInBytes(), target.wordSize); ++ } ++ } ++ ++ JavaKind returnKind = returnType == null ? JavaKind.Void : returnType.getJavaKind(); ++ AllocatableValue returnLocation = returnKind == JavaKind.Void ? 
Value.ILLEGAL : getReturnRegister(returnKind).asValue(valueKindFactory.getValueKind(returnKind.getStackKind())); ++ return new CallingConvention(currentStackOffset, returnLocation, locations); ++ } ++ ++ @Override ++ public Register getReturnRegister(JavaKind kind) { ++ switch (kind) { ++ case Boolean: ++ case Byte: ++ case Char: ++ case Short: ++ case Int: ++ case Long: ++ case Object: ++ return v0; ++ case Float: ++ case Double: ++ return fv0; ++ case Void: ++ case Illegal: ++ return null; ++ default: ++ throw new UnsupportedOperationException("no return register for type " + kind); ++ } ++ } ++ ++ @Override ++ public Register getFrameRegister() { ++ return sp; ++ } ++ ++ @Override ++ public String toString() { ++ return String.format("Allocatable: " + getAllocatableRegisters() + "%n" + "CallerSave: " + getCallerSaveRegisters() + "%n"); ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java +new file mode 100644 +index 0000000000..c8605976a0 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import jdk.vm.ci.hotspot.HotSpotVMConfigAccess; ++import jdk.vm.ci.hotspot.HotSpotVMConfigStore; ++import jdk.vm.ci.services.Services; ++ ++/** ++ * Used to access native configuration details. ++ * ++ * All non-static, public fields in this class are so that they can be compiled as constants. ++ */ ++class LoongArch64HotSpotVMConfig extends HotSpotVMConfigAccess { ++ ++ LoongArch64HotSpotVMConfig(HotSpotVMConfigStore config) { ++ super(config); ++ } ++ ++ final boolean useCompressedOops = getFlag("UseCompressedOops", Boolean.class); ++ ++ // CPU Capabilities ++ ++ /* ++ * These flags are set based on the corresponding command line flags. ++ */ ++ final boolean useLSX = getFlag("UseLSX", Boolean.class); ++ final boolean useLASX = getFlag("UseLASX", Boolean.class); ++ ++ final long vmVersionFeatures = getFieldValue("Abstract_VM_Version::_features", Long.class, "uint64_t"); ++ ++ /* ++ * These flags are set if the corresponding support is in the hardware. 
++ */ ++ // Checkstyle: stop ++ final long loongarch64LA32 = getConstant("VM_Version::CPU_LA32", Long.class); ++ final long loongarch64LA64 = getConstant("VM_Version::CPU_LA64", Long.class); ++ final long loongarch64LLEXC = getConstant("VM_Version::CPU_LLEXC", Long.class); ++ final long loongarch64SCDLY = getConstant("VM_Version::CPU_SCDLY", Long.class); ++ final long loongarch64LLDBAR = getConstant("VM_Version::CPU_LLDBAR", Long.class); ++ final long loongarch64LBT_X86 = getConstant("VM_Version::CPU_LBT_X86", Long.class); ++ final long loongarch64LBT_ARM = getConstant("VM_Version::CPU_LBT_ARM", Long.class); ++ final long loongarch64LBT_MIPS = getConstant("VM_Version::CPU_LBT_MIPS", Long.class); ++ final long loongarch64CCDMA = getConstant("VM_Version::CPU_CCDMA", Long.class); ++ final long loongarch64COMPLEX = getConstant("VM_Version::CPU_COMPLEX", Long.class); ++ final long loongarch64FP = getConstant("VM_Version::CPU_FP", Long.class); ++ final long loongarch64CRYPTO = getConstant("VM_Version::CPU_CRYPTO", Long.class); ++ final long loongarch64LSX = getConstant("VM_Version::CPU_LSX", Long.class); ++ final long loongarch64LASX = getConstant("VM_Version::CPU_LASX", Long.class); ++ final long loongarch64LAM = getConstant("VM_Version::CPU_LAM", Long.class); ++ final long loongarch64LLSYNC = getConstant("VM_Version::CPU_LLSYNC", Long.class); ++ final long loongarch64TGTSYNC = getConstant("VM_Version::CPU_TGTSYNC", Long.class); ++ final long loongarch64ULSYNC = getConstant("VM_Version::CPU_ULSYNC", Long.class); ++ final long loongarch64UAL = getConstant("VM_Version::CPU_UAL", Long.class); ++ // Checkstyle: resume ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java +new file mode 100644 +index 0000000000..1048ea9d64 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/** ++ * The LoongArch64 HotSpot specific portions of the JVMCI API. 
++ */ ++package jdk.vm.ci.hotspot.loongarch64; +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java +new file mode 100644 +index 0000000000..1bb12e7a5f +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java +@@ -0,0 +1,247 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.loongarch64; ++ ++import java.nio.ByteOrder; ++import java.util.EnumSet; ++ ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.Register.RegisterCategory; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.PlatformKind; ++ ++/** ++ * Represents the LoongArch64 architecture. 
++ */ ++public class LoongArch64 extends Architecture { ++ ++ public static final RegisterCategory CPU = new RegisterCategory("CPU"); ++ ++ // General purpose CPU registers ++ public static final Register zero = new Register(0, 0, "r0", CPU); ++ public static final Register ra = new Register(1, 1, "r1", CPU); ++ public static final Register tp = new Register(2, 2, "r2", CPU); ++ public static final Register sp = new Register(3, 3, "r3", CPU); ++ public static final Register a0 = new Register(4, 4, "r4", CPU); ++ public static final Register a1 = new Register(5, 5, "r5", CPU); ++ public static final Register a2 = new Register(6, 6, "r6", CPU); ++ public static final Register a3 = new Register(7, 7, "r7", CPU); ++ public static final Register a4 = new Register(8, 8, "r8", CPU); ++ public static final Register a5 = new Register(9, 9, "r9", CPU); ++ public static final Register a6 = new Register(10, 10, "r10", CPU); ++ public static final Register a7 = new Register(11, 11, "r11", CPU); ++ public static final Register t0 = new Register(12, 12, "r12", CPU); ++ public static final Register t1 = new Register(13, 13, "r13", CPU); ++ public static final Register t2 = new Register(14, 14, "r14", CPU); ++ public static final Register t3 = new Register(15, 15, "r15", CPU); ++ public static final Register t4 = new Register(16, 16, "r16", CPU); ++ public static final Register t5 = new Register(17, 17, "r17", CPU); ++ public static final Register t6 = new Register(18, 18, "r18", CPU); ++ public static final Register t7 = new Register(19, 19, "r19", CPU); ++ public static final Register t8 = new Register(20, 20, "r20", CPU); ++ public static final Register rx = new Register(21, 21, "r21", CPU); ++ public static final Register fp = new Register(22, 22, "r22", CPU); ++ public static final Register s0 = new Register(23, 23, "r23", CPU); ++ public static final Register s1 = new Register(24, 24, "r24", CPU); ++ public static final Register s2 = new Register(25, 25, "r25", CPU); ++ public static final Register s3 = new Register(26, 26, "r26", CPU); ++ public static final Register s4 = new Register(27, 27, "r27", CPU); ++ public static final Register s5 = new Register(28, 28, "r28", CPU); ++ public static final Register s6 = new Register(29, 29, "r29", CPU); ++ public static final Register s7 = new Register(30, 30, "r30", CPU); ++ public static final Register s8 = new Register(31, 31, "r31", CPU); ++ ++ public static final Register SCR1 = t7; ++ public static final Register SCR2 = t4; ++ public static final Register v0 = a0; ++ ++ // @formatter:off ++ public static final RegisterArray cpuRegisters = new RegisterArray( ++ zero, ra, tp, sp, a0, a1, a2, a3, ++ a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, rx, fp, s0, ++ s1, s2, s3, s4, s5, s6, s7, s8 ++ ); ++ // @formatter:on ++ ++ public static final RegisterCategory SIMD = new RegisterCategory("SIMD"); ++ ++ // Simd registers ++ public static final Register f0 = new Register(32, 0, "f0", SIMD); ++ public static final Register f1 = new Register(33, 1, "f1", SIMD); ++ public static final Register f2 = new Register(34, 2, "f2", SIMD); ++ public static final Register f3 = new Register(35, 3, "f3", SIMD); ++ public static final Register f4 = new Register(36, 4, "f4", SIMD); ++ public static final Register f5 = new Register(37, 5, "f5", SIMD); ++ public static final Register f6 = new Register(38, 6, "f6", SIMD); ++ public static final Register f7 = new Register(39, 7, "f7", SIMD); ++ public static final Register f8 = new Register(40, 8, "f8", SIMD); ++ public 
static final Register f9 = new Register(41, 9, "f9", SIMD); ++ public static final Register f10 = new Register(42, 10, "f10", SIMD); ++ public static final Register f11 = new Register(43, 11, "f11", SIMD); ++ public static final Register f12 = new Register(44, 12, "f12", SIMD); ++ public static final Register f13 = new Register(45, 13, "f13", SIMD); ++ public static final Register f14 = new Register(46, 14, "f14", SIMD); ++ public static final Register f15 = new Register(47, 15, "f15", SIMD); ++ public static final Register f16 = new Register(48, 16, "f16", SIMD); ++ public static final Register f17 = new Register(49, 17, "f17", SIMD); ++ public static final Register f18 = new Register(50, 18, "f18", SIMD); ++ public static final Register f19 = new Register(51, 19, "f19", SIMD); ++ public static final Register f20 = new Register(52, 20, "f20", SIMD); ++ public static final Register f21 = new Register(53, 21, "f21", SIMD); ++ public static final Register f22 = new Register(54, 22, "f22", SIMD); ++ public static final Register f23 = new Register(55, 23, "f23", SIMD); ++ public static final Register f24 = new Register(56, 24, "f24", SIMD); ++ public static final Register f25 = new Register(57, 25, "f25", SIMD); ++ public static final Register f26 = new Register(58, 26, "f26", SIMD); ++ public static final Register f27 = new Register(59, 27, "f27", SIMD); ++ public static final Register f28 = new Register(60, 28, "f28", SIMD); ++ public static final Register f29 = new Register(61, 29, "f29", SIMD); ++ public static final Register f30 = new Register(62, 30, "f30", SIMD); ++ public static final Register f31 = new Register(63, 31, "f31", SIMD); ++ ++ public static final Register fv0 = f0; ++ ++ // @formatter:off ++ public static final RegisterArray simdRegisters = new RegisterArray( ++ f0, f1, f2, f3, f4, f5, f6, f7, ++ f8, f9, f10, f11, f12, f13, f14, f15, ++ f16, f17, f18, f19, f20, f21, f22, f23, ++ f24, f25, f26, f27, f28, f29, f30, f31 ++ ); ++ // @formatter:on ++ ++ // @formatter:off ++ public static final RegisterArray allRegisters = new RegisterArray( ++ zero, ra, tp, sp, a0, a1, a2, a3, ++ a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, rx, fp, s0, ++ s1, s2, s3, s4, s5, s6, s7, s8, ++ ++ f0, f1, f2, f3, f4, f5, f6, f7, ++ f8, f9, f10, f11, f12, f13, f14, f15, ++ f16, f17, f18, f19, f20, f21, f22, f23, ++ f24, f25, f26, f27, f28, f29, f30, f31 ++ ); ++ // @formatter:on ++ ++ /** ++ * Basic set of CPU features mirroring what is returned from the cpuid instruction. See: ++ * {@code VM_Version::cpuFeatureFlags}. ++ */ ++ public enum CPUFeature { ++ LA32, ++ LA64, ++ LLEXC, ++ SCDLY, ++ LLDBAR, ++ LBT_X86, ++ LBT_ARM, ++ LBT_MIPS, ++ CCDMA, ++ COMPLEX, ++ FP, ++ CRYPTO, ++ LSX, ++ LASX, ++ LAM, ++ LLSYNC, ++ TGTSYNC, ++ ULSYNC, ++ UAL ++ } ++ ++ private final EnumSet features; ++ ++ /** ++ * Set of flags to control code emission. 
++ */ ++ public enum Flag { ++ useLSX, ++ useLASX ++ } ++ ++ private final EnumSet flags; ++ ++ public LoongArch64(EnumSet features, EnumSet flags) { ++ super("loongarch64", LoongArch64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, 0, 0, 0); ++ this.features = features; ++ this.flags = flags; ++ } ++ ++ public EnumSet getFeatures() { ++ return features; ++ } ++ ++ public EnumSet getFlags() { ++ return flags; ++ } ++ ++ @Override ++ public PlatformKind getPlatformKind(JavaKind javaKind) { ++ switch (javaKind) { ++ case Boolean: ++ case Byte: ++ return LoongArch64Kind.BYTE; ++ case Short: ++ case Char: ++ return LoongArch64Kind.WORD; ++ case Int: ++ return LoongArch64Kind.DWORD; ++ case Long: ++ case Object: ++ return LoongArch64Kind.QWORD; ++ case Float: ++ return LoongArch64Kind.SINGLE; ++ case Double: ++ return LoongArch64Kind.DOUBLE; ++ default: ++ return null; ++ } ++ } ++ ++ @Override ++ public boolean canStoreValue(RegisterCategory category, PlatformKind platformKind) { ++ LoongArch64Kind kind = (LoongArch64Kind) platformKind; ++ if (kind.isInteger()) { ++ return category.equals(CPU); ++ } else if (kind.isSIMD()) { ++ return category.equals(SIMD); ++ } ++ return false; ++ } ++ ++ @Override ++ public LoongArch64Kind getLargestStorableKind(RegisterCategory category) { ++ if (category.equals(CPU)) { ++ return LoongArch64Kind.QWORD; ++ } else if (category.equals(SIMD)) { ++ return LoongArch64Kind.V256_QWORD; ++ } else { ++ return null; ++ } ++ } +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java new file mode 100644 -index 0000000000..dfe3066af0 +index 0000000000..84b7f2027f --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java -@@ -0,0 +1,57 @@ ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java +@@ -0,0 +1,163 @@ +/* -+ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -65537,50 +115148,156 @@ index 0000000000..dfe3066af0 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
-+ * + */ ++package jdk.vm.ci.loongarch64; + -+package sun.jvm.hotspot.runtime.mips64; ++import jdk.vm.ci.meta.PlatformKind; + -+import java.util.*; -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.types.*; -+import sun.jvm.hotspot.runtime.*; ++public enum LoongArch64Kind implements PlatformKind { + -+public class MIPS64JavaCallWrapper extends JavaCallWrapper { -+ private static AddressField lastJavaFPField; ++ // scalar ++ BYTE(1), ++ WORD(2), ++ DWORD(4), ++ QWORD(8), ++ UBYTE(1), ++ UWORD(2), ++ UDWORD(4), ++ SINGLE(4), ++ DOUBLE(8), + -+ static { -+ VM.registerVMInitializedObserver(new Observer() { -+ public void update(Observable o, Object data) { -+ initialize(VM.getVM().getTypeDataBase()); -+ } -+ }); -+ } ++ // SIMD ++ V128_BYTE(16, BYTE), ++ V128_WORD(16, WORD), ++ V128_DWORD(16, DWORD), ++ V128_QWORD(16, QWORD), ++ V128_SINGLE(16, SINGLE), ++ V128_DOUBLE(16, DOUBLE), ++ V256_BYTE(32, BYTE), ++ V256_WORD(32, WORD), ++ V256_DWORD(32, DWORD), ++ V256_QWORD(32, QWORD), ++ V256_SINGLE(32, SINGLE), ++ V256_DOUBLE(32, DOUBLE); + -+ private static synchronized void initialize(TypeDataBase db) { -+ Type type = db.lookupType("JavaFrameAnchor"); ++ private final int size; ++ private final int vectorLength; + -+ lastJavaFPField = type.getAddressField("_last_Java_fp"); -+ } ++ private final LoongArch64Kind scalar; ++ private final EnumKey key = new EnumKey<>(this); + -+ public MIPS64JavaCallWrapper(Address addr) { -+ super(addr); -+ } ++ LoongArch64Kind(int size) { ++ this.size = size; ++ this.scalar = this; ++ this.vectorLength = 1; ++ } + -+ public Address getLastJavaFP() { -+ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); -+ } ++ LoongArch64Kind(int size, LoongArch64Kind scalar) { ++ this.size = size; ++ this.scalar = scalar; ++ ++ assert size % scalar.size == 0; ++ this.vectorLength = size / scalar.size; ++ } ++ ++ public LoongArch64Kind getScalar() { ++ return scalar; ++ } ++ ++ @Override ++ public int getSizeInBytes() { ++ return size; ++ } ++ ++ @Override ++ public int getVectorLength() { ++ return vectorLength; ++ } ++ ++ @Override ++ public Key getKey() { ++ return key; ++ } ++ ++ public boolean isInteger() { ++ switch (this) { ++ case BYTE: ++ case WORD: ++ case DWORD: ++ case QWORD: ++ case UBYTE: ++ case UWORD: ++ case UDWORD: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ public boolean isSIMD() { ++ switch (this) { ++ case SINGLE: ++ case DOUBLE: ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ case V256_BYTE: ++ case V256_WORD: ++ case V256_DWORD: ++ case V256_QWORD: ++ case V256_SINGLE: ++ case V256_DOUBLE: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ @Override ++ public char getTypeChar() { ++ switch (this) { ++ case BYTE: ++ return 'b'; ++ case WORD: ++ return 'w'; ++ case DWORD: ++ return 'd'; ++ case QWORD: ++ return 'q'; ++ case SINGLE: ++ return 'S'; ++ case DOUBLE: ++ return 'D'; ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ case V256_BYTE: ++ case V256_WORD: ++ case V256_DWORD: ++ case V256_QWORD: ++ case V256_SINGLE: ++ case V256_DOUBLE: ++ return 'v'; ++ default: ++ return '-'; ++ } ++ } +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +diff --git 
a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java new file mode 100644 -index 0000000000..f2da760af4 +index 0000000000..9d020833ea --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java -@@ -0,0 +1,52 @@ ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java +@@ -0,0 +1,28 @@ +/* -+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -65600,76 +115317,37 @@ index 0000000000..f2da760af4 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. -+ * + */ + -+package sun.jvm.hotspot.runtime.mips64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.runtime.*; -+ -+public class MIPS64RegisterMap extends RegisterMap { -+ -+ /** This is the only public constructor */ -+ public MIPS64RegisterMap(JavaThread thread, boolean updateMap) { -+ super(thread, updateMap); -+ } -+ -+ protected MIPS64RegisterMap(RegisterMap map) { -+ super(map); -+ } -+ -+ public Object clone() { -+ MIPS64RegisterMap retval = new MIPS64RegisterMap(this); -+ return retval; -+ } -+ -+ // no PD state to clear or copy: -+ protected void clearPD() {} -+ protected void initializePD() {} -+ protected void initializeFromPD(RegisterMap map) {} -+ protected Address getLocationPD(VMReg reg) { return null; } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -index 7d7a6107ca..06d79318d9 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -@@ -22,6 +22,13 @@ - * ++/** ++ * The LoongArch64 platform independent portions of the JVMCI API. ++ */ ++package jdk.vm.ci.loongarch64; +diff --git a/src/jdk.internal.vm.ci/share/classes/module-info.java b/src/jdk.internal.vm.ci/share/classes/module-info.java +index fed310d386..661f106d30 100644 +--- a/src/jdk.internal.vm.ci/share/classes/module-info.java ++++ b/src/jdk.internal.vm.ci/share/classes/module-info.java +@@ -23,6 +23,12 @@ + * questions. */ +/* -+ * This file has been modified by Loongson Technology in 2021. These -+ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
-+ * + */ + - package sun.jvm.hotspot.utilities; - - /** Provides canonicalized OS and CPU information for the rest of the -@@ -54,7 +61,7 @@ public class PlatformInfo { - - public static boolean knownCPU(String cpu) { - final String[] KNOWN = -- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; -+ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "mips64", "mips64el", "loongarch64"}; - - for(String s : KNOWN) { - if(s.equals(cpu)) -@@ -101,6 +108,12 @@ public class PlatformInfo { - if (cpu.equals("ppc64le")) - return "ppc64"; - -+ if (cpu.equals("mips64el")) -+ return "mips64"; -+ -+ if (cpu.equals("loongarch64")) -+ return "loongarch64"; -+ - return cpu; + module jdk.internal.vm.ci { + exports jdk.vm.ci.services to jdk.internal.vm.compiler; + exports jdk.vm.ci.runtime to +@@ -37,6 +43,7 @@ module jdk.internal.vm.ci { - } + provides jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory with + jdk.vm.ci.hotspot.aarch64.AArch64HotSpotJVMCIBackendFactory, ++ jdk.vm.ci.hotspot.loongarch64.LoongArch64HotSpotJVMCIBackendFactory, + jdk.vm.ci.hotspot.amd64.AMD64HotSpotJVMCIBackendFactory, + jdk.vm.ci.hotspot.sparc.SPARCHotSpotJVMCIBackendFactory; + } diff --git a/src/utils/hsdis/Makefile b/src/utils/hsdis/Makefile index 2514a895da..08fbe3b953 100644 --- a/src/utils/hsdis/Makefile @@ -65684,75 +115362,6 @@ index 2514a895da..08fbe3b953 100644 LDFLAGS += -ldl OUTFLAGS += -o $@ else -diff --git a/test/hotspot/jtreg/ProblemList-Xcomp.txt b/test/hotspot/jtreg/ProblemList-Xcomp.txt -index 4d6159a22b..2ff512758f 100644 ---- a/test/hotspot/jtreg/ProblemList-Xcomp.txt -+++ b/test/hotspot/jtreg/ProblemList-Xcomp.txt -@@ -29,3 +29,6 @@ - - vmTestbase/vm/mlvm/meth/stress/jni/nativeAndMH/Test.java 8208235 solaris-all - runtime/appcds/cacheObject/DifferentHeapSizes.java 8210102 solaris-all -+ -+# loongson added -+compiler/intrinsics/bigInteger/TestMultiplyToLenReturnProfile.java generic-mips64el -diff --git a/test/hotspot/jtreg/ProblemList.txt b/test/hotspot/jtreg/ProblemList.txt -index 941429356c..f66c98b1fb 100644 ---- a/test/hotspot/jtreg/ProblemList.txt -+++ b/test/hotspot/jtreg/ProblemList.txt -@@ -21,6 +21,12 @@ - # questions. - # - -+# -+# This file has been modified by Loongson Technology in 2022. These -+# modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made -+# available on the same license terms set forth above. 
-+# -+ - ############################################################################# - # - # List of quarantined tests -- tests that should not be run by default, because -@@ -233,3 +239,40 @@ vmTestbase/nsk/jdb/exclude/exclude001/exclude001.java 8197938 windows-all - vmTestbase/nsk/jdwp/ThreadReference/ForceEarlyReturn/forceEarlyReturn001/forceEarlyReturn001.java 7199837 generic-all - - ############################################################################# -+ -+# loongson added -+compiler/loopopts/TestSkeletonPredicateNegation.java #25538 generic-loongarch64 -+compiler/profiling/TestTypeProfiling.java #25171 generic-loongarch64 -+compiler/tiered/Level2RecompilationTest.java #10070 generic-mips64el,generic-loongarch64 -+containers/cgroup/PlainRead.java #20028 generic-mips64el -+gc/cms/TestBubbleUpRef.java #17221 generic-mips64el -+gc/stress/gcbasher/TestGCBasherWithCMS.java #17221 generic-mips64el -+resourcehogs/serviceability/sa/ClhsdbRegionDetailsScanOopsForG1.java #24312 generic-loongarch64 -+resourcehogs/serviceability/sa/TestHeapDumpForLargeArray.java #9797 generic-mips64el -+runtime/classFileParserBug/TestEmptyBootstrapMethodsAttr.java generic-all -+runtime/NMT/CheckForProperDetailStackTrace.java #9499 generic-mips64el,generic-loongarch64 -+serviceability/sa/CDSJMapClstats.java #9797 generic-mips64el -+serviceability/sa/ClhsdbCDSJstackPrintAll.java #9797 generic-mips64el -+serviceability/sa/ClhsdbInspect.java #9797 generic-mips64el -+serviceability/sa/ClhsdbJdis.java #9797 generic-mips64el -+serviceability/sa/ClhsdbJstack.java #9797 generic-mips64el -+serviceability/sa/ClhsdbJstackXcompStress.java #10632 generic-mips64el -+serviceability/sa/ClhsdbPrintAs.java #9797 generic-mips64el -+serviceability/sa/ClhsdbPstack.java #9797 generic-mips64el -+serviceability/sa/ClhsdbSource.java #9797 generic-mips64el -+serviceability/sa/ClhsdbThread.java #9797 generic-mips64el -+serviceability/sa/ClhsdbWhere.java #9797 generic-mips64el -+serviceability/sa/DeadlockDetectionTest.java #9797 generic-mips64el -+serviceability/sa/JhsdbThreadInfoTest.java #9797 generic-mips64el -+serviceability/sa/jmap-hprof/JMapHProfLargeHeapTest.java #9797 generic-mips64el -+serviceability/sa/sadebugd/DebugdConnectTest.java #9797 generic-mips64el -+serviceability/sa/TestClhsdbJstackLock.java #9797 generic-mips64el -+serviceability/sa/TestHeapDumpForInvokeDynamic.java #9797 generic-mips64el -+serviceability/sa/TestHeapDumpForLargeArray.java #9797 generic-mips64el -+serviceability/sa/TestInstanceKlassSize.java #9797 generic-mips64el -+serviceability/sa/TestJhsdbJstackLock.java #9797 generic-mips64el -+serviceability/sa/TestJhsdbJstackMixed.java #9797 generic-mips64el -+serviceability/sa/TestJmapCore.java #9797 generic-mips64el -+serviceability/sa/TestJmapCoreMetaspace.java #9797 generic-mips64el -+serviceability/sa/TestPrintMdo.java #9797,#25534 generic-mips64el,generic-loongarch64 -+vmTestbase/jit/tiered/Test.java generic-mips64el diff --git a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java index ac17e567b0..9b004a2033 100644 --- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java @@ -65844,6 +115453,770 @@ index faa9fdbae6..a635f03d24 100644 } @Override +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java +index 
62d0e99155..c3fa3fb93e 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java +@@ -29,6 +29,7 @@ import jdk.vm.ci.code.InstalledCode; + import jdk.vm.ci.code.TargetDescription; + import jdk.vm.ci.code.test.amd64.AMD64TestAssembler; + import jdk.vm.ci.code.test.sparc.SPARCTestAssembler; ++import jdk.vm.ci.code.test.loongarch64.LoongArch64TestAssembler; + import jdk.vm.ci.hotspot.HotSpotCompiledCode; + import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; + import jdk.vm.ci.hotspot.HotSpotResolvedJavaMethod; +@@ -37,6 +38,7 @@ import jdk.vm.ci.meta.MetaAccessProvider; + import jdk.vm.ci.runtime.JVMCI; + import jdk.vm.ci.runtime.JVMCIBackend; + import jdk.vm.ci.sparc.SPARC; ++import jdk.vm.ci.loongarch64.LoongArch64; + import org.junit.Assert; + + import java.lang.reflect.Method; +@@ -72,6 +74,8 @@ public class CodeInstallationTest { + return new AMD64TestAssembler(codeCache, config); + } else if (arch instanceof SPARC) { + return new SPARCTestAssembler(codeCache, config); ++ } else if (arch instanceof LoongArch64) { ++ return new LoongArch64TestAssembler(codeCache, config); + } else { + Assert.fail("unsupported architecture"); + return null; +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java +index 8afc7d7b98..520d7707a2 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.DataPatchTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java +index 75d0748da5..a6826e2ffe 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @modules 
jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.code + * jdk.internal.vm.ci/jdk.vm.ci.code.site +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.InterpreterFrameSizeTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java +index a67fa2c1df..59cce6454d 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -33,7 +33,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.MaxOopMapStackOffsetTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java +index d9e1f24c30..259218b305 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library /test/lib / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.code +@@ -33,7 +33,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java TestAssembler.java sparc/SPARCTestAssembler.java amd64/AMD64TestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java 
TestAssembler.java sparc/SPARCTestAssembler.java amd64/AMD64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm/native -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Xbootclasspath/a:. jdk.vm.ci.code.test.NativeCallTest + */ + package jdk.vm.ci.code.test; +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java +index 9b92114055..00d0f53cdb 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.SimpleCodeInstallationTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java +index 5b2204868c..ecfcb1cf01 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.SimpleDebugInfoTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java 
b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java +index a10e90acda..5b1a58c74b 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.VirtualObjectDebugInfoTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java +new file mode 100644 +index 0000000000..4c76868453 +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java +@@ -0,0 +1,568 @@ ++/* ++ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++package jdk.vm.ci.code.test.loongarch64; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.loongarch64.LoongArch64Kind; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CodeCacheProvider; ++import jdk.vm.ci.code.DebugInfo; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterValue; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.site.ConstantReference; ++import jdk.vm.ci.code.site.DataSectionReference; ++import jdk.vm.ci.code.test.TestAssembler; ++import jdk.vm.ci.code.test.TestHotSpotVMConfig; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.hotspot.HotSpotConstant; ++import jdk.vm.ci.hotspot.HotSpotForeignCallTarget; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.VMConstant; ++ ++public class LoongArch64TestAssembler extends TestAssembler { ++ ++ private static final Register scratchRegister = LoongArch64.SCR1; ++ private static final Register doubleScratch = LoongArch64.f23; ++ private static final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(LoongArch64.a0, ++ LoongArch64.a1, LoongArch64.a2, ++ LoongArch64.a3, LoongArch64.a4, ++ LoongArch64.a5, LoongArch64.a6, ++ LoongArch64.a7); ++ private static final RegisterArray floatParameterRegisters = new RegisterArray(LoongArch64.f0, ++ LoongArch64.f1, LoongArch64.f2, ++ LoongArch64.f3, LoongArch64.f4, ++ LoongArch64.f5, LoongArch64.f6, ++ LoongArch64.f7); ++ private static int currentGeneral = 0; ++ private static int currentFloat = 0; ++ public LoongArch64TestAssembler(CodeCacheProvider codeCache, TestHotSpotVMConfig config) { ++ super(codeCache, config, ++ 16 /* initialFrameSize */, 16 /* stackAlignment */, ++ LoongArch64Kind.UDWORD /* narrowOopKind */, ++ /* registers */ ++ LoongArch64.a0, LoongArch64.a1, LoongArch64.a2, LoongArch64.a3, ++ LoongArch64.a4, LoongArch64.a5, LoongArch64.a6, LoongArch64.a7); ++ } ++ ++ private static int low(int x, int l) { ++ assert l < 32; ++ return (x >> 0) & ((1 << l)-1); ++ } ++ ++ private static int low16(int x) { ++ return low(x, 16); ++ } ++ ++ private void emitNop() { ++ code.emitInt(0x3400000); ++ } ++ ++ private void emitPcaddu12i(Register rj, int si20) { ++ // pcaddu12i ++ code.emitInt((0b0001110 << 25) ++ | (low(si20, 20) << 5) ++ | rj.encoding); ++ } ++ ++ private void emitAdd(Register rd, Register rj, Register rk) { ++ // add_d ++ code.emitInt((0b00000000000100001 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitAdd(Register rd, Register rj, int si12) { ++ // addi_d ++ code.emitInt((0b0000001011 << 22) ++ | (low(si12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitSub(Register rd, Register rj, Register rk) { ++ // sub_d ++ code.emitInt((0b00000000000100011 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitShiftLeft(Register rd, Register rj, int shift) { ++ // slli_d ++ code.emitInt((0b00000000010000 << 18) ++ | (low(( (0b01 << 6) | shift ), 8) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLu12i_w(Register rj, int imm20) { ++ // lu12i_w ++ code.emitInt((0b0001010 << 25) ++ | (low(imm20, 20)<<5) ++ | rj.encoding); ++ } ++ ++ private void emitOri(Register rd, Register rj, int ui12) { ++ // ori ++ code.emitInt((0b0000001110 << 22) ++ | (low(ui12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void 
emitLu32i_d(Register rj, int imm20) { ++ // lu32i_d ++ code.emitInt((0b0001011 << 25) ++ | (low(imm20, 20)<<5) ++ | rj.encoding); ++ } ++ ++ private void emitLu52i_d(Register rd, Register rj, int imm12) { ++ // lu52i_d ++ code.emitInt((0b0000001100 << 22) ++ | (low(imm12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLoadImmediate(Register rd, int imm32) { ++ emitLu12i_w(rd, (imm32 >> 12) & 0xfffff); ++ emitOri(rd, rd, imm32 & 0xfff); ++ } ++ ++ private void emitLi52(Register rj, long imm) { ++ emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); ++ emitOri(rj, rj, (int) (imm & 0xfff)); ++ emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); ++ } ++ ++ private void emitLi64(Register rj, long imm) { ++ emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); ++ emitOri(rj, rj, (int) (imm & 0xfff)); ++ emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); ++ emitLu52i_d(rj, rj, (int) ((imm >> 52) & 0xfff)); ++ } ++ ++ private void emitOr(Register rd, Register rj, Register rk) { ++ // orr ++ code.emitInt((0b00000000000101010 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitMove(Register rd, Register rs) { ++ // move ++ emitOr(rd, rs, LoongArch64.zero); ++ } ++ ++ private void emitMovfr2gr(Register rd, LoongArch64Kind kind, Register rj) { ++ // movfr2gr_s/movfr2gr_d ++ int opc = 0; ++ switch (kind) { ++ case SINGLE: opc = 0b0000000100010100101101; break; ++ case DOUBLE: opc = 0b0000000100010100101110; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLoadRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { ++ // load ++ assert offset >= 0; ++ int opc = 0; ++ switch (kind) { ++ case BYTE: opc = 0b0010100000; break; ++ case WORD: opc = 0b0010100001; break; ++ case DWORD: opc = 0b0010100010; break; ++ case QWORD: opc = 0b0010100011; break; ++ case UDWORD: opc = 0b0010101010; break; ++ case SINGLE: opc = 0b0010101100; break; ++ case DOUBLE: opc = 0b0010101110; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 22) ++ | (low(offset, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitStoreRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { ++ // store ++ assert offset >= 0; ++ int opc = 0; ++ switch (kind) { ++ case BYTE: opc = 0b0010100100; break; ++ case WORD: opc = 0b0010100101; break; ++ case DWORD: opc = 0b0010100110; break; ++ case QWORD: opc = 0b0010100111; break; ++ case SINGLE: opc = 0b0010101101; break; ++ case DOUBLE: opc = 0b0010101111; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 22) ++ | (low(offset, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitJirl(Register rd, Register rj, int offs) { ++ // jirl ++ code.emitInt((0b010011 << 26) ++ | (low16(offs >> 2) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ @Override ++ public void emitGrowStack(int size) { ++ assert size % 16 == 0; ++ if (size > -4096 && size < 0) { ++ emitAdd(LoongArch64.sp, LoongArch64.sp, -size); ++ } else if (size == 0) { ++ // No-op ++ } else if (size < 4096) { ++ emitAdd(LoongArch64.sp, LoongArch64.sp, -size); ++ } else if (size < 65535) { ++ emitLoadImmediate(scratchRegister, size); ++ emitSub(LoongArch64.sp, LoongArch64.sp, scratchRegister); ++ } else { ++ throw new IllegalArgumentException(); ++ } ++ } ++ ++ @Override ++ public void emitPrologue() { ++ 
// Must be patchable by NativeJump::patch_verified_entry ++ emitNop(); ++ emitGrowStack(32); ++ emitStoreRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 24); ++ emitStoreRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 16); ++ emitGrowStack(-16); ++ emitMove(LoongArch64.fp, LoongArch64.sp); ++ setDeoptRescueSlot(newStackSlot(LoongArch64Kind.QWORD)); ++ } ++ ++ @Override ++ public void emitEpilogue() { ++ recordMark(config.MARKID_DEOPT_HANDLER_ENTRY); ++ recordCall(new HotSpotForeignCallTarget(config.handleDeoptStub), 4*4, true, null); ++ emitCall(0xdeaddeaddeadL); ++ } ++ ++ @Override ++ public void emitCallPrologue(CallingConvention cc, Object... prim) { ++ emitGrowStack(cc.getStackSize()); ++ frameSize += cc.getStackSize(); ++ AllocatableValue[] args = cc.getArguments(); ++ for (int i = 0; i < args.length; i++) { ++ emitLoad(args[i], prim[i]); ++ } ++ currentGeneral = 0; ++ currentFloat = 0; ++ } ++ ++ @Override ++ public void emitCallEpilogue(CallingConvention cc) { ++ emitGrowStack(-cc.getStackSize()); ++ frameSize -= cc.getStackSize(); ++ } ++ ++ @Override ++ public void emitCall(long addr) { ++ // long call (absolute) ++ // lu12i_w(T4, split_low20(value >> 12)); ++ // lu32i_d(T4, split_low20(value >> 32)); ++ // jirl(RA, T4, split_low12(value)); ++ emitLu12i_w(LoongArch64.t4, (int) ((addr >> 12) & 0xfffff)); ++ emitLu32i_d(LoongArch64.t4, (int) ((addr >> 32) & 0xfffff)); ++ emitJirl(LoongArch64.ra, LoongArch64.t4, (int) (addr & 0xfff)); ++ } ++ ++ @Override ++ public void emitLoad(AllocatableValue av, Object prim) { ++ if (av instanceof RegisterValue) { ++ Register reg = ((RegisterValue) av).getRegister(); ++ if (prim instanceof Float) { ++ if (currentFloat < floatParameterRegisters.size()) { ++ currentFloat++; ++ emitLoadFloat(reg, (Float) prim); ++ } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { ++ currentGeneral++; ++ emitLoadFloat(doubleScratch, (Float) prim); ++ emitMovfr2gr(reg, LoongArch64Kind.SINGLE, doubleScratch); ++ } ++ } else if (prim instanceof Double) { ++ if (currentFloat < floatParameterRegisters.size()) { ++ currentFloat++; ++ emitLoadDouble(reg, (Double) prim); ++ } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { ++ currentGeneral++; ++ emitLoadDouble(doubleScratch, (Double) prim); ++ emitMovfr2gr(reg, LoongArch64Kind.DOUBLE, doubleScratch); ++ } ++ } else if (prim instanceof Integer) { ++ emitLoadInt(reg, (Integer) prim); ++ } else if (prim instanceof Long) { ++ emitLoadLong(reg, (Long) prim); ++ } ++ } else if (av instanceof StackSlot) { ++ StackSlot slot = (StackSlot) av; ++ if (prim instanceof Float) { ++ emitFloatToStack(slot, emitLoadFloat(doubleScratch, (Float) prim)); ++ } else if (prim instanceof Double) { ++ emitDoubleToStack(slot, emitLoadDouble(doubleScratch, (Double) prim)); ++ } else if (prim instanceof Integer) { ++ emitIntToStack(slot, emitLoadInt(scratchRegister, (Integer) prim)); ++ } else if (prim instanceof Long) { ++ emitLongToStack(slot, emitLoadLong(scratchRegister, (Long) prim)); ++ } else { ++ assert false : "Unimplemented"; ++ } ++ } else { ++ throw new IllegalArgumentException("Unknown value " + av); ++ } ++ } ++ ++ @Override ++ public Register emitLoadPointer(HotSpotConstant c) { ++ recordDataPatchInCode(new ConstantReference((VMConstant) c)); ++ ++ Register ret = newRegister(); ++ // need to match patchable_li52 instruction sequence ++ // lu12i_ori_lu32i ++ emitLi52(ret, 0xdeaddead); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadPointer(Register b, 
int offset) { ++ Register ret = newRegister(); ++ emitLoadRegister(ret, LoongArch64Kind.QWORD, b, offset); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadNarrowPointer(DataSectionReference ref) { ++ recordDataPatchInCode(ref); ++ ++ Register ret = newRegister(); ++ emitPcaddu12i(ret, 0xdead >> 12); ++ emitAdd(ret, ret, 0xdead & 0xfff); ++ emitLoadRegister(ret, LoongArch64Kind.UDWORD, ret, 0); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadPointer(DataSectionReference ref) { ++ recordDataPatchInCode(ref); ++ ++ Register ret = newRegister(); ++ emitPcaddu12i(ret, 0xdead >> 12); ++ emitAdd(ret, ret, 0xdead & 0xfff); ++ emitLoadRegister(ret, LoongArch64Kind.QWORD, ret, 0); ++ return ret; ++ } ++ ++ private Register emitLoadDouble(Register reg, double c) { ++ DataSectionReference ref = new DataSectionReference(); ++ ref.setOffset(data.position()); ++ data.emitDouble(c); ++ ++ recordDataPatchInCode(ref); ++ emitPcaddu12i(scratchRegister, 0xdead >> 12); ++ emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); ++ emitLoadRegister(reg, LoongArch64Kind.DOUBLE, scratchRegister, 0); ++ return reg; ++ } ++ ++ private Register emitLoadFloat(Register reg, float c) { ++ DataSectionReference ref = new DataSectionReference(); ++ ref.setOffset(data.position()); ++ data.emitFloat(c); ++ ++ recordDataPatchInCode(ref); ++ emitPcaddu12i(scratchRegister, 0xdead >> 12); ++ emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); ++ emitLoadRegister(reg, LoongArch64Kind.SINGLE, scratchRegister, 0); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadFloat(float c) { ++ Register ret = LoongArch64.fv0; ++ return emitLoadFloat(ret, c); ++ } ++ ++ private Register emitLoadLong(Register reg, long c) { ++ emitLi64(reg, c); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadLong(long c) { ++ Register ret = newRegister(); ++ return emitLoadLong(ret, c); ++ } ++ ++ private Register emitLoadInt(Register reg, int c) { ++ emitLoadImmediate(reg, c); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadInt(int c) { ++ Register ret = newRegister(); ++ return emitLoadInt(ret, c); ++ } ++ ++ @Override ++ public Register emitIntArg0() { ++ return codeCache.getRegisterConfig() ++ .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) ++ .get(0); ++ } ++ ++ @Override ++ public Register emitIntArg1() { ++ return codeCache.getRegisterConfig() ++ .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) ++ .get(1); ++ } ++ ++ @Override ++ public Register emitIntAdd(Register a, Register b) { ++ emitAdd(a, a, b); ++ return a; ++ } ++ ++ @Override ++ public void emitTrap(DebugInfo info) { ++ // Dereference null pointer ++ emitMove(scratchRegister, LoongArch64.zero); ++ recordImplicitException(info); ++ emitLoadRegister(LoongArch64.zero, LoongArch64Kind.QWORD, scratchRegister, 0); ++ } ++ ++ @Override ++ public void emitIntRet(Register a) { ++ emitMove(LoongArch64.v0, a); ++ emitMove(LoongArch64.sp, LoongArch64.fp); ++ emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); ++ emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); ++ emitGrowStack(-16); ++ emitJirl(LoongArch64.zero, LoongArch64.ra, 0); ++ } ++ ++ @Override ++ public void emitFloatRet(Register a) { ++ assert a == LoongArch64.fv0 : "Unimplemented move " + a; ++ emitMove(LoongArch64.sp, LoongArch64.fp); ++ emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); ++ 
emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); ++ emitGrowStack(-16); ++ emitJirl(LoongArch64.zero, LoongArch64.ra, 0); ++ } ++ ++ @Override ++ public void emitPointerRet(Register a) { ++ emitIntRet(a); ++ } ++ ++ @Override ++ public StackSlot emitPointerToStack(Register a) { ++ return emitLongToStack(a); ++ } ++ ++ @Override ++ public StackSlot emitNarrowPointerToStack(Register a) { ++ return emitIntToStack(a); ++ } ++ ++ @Override ++ public Register emitUncompressPointer(Register compressed, long base, int shift) { ++ if (shift > 0) { ++ emitShiftLeft(compressed, compressed, shift); ++ } ++ ++ if (base != 0) { ++ emitLoadLong(scratchRegister, base); ++ emitAdd(compressed, compressed, scratchRegister); ++ } ++ ++ return compressed; ++ } ++ ++ private StackSlot emitDoubleToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.DOUBLE, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitDoubleToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.DOUBLE); ++ return emitDoubleToStack(ret, a); ++ } ++ ++ private StackSlot emitFloatToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.SINGLE, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitFloatToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.SINGLE); ++ return emitFloatToStack(ret, a); ++ } ++ ++ private StackSlot emitIntToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.DWORD, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitIntToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.DWORD); ++ return emitIntToStack(ret, a); ++ } ++ ++ private StackSlot emitLongToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.QWORD, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitLongToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.QWORD); ++ return emitLongToStack(ret, a); ++ } ++ ++} diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java index acb86812d2..664ea11d0d 100644 --- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java @@ -65935,139 +116308,8 @@ index 7774dabcb5..c1cb6e00f3 100644 public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), -diff --git a/test/hotspot/jtreg/loongson/25443/Test25443.java b/test/hotspot/jtreg/loongson/25443/Test25443.java -new file mode 100644 -index 0000000000..200485d1fd ---- /dev/null -+++ b/test/hotspot/jtreg/loongson/25443/Test25443.java -@@ -0,0 +1,58 @@ -+/* -+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+/** -+ * @test -+ * @summary test c2 or2s -+ * -+ * @run main/othervm -Xcomp -XX:-TieredCompilation Test25443 -+ */ -+public class Test25443 { -+ static short test_ori2s(int v1) { -+ short t = (short)(v1 | 0x14); -+ return t; -+ } -+ -+ static short test_or2s(int v1, int v2) { -+ short t = (short)(v1 | v2); -+ return t; -+ } -+ -+ static short ret; -+ public static void main(String[] args) { -+ for (int i = 0; i < 12000; i++) { //warmup -+ test_ori2s(0x333300); -+ test_or2s(0x333300, 0x14); -+ } -+ -+ if ( (test_ori2s(0x333300) == 0x3314) -+ && (test_or2s(0x333300, 0x14) == 0x3314) -+ && (test_or2s(0x333300, 0x1000) == 0x3300) -+ && (test_or2s(0x333300, 0x8000) == 0xffffb300)) { -+ System.out.println("TEST PASSED"); -+ } else { -+ throw new AssertionError("Not be expected results"); -+ } -+ } -+} -diff --git a/test/hotspot/jtreg/loongson/7432/Test7423.java b/test/hotspot/jtreg/loongson/7432/Test7423.java -new file mode 100644 -index 0000000000..defa026410 ---- /dev/null -+++ b/test/hotspot/jtreg/loongson/7432/Test7423.java -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+/** -+ * @test -+ * @summary Divide by zero -+ * -+ * @run main/othervm -Xint Test7423 -+ * @run main/othervm -Xcomp Test7423 -+ */ -+public class Test7423 { -+ -+ private static int divInt(int n) { -+ int a = 1 / n; -+ return a; -+ } -+ -+ private static long divLong(long n) { -+ long a = (long)1 / n; -+ return a; -+ } -+ -+ public static void main(String[] args) throws Exception { -+ -+ try { -+ for (int i = 0; i < 20000; i++) { -+ if (i == 18000) { -+ divInt(0); -+ divLong((long)0); -+ } else { -+ divInt(1); -+ divLong((long)1); -+ } -+ } -+ } catch (java.lang.ArithmeticException exc) { -+ System.out.println("expected-exception " + exc); -+ } -+ } -+ -+} diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -index 3386cfeb1f..a548c37d09 100644 +index 127bb6abcd..c9277604ae 100644 --- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java @@ -21,6 +21,12 @@ @@ -66093,7 +116335,7 @@ index 3386cfeb1f..a548c37d09 100644 Platform.isSolaris(); } diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -index a016f233e7..a60fc94158 100644 +index 77458554b7..05aee6b84c 100644 --- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java @@ -45,7 +45,7 @@ import java.util.Set; @@ -66105,146 +116347,6 @@ index a016f233e7..a60fc94158 100644 BITNESS("is32bit", "is64bit"), OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), -diff --git a/test/jdk/ProblemList-Xcomp.txt b/test/jdk/ProblemList-Xcomp.txt -index 0758209f2d..f849f73fc6 100644 ---- a/test/jdk/ProblemList-Xcomp.txt -+++ b/test/jdk/ProblemList-Xcomp.txt -@@ -21,6 +21,12 @@ - # questions. - # - -+# -+# This file has been modified by Loongson Technology in 2021. These -+# modifications are Copyright (c) 2020, 2021, Loongson Technology, and are made -+# available on the same license terms set forth above. -+# -+ - ############################################################################# - # - # List of quarantined tests for testing in Xcomp mode. 
-@@ -29,3 +35,55 @@ - - java/lang/invoke/MethodHandles/CatchExceptionTest.java 8146623 generic-all - java/lang/Class/forName/modules/TestDriver.java 8208212 solaris-all -+ -+# loongson added -+com/sun/net/httpserver/bugs/6725892/Test.java #uos generic-mips64el -+com/sun/net/httpserver/bugs/B6393710.java generic-mips64el -+com/sun/net/httpserver/bugs/B6529200.java generic-mips64el -+com/sun/net/httpserver/Test10.java generic-mips64el -+com/sun/net/httpserver/Test1.java generic-mips64el -+java/lang/annotation/loaderLeak/Main.java generic-all -+java/lang/Thread/UncaughtExceptions.sh generic-mips64el -+java/net/httpclient/MaxStreams.java generic-mips64el -+java/net/httpclient/ShortRequestBody.java #uos generic-mips64el -+java/net/httpclient/TimeoutOrdering.java generic-mips64el -+java/net/Socket/DeadlockTest.java #uos generic-mips64el -+java/net/Socket/LingerTest.java #uos generic-mips64el -+java/net/Socket/RejectIPv6.java generic-mips64el -+java/security/Security/ClassLoaderDeadlock/ClassLoaderDeadlock.sh #uos generic-mips64el -+javax/net/ssl/ServerName/SSLSocketSNISensitive.java generic-mips64el -+javax/net/ssl/SSLSession/SessionCacheSizeTests.java #uos generic-mips64el -+javax/net/ssl/SSLSession/SessionTimeOutTests.java generic-mips64el -+javax/net/ssl/Stapling/HttpsUrlConnClient.java generic-mips64el -+javax/net/ssl/Stapling/SSLEngineWithStapling.java generic-mips64el -+javax/net/ssl/Stapling/SSLSocketWithStapling.java generic-mips64el -+javax/net/ssl/Stapling/StapleEnableProps.java generic-mips64el -+javax/net/ssl/TLSCommon/TestSessionLocalPrincipal.java generic-mips64el -+javax/net/ssl/TLS/TestJSSEClientProtocol.java #uos generic-mips64el -+javax/net/ssl/TLS/TestJSSEServerProtocol.java #uos generic-mips64el -+jdk/security/logging/TestTLSHandshakeLog.java #uos generic-mips64el -+sun/net/InetAddress/nameservice/simple/DefaultCaching.java #uos generic-mips64el -+sun/net/www/http/HttpClient/B8209178.java generic-all -+sun/net/www/protocol/https/HttpsURLConnection/CookieHttpsClientTest.java #uos generic-mips64el -+sun/net/www/protocol/https/HttpsURLConnection/PostThruProxy.java #uos generic-mips64el -+sun/net/www/protocol/https/HttpsURLConnection/ReadTimeout.java generic-mips64el -+sun/security/ec/TestEC.java #uos generic-mips64el -+sun/security/krb5/auto/BogusKDC.java #uos generic-mips64el -+sun/security/krb5/auto/NullRenewUntil.java generic-mips64el -+sun/security/krb5/auto/rcache_usemd5.sh #uos generic-mips64el -+sun/security/krb5/auto/RefreshKrb5Config.java #uos generic-mips64el -+sun/security/krb5/auto/ReplayCacheTestProc.java #uos generic-mips64el -+sun/security/krb5/auto/Unreachable.java generic-mips64el -+sun/security/ssl/SSLSocketImpl/SSLSocketCloseHang.java generic-mips64el -+sun/security/ssl/Stapling/StatusResponseManager.java generic-mips64el -+sun/security/tools/jarsigner/certpolicy.sh #uos generic-mips64el -+sun/security/tools/jarsigner/checkusage.sh #uos generic-mips64el -+sun/security/tools/jarsigner/concise_jarsigner.sh generic-mips64el -+sun/security/tools/jarsigner/ec.sh #uos generic-mips64el -+sun/security/tools/keytool/selfissued.sh #uos generic-mips64el -+sun/security/tools/keytool/StorePasswordsByShell.sh #uos generic-mips64el -+sun/security/validator/certreplace.sh #uos generic-mips64el -+sun/security/validator/samedn.sh #uos generic-mips64el -+tools/jar/compat/CLICompatibility.java generic-mips64el -+tools/jar/modularJar/Basic.java generic-mips64el -+tools/jar/multiRelease/Basic.java generic-mips64el -diff --git a/test/jdk/ProblemList.txt b/test/jdk/ProblemList.txt 
-index 3923811fb4..7fa36b8f44 100644 ---- a/test/jdk/ProblemList.txt -+++ b/test/jdk/ProblemList.txt -@@ -21,6 +21,12 @@ - # or visit www.oracle.com if you need additional information or have any - # questions. - # -+ -+# -+# This file has been modified by Loongson Technology in 2022. These -+# modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made -+# available on the same license terms set forth above. -+# - ########################################################################### - # - # List of tests that should not be run by test/Makefile, for various reasons: -@@ -624,7 +630,7 @@ sun/security/pkcs11/sslecc/ClientJSSEServerJSSE.java 8161536 generic- - - sun/security/tools/keytool/ListKeychainStore.sh 8156889 macosx-all - --sun/security/tools/jarsigner/compatibility/SignTwice.java 8217375 windows-all -+sun/security/tools/jarsigner/compatibility/SignTwice.java 8217375,#24291 windows-all,generic-mips64el,generic-loongarch64 - sun/security/tools/jarsigner/warnings/BadKeyUsageTest.java 8026393 generic-all - - javax/net/ssl/ServerName/SSLEngineExplorerMatchedSNI.java 8212096 generic-all -@@ -895,3 +901,40 @@ jdk/jfr/event/oldobject/TestLargeRootSet.java 8205651 gener - - ############################################################################ - -+# loongson added -+java/awt/font/GlyphVector/NLGlyphTest.java #21476 generic-all -+java/lang/System/LoggerFinder/internal/BootstrapLogger/BootstrapLoggerTest.java generic-all -+java/rmi/server/UnicastRemoteObject/exportObject/GcDuringExport.java #10949 generic-mips64el,generic-loongarch64 -+java/util/logging/LocalizedLevelName.java generic-all -+java/util/logging/SimpleFormatterFormat.java generic-all -+jdk/jfr/api/consumer/TestRecordedFrame.java #10010 generic-mips64el,generic-loongarch64 -+jdk/jfr/jcmd/TestJcmdStartWithSettings.java #24259 generic-mips64el,generic-loongarch64 -+jdk/jfr/jvm/TestJFRIntrinsic.java #10011,JDK-8239423 generic-mips64el,generic-loongarch64 -+security/infra/java/security/cert/CertPathValidator/certification/LetsEncryptCA.java #24472 generic-loongarch64 -+sun/tools/jhsdb/BasicLauncherTest.java #9381 generic-mips64el -+sun/tools/jhsdb/HeapDumpTest.java #9381 generic-mips64el -+sun/tools/jhsdb/JShellHeapDumpTest.java #23705 generic-mips64el -+sun/util/logging/SourceClassName.java generic-all -+tools/pack200/DeprecatePack200.java generic-all -+tools/jpackage/linux/AppAboutUrlTest.java#id0 #24942 generic-loongarch64 -+tools/jpackage/linux/AppCategoryTest.java #24942 generic-loongarch64 -+tools/jpackage/linux/jdk/jpackage/tests/UsrTreeTest.java #24942 generic-loongarch64 -+tools/jpackage/linux/LicenseTypeTest.java #24942 generic-loongarch64 -+tools/jpackage/linux/LinuxBundleNameTest.java #24942 generic-loongarch64 -+tools/jpackage/linux/LinuxResourceTest.java #24942 generic-loongarch64 -+tools/jpackage/linux/PackageDepsTest.java #24942 generic-loongarch64 -+tools/jpackage/linux/ReleaseTest.java#id0 #24942 generic-loongarch64 -+tools/jpackage/linux/ShortcutHintTest.java#id0 #24942 generic-loongarch64 -+tools/jpackage/share/AddLauncherTest.java#id1 #24942 generic-loongarch64 -+tools/jpackage/share/AddLShortcutTest.java #24942 generic-loongarch64 -+tools/jpackage/share/AppContentTest.java #24942 generic-loongarch64 -+tools/jpackage/share/AppImagePackageTest.java #24942 generic-loongarch64 -+tools/jpackage/share/EmptyFolderPackageTest.java #24942 generic-loongarch64 -+tools/jpackage/share/FileAssociationsTest.java#id0 #24942 generic-loongarch64 -+tools/jpackage/share/IconTest.java #24942 
generic-loongarch64 -+tools/jpackage/share/InstallDirTest.java#id0 #24942 generic-loongarch64 -+tools/jpackage/share/jdk/jpackage/tests/VendorTest.java#id1 #24942 generic-loongarch64 -+tools/jpackage/share/MultiLauncherTwoPhaseTest.java #24942 generic-loongarch64 -+tools/jpackage/share/MultiNameTwoPhaseTest.java #24942 generic-loongarch64 -+tools/jpackage/share/RuntimePackageTest.java#id0 #24942 generic-loongarch64 -+tools/jpackage/share/SimplePackageTest.java #24942 generic-loongarch64 diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java index 7990c49a1f..025048c6b0 100644 --- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java @@ -66300,71 +116402,8 @@ index 7ae0b68401..19689722d2 100644 osMap.put("Linux-s390x-64", new String[] { "/usr/lib64/" }); osMap.put("Windows-x86-32", new String[] {}); osMap.put("Windows-amd64-64", new String[] {}); -diff --git a/test/langtools/ProblemList-Xcomp.txt b/test/langtools/ProblemList-Xcomp.txt -new file mode 100644 -index 0000000000..60c3cd14ac ---- /dev/null -+++ b/test/langtools/ProblemList-Xcomp.txt -@@ -0,0 +1,35 @@ -+# -+# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -+# Copyright (c) 2021, Loongson Technology. All rights reserved. -+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+# -+# This code is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License version 2 only, as -+# published by the Free Software Foundation. -+# -+# This code is distributed in the hope that it will be useful, but WITHOUT -+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+# version 2 for more details (a copy is included in the LICENSE file that -+# accompanied this code). -+# -+# You should have received a copy of the GNU General Public License version -+# 2 along with this work; if not, write to the Free Software Foundation, -+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+# -+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+# or visit www.oracle.com if you need additional information or have any -+# questions. -+# -+ -+############################################################################# -+# loongson added -+jdk/jshell/FailOverExecutionControlTest.java generic-mips64el -+jdk/jshell/JdiBadOptionLaunchExecutionControlTest.java generic-mips64el -+jdk/jshell/JdiFailingLaunchExecutionControlTest.java generic-mips64el -+jdk/jshell/JdiFailingListenExecutionControlTest.java generic-mips64el -+jdk/jshell/JdiHangingLaunchExecutionControlTest.java generic-mips64el -+tools/javac/completionDeps/DepsAndAnno.java #error generic-mips64el -+tools/javac/Paths/Class-Path.sh #error generic-mips64el -+tools/javac/Paths/Diagnostics.sh #error generic-mips64el -+tools/javac/Paths/wcMineField.sh #error generic-mips64el -diff --git a/test/langtools/ProblemList.txt b/test/langtools/ProblemList.txt -index 847d4f6939..8e085d9ae7 100644 ---- a/test/langtools/ProblemList.txt -+++ b/test/langtools/ProblemList.txt -@@ -21,6 +21,12 @@ - # or visit www.oracle.com if you need additional information or have any - # questions. - # -+ -+# -+# This file has been modified by Loongson Technology in 2021. These -+# modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made -+# available on the same license terms set forth above. 
-+# - ########################################################################### - - ########################################################################### -@@ -78,3 +84,4 @@ tools/sjavac/ClasspathDependencies.java 8158002 generic-all Requires i - # - # jdeps - -+# loongson added diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java -index e526252fe2..fc8114c965 100644 +index 739c351806..0ade89fff1 100644 --- a/test/lib/jdk/test/lib/Platform.java +++ b/test/lib/jdk/test/lib/Platform.java @@ -21,6 +21,12 @@ @@ -66372,15 +116411,15 @@ index e526252fe2..fc8114c965 100644 */ +/* -+ * This file has been modified by Loongson Technology in 2021, These -+ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + package jdk.test.lib; - + import java.io.FileNotFoundException; -@@ -247,10 +253,18 @@ public class Platform { +@@ -225,6 +231,14 @@ public class Platform { return isArch("(i386)|(x86(?!_64))"); } @@ -66388,16 +116427,12 @@ index e526252fe2..fc8114c965 100644 + return isArch("loongarch64"); + } + - public static String getOsArch() { - return osArch; - } - + public static boolean isMIPS() { + return isArch("mips.*"); + } + - public static boolean isRoot() { - return userName.equals("root"); + public static String getOsArch() { + return osArch; } diff --git a/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java b/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java new file mode 100644 @@ -66492,79 +116527,6 @@ index 0000000000..81fd956a4e + } + +} -diff --git a/test/micro/org/openjdk/bench/loongarch/C2Memory.java b/test/micro/org/openjdk/bench/loongarch/C2Memory.java -new file mode 100644 -index 0000000000..65cf1773d0 ---- /dev/null -+++ b/test/micro/org/openjdk/bench/loongarch/C2Memory.java -@@ -0,0 +1,67 @@ -+/* -+ * Copyright (c) 2021, Loongson Technology. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ */ -+ -+package org.openjdk.bench.loongarch; -+ -+import org.openjdk.jmh.annotations.Benchmark; -+ -+public class C2Memory { -+ public static int sum; -+ public static int array1[] = new int[0x8000]; -+ public static int array2[] = new int[0x8000]; -+ -+ @Benchmark -+ public void testMethod() { -+ for (int i = 0; i<10000;i++) { -+ sum = array1[0x7fff] + array2[0x1f0]; -+ array1[0x7fff] += array2[0x1f0]; -+ } -+ } -+ -+ @Benchmark -+ public void testBasePosIndexOffset() { -+ int xstart = 30000; -+ long carry = 63; -+ -+ for (int j=xstart; j >= 0; j--) { -+ array2[j] = array1[xstart]; -+ } -+ -+ array2[xstart] = (int)carry; -+ } -+ -+ public static byte b_array1[] = new byte[0x8000]; -+ public static byte b_array2[] = new byte[0x8000]; -+ -+ @Benchmark -+ public void testBaseIndexOffset() { -+ int xstart = 10000; -+ byte carry = 63; -+ -+ for (int j=xstart; j >= 0; j--) { -+ b_array2[j] = b_array1[xstart]; -+ } -+ -+ b_array2[xstart] = carry; -+ } -+} diff --git a/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java b/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java new file mode 100644 index 0000000000..58400cadf6 -- Gitee
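The Platform.java hunk above extends the jdk.test.lib test library with isLoongArch64() and isMIPS() predicates next to the existing isArch()-based helpers. A minimal sketch of how a test might branch on them follows; the class name and printed messages are illustrative assumptions, while Platform.isLoongArch64(), Platform.isMIPS(), and Platform.getOsArch() come from the patched library shown in the hunk.

    import jdk.test.lib.Platform;

    public class LoongsonPlatformExample {
        public static void main(String[] args) {
            // isLoongArch64()/isMIPS() match os.arch through the same
            // isArch(...) regex helper used by the other Platform predicates.
            if (Platform.isLoongArch64() || Platform.isMIPS()) {
                System.out.println("Loongson port detected: " + Platform.getOsArch());
            } else {
                System.out.println("Other architecture: " + Platform.getOsArch());
            }
        }
    }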