From ed6354398ae309d029a9e5b0aedd9a0e1fdc9586 Mon Sep 17 00:00:00 2001 From: panxuefeng Date: Wed, 1 Mar 2023 13:54:10 +0800 Subject: [PATCH] Update to Loongson jdk-11.0.18+10-ls-1 --- LoongArch64-support-11.0.15.patch | 56542 ++++++++++++++++++++++++++-- 1 file changed, 53252 insertions(+), 3290 deletions(-) diff --git a/LoongArch64-support-11.0.15.patch b/LoongArch64-support-11.0.15.patch index 88bd20f..0cc36eb 100644 --- a/LoongArch64-support-11.0.15.patch +++ b/LoongArch64-support-11.0.15.patch @@ -1,11 +1,33 @@ -commit 8ad6215509c85805e7ebc1edfefde04b0fb087d1 -Author: aoqi -Date: Mon Aug 8 21:21:49 2022 +0800 - - init la - +diff --git a/make/CompileJavaModules.gmk b/make/CompileJavaModules.gmk +index 40c7e06f54..142485c05a 100644 +--- a/make/CompileJavaModules.gmk ++++ b/make/CompileJavaModules.gmk +@@ -434,6 +434,7 @@ jdk.internal.vm.ci_ADD_JAVAC_FLAGS += -parameters -Xlint:-exports -XDstringConca + + jdk.internal.vm.compiler_ADD_JAVAC_FLAGS += -parameters -XDstringConcat=inline \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.aarch64=jdk.internal.vm.compiler \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.loongarch64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.amd64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code.site=jdk.internal.vm.compiler \ +@@ -441,6 +442,7 @@ jdk.internal.vm.compiler_ADD_JAVAC_FLAGS += -parameters -XDstringConcat=inline \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.common=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.aarch64=jdk.internal.vm.compiler \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.loongarch64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.amd64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.sparc=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.meta=jdk.internal.vm.compiler \ +@@ -460,6 +462,7 @@ jdk.internal.vm.compiler_EXCLUDES += \ + org.graalvm.compiler.api.directives.test \ + org.graalvm.compiler.api.test \ + org.graalvm.compiler.asm.aarch64.test \ ++ org.graalvm.compiler.asm.loongarch64.test \ + org.graalvm.compiler.asm.amd64.test \ + org.graalvm.compiler.asm.sparc.test \ + org.graalvm.compiler.asm.test \ diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 -index 5a3adf6b04..94f1eba2cc 100644 +index a3e1e00b2c..22f479120b 100644 --- a/make/autoconf/hotspot.m4 +++ b/make/autoconf/hotspot.m4 @@ -34,6 +34,12 @@ DEPRECATED_JVM_FEATURES="trace" @@ -21,7 +43,7 @@ index 5a3adf6b04..94f1eba2cc 100644 ############################################################################### # Check if the specified JVM variant should be built. To be used in shell if # constructs, like this: -@@ -334,6 +340,26 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], +@@ -337,6 +343,26 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], HOTSPOT_TARGET_CPU_ARCH=arm fi @@ -48,8 +70,22 @@ index 5a3adf6b04..94f1eba2cc 100644 # Verify that dependencies are met for explicitly set features. if HOTSPOT_CHECK_JVM_FEATURE(jvmti) && ! 
HOTSPOT_CHECK_JVM_FEATURE(services); then AC_MSG_ERROR([Specified JVM feature 'jvmti' requires feature 'services']) +@@ -421,10 +447,11 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], + JVM_FEATURES_jvmci="" + INCLUDE_JVMCI="false" + else +- # Only enable jvmci on x86_64, sparcv9 and aarch64 ++ # Only enable jvmci on x86_64, sparcv9, aarch64 and loongarch64 + if test "x$OPENJDK_TARGET_CPU" = "xx86_64" || \ + test "x$OPENJDK_TARGET_CPU" = "xsparcv9" || \ +- test "x$OPENJDK_TARGET_CPU" = "xaarch64" ; then ++ test "x$OPENJDK_TARGET_CPU" = "xaarch64" || \ ++ test "x$OPENJDK_TARGET_CPU" = "xloongarch64" ; then + AC_MSG_RESULT([yes]) + JVM_FEATURES_jvmci="jvmci" + INCLUDE_JVMCI="true" diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 -index 95bdbb2831..9184315c7e 100644 +index f89b22f5fc..a7aa4f4aaa 100644 --- a/make/autoconf/platform.m4 +++ b/make/autoconf/platform.m4 @@ -23,6 +23,12 @@ @@ -65,7 +101,7 @@ index 95bdbb2831..9184315c7e 100644 # Support macro for PLATFORM_EXTRACT_TARGET_AND_BUILD. # Converts autoconf style CPU name to OpenJDK style, into # VAR_CPU, VAR_CPU_ARCH, VAR_CPU_BITS and VAR_CPU_ENDIAN. -@@ -512,6 +518,12 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], +@@ -554,6 +560,12 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], HOTSPOT_$1_CPU_DEFINE=PPC64 elif test "x$OPENJDK_$1_CPU" = xppc64le; then HOTSPOT_$1_CPU_DEFINE=PPC64 @@ -105,10 +141,10 @@ index fdd2c0ca3d..318191233a 100644 assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); diff --git a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp -index 8c23214a85..ce0905740f 100644 +index cebc1e410d..816226c068 100644 --- a/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp +++ b/src/hotspot/cpu/aarch64/c1_LIRGenerator_aarch64.cpp -@@ -277,18 +277,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { +@@ -260,18 +260,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { __ store(reg, addr); } @@ -1258,10 +1294,10 @@ index 0000000000..67c9a859aa +} diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp new file mode 100644 -index 0000000000..b78f2ab280 +index 0000000000..9f50e46138 --- /dev/null +++ b/src/hotspot/cpu/loongarch/assembler_loongarch.hpp -@@ -0,0 +1,2799 @@ +@@ -0,0 +1,2804 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. @@ -1287,8 +1323,8 @@ index 0000000000..b78f2ab280 + * + */ + -+#ifndef CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP + +#include "asm/register.hpp" +#include "runtime/vm_version.hpp" @@ -2719,7 +2755,6 @@ index 0000000000..b78f2ab280 + // else return -1. 
+ static int is_jlong_mask(jlong x); + -+ // LOONGARCH lui is sign extended, so if you wan't to use imm, you have to use the follow + static int split_low16(int x) { + return (x & 0xffff); + } @@ -2748,6 +2783,12 @@ index 0000000000..b78f2ab280 + return (x & 0xfff); + } + ++ static inline void split_simm32(jlong si32, jint& si12, jint& si20) { ++ si12 = ((jint)(si32 & 0xfff) << 20) >> 20; ++ si32 += (si32 & 0x800) << 1; ++ si20 = si32 >> 12; ++ } ++ + static inline void split_simm38(jlong si38, jint& si18, jint& si20) { + si18 = ((jint)(si38 & 0x3ffff) << 14) >> 14; + si38 += (si38 & 0x20000) << 1; @@ -3200,7 +3241,7 @@ index 0000000000..b78f2ab280 + assert(is_uimm(hint, 15), "not a unsigned 15-bit int"); + + if (os::is_ActiveCoresMP()) -+ andi(R0, R0, 1); ++ andi(R0, R0, 0); + else + emit_int32(insn_I15(dbar_op, hint)); + } @@ -4060,16 +4101,16 @@ index 0000000000..b78f2ab280 + +#include "assembler_loongarch.inline.hpp" + -+#endif // CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp new file mode 100644 -index 0000000000..0f4f452bf5 +index 0000000000..616cd9efe2 --- /dev/null +++ b/src/hotspot/cpu/loongarch/assembler_loongarch.inline.hpp @@ -0,0 +1,47 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -4092,8 +4133,8 @@ index 0000000000..0f4f452bf5 + * + */ + -+#ifndef CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP -+#define CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP ++#ifndef CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/codeBuffer.hpp" @@ -4113,10 +4154,10 @@ index 0000000000..0f4f452bf5 + emit_int32(x); +} + -+#endif // CPU_LOONGARCH_VM_ASSEMBLER_LOONGARCH_INLINE_HPP ++#endif // CPU_LOONGARCH_ASSEMBLER_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/cpu/loongarch/bytes_loongarch.hpp b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp new file mode 100644 -index 0000000000..558cc1dc6e +index 0000000000..c15344eb39 --- /dev/null +++ b/src/hotspot/cpu/loongarch/bytes_loongarch.hpp @@ -0,0 +1,73 @@ @@ -4145,8 +4186,8 @@ index 0000000000..558cc1dc6e + * + */ + -+#ifndef CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_BYTES_LOONGARCH_HPP ++#define CPU_LOONGARCH_BYTES_LOONGARCH_HPP + +#include "memory/allocation.hpp" + @@ -4155,12 +4196,12 @@ index 0000000000..558cc1dc6e + // Returns true if the byte ordering used by Java is different from the native byte ordering + // of the underlying machine. For example, this is true for Intel x86, but false for Solaris + // on Sparc. 
-+ // we use loongarch, so return true ++ // we use LoongArch, so return true + static inline bool is_Java_byte_ordering_different(){ return true; } + + + // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering -+ // (no special code is needed since loongarch CPUs can access unaligned data) ++ // (no special code is needed since LoongArch CPUs can access unaligned data) + static inline u2 get_native_u2(address p) { return *(u2*)p; } + static inline u4 get_native_u4(address p) { return *(u4*)p; } + static inline u8 get_native_u8(address p) { return *(u8*)p; } @@ -4172,7 +4213,7 @@ index 0000000000..558cc1dc6e + + // Efficient reading and writing of unaligned unsigned data in Java + // byte ordering (i.e. big-endian ordering). Byte-order reversal is -+ // needed since LOONGARCH64EL CPUs use little-endian format. ++ // needed since LoongArch64 CPUs use little-endian format. + static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } + static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } + static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } @@ -4192,7 +4233,7 @@ index 0000000000..558cc1dc6e +// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] +#include OS_CPU_HEADER_INLINE(bytes) + -+#endif // CPU_LOONGARCH_VM_BYTES_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_BYTES_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_CodeStubs_loongarch_64.cpp new file mode 100644 index 0000000000..c0eeb63962 @@ -4630,13 +4671,13 @@ index 0000000000..1140e44431 +#endif // CPU_LOONGARCH_C1_DEFS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp new file mode 100644 -index 0000000000..b613abfa04 +index 0000000000..bd8578c72a --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_FpuStackSim_loongarch.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -4662,7 +4703,7 @@ index 0000000000..b613abfa04 +#ifndef CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP +#define CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP + -+// No FPU stack on LOONGARCH ++// No FPU stack on LoongArch +class FpuStackSim; + +#endif // CPU_LOONGARCH_C1_FPUSTACKSIM_LOONGARCH_HPP @@ -4854,7 +4895,7 @@ index 0000000000..4f0cf05361 +#endif // CPU_LOONGARCH_C1_FRAMEMAP_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp new file mode 100644 -index 0000000000..eb333c61c1 +index 0000000000..3b60899071 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_FrameMap_loongarch_64.cpp @@ -0,0 +1,354 @@ @@ -5206,7 +5247,7 @@ index 0000000000..eb333c61c1 + +// JSR 292 +LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { -+ return LIR_OprFact::illegalOpr; // Not needed on loongarch64 ++ return LIR_OprFact::illegalOpr; // Not needed on LoongArch64 +} + +bool FrameMap::validate_frame() { @@ -8696,13 +8737,13 @@ index 0000000000..c989e25c3a +#undef __ diff --git a/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp new file mode 100644 -index 0000000000..723010867e +index 0000000000..72a80f37c4 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_LIRGenerator_loongarch_64.cpp @@ -0,0 +1,1396 @@ +/* + * Copyright (c) 2005, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -9423,7 +9464,7 @@ index 0000000000..723010867e + bool is_oop = is_reference_type(type); + LIR_Opr result = new_register(type); + value.load_item(); -+ assert(type == T_INT || is_oop LP64_ONLY( || type == T_LONG ), "unexpected type"); ++ assert(type == T_INT || is_oop || type == T_LONG , "unexpected type"); + LIR_Opr tmp = new_register(T_INT); + __ xchg(addr, value.result(), result, tmp); + return result; @@ -9432,7 +9473,7 @@ index 0000000000..723010867e +LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) { + LIR_Opr result = new_register(type); + value.load_item(); -+ assert(type == T_INT LP64_ONLY( || type == T_LONG ), "unexpected type"); ++ assert(type == T_INT || type == T_LONG , "unexpected type"); + LIR_Opr tmp = new_register(T_INT); + __ xadd(addr, value.result(), result, tmp); + return result; @@ -10412,13 +10453,13 @@ index 0000000000..38ff4c5836 +#endif // CPU_LOONGARCH_C1_MACROASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp new file mode 100644 -index 0000000000..28100b0116 +index 0000000000..17ff93a595 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_MacroAssembler_loongarch_64.cpp -@@ -0,0 +1,343 @@ +@@ -0,0 +1,344 @@ +/* + * Copyright (c) 1999, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -10450,6 +10491,7 @@ index 0000000000..28100b0116 +#include "oops/arrayOop.hpp" +#include "oops/markOop.hpp" +#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" +#include "runtime/os.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" @@ -11905,13 +11947,13 @@ index 0000000000..aaa708f71e +} diff --git a/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp new file mode 100644 -index 0000000000..f4df7b8958 +index 0000000000..164016e123 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c1_globals_loongarch.hpp -@@ -0,0 +1,67 @@ +@@ -0,0 +1,71 @@ +/* + * Copyright (c) 2000, 2021, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2021, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -11945,6 +11987,8 @@ index 0000000000..f4df7b8958 + +#ifndef COMPILER2 +define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, UseTLAB, true ); ++define_pd_global(bool, ResizeTLAB, true ); +define_pd_global(bool, InlineIntrinsics, true ); +define_pd_global(bool, PreferInterpreterNativeStubs, false); +define_pd_global(bool, ProfileTraps, false); @@ -11953,6 +11997,7 @@ index 0000000000..f4df7b8958 +define_pd_global(intx, CompileThreshold, 1500 ); + +define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, FreqInlineSize, 325 ); +define_pd_global(intx, NewSizeThreadIncrease, 4*K ); +define_pd_global(intx, InitialCodeCacheSize, 160*K); +define_pd_global(intx, ReservedCodeCacheSize, 32*M ); @@ -11963,6 +12008,7 @@ index 0000000000..f4df7b8958 +define_pd_global(intx, CodeCacheExpansionSize, 32*K ); +define_pd_global(uintx, CodeCacheMinBlockLength, 1); +define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(uintx, MetaspaceSize, 12*M ); +define_pd_global(bool, NeverActAsServerClassMachine, true ); +define_pd_global(uint64_t,MaxRAM, 1ULL*G); +define_pd_global(bool, CICompileOSR, true ); @@ -11978,13 +12024,13 @@ index 0000000000..f4df7b8958 +#endif // CPU_LOONGARCH_C1_GLOBALS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp new file mode 100644 -index 0000000000..6d54dd6c06 +index 0000000000..27a4ec5229 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c2_globals_loongarch.hpp @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -12007,8 +12053,8 @@ index 0000000000..6d54dd6c06 + * + */ + -+#ifndef CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" @@ -12075,16 +12121,16 @@ index 0000000000..6d54dd6c06 +// Ergonomics related flags +define_pd_global(bool, NeverActAsServerClassMachine, false); + -+#endif // CPU_LOONGARCH_VM_C2_GLOBALS_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_C2_GLOBALS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp new file mode 100644 -index 0000000000..cd27f36bfc +index 0000000000..ec78b942d4 --- /dev/null +++ b/src/hotspot/cpu/loongarch/c2_init_loongarch.cpp -@@ -0,0 +1,34 @@ +@@ -0,0 +1,37 @@ +/* + * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -12111,14 +12157,17 @@ index 0000000000..cd27f36bfc +#include "opto/compile.hpp" +#include "opto/node.hpp" + -+// processor dependent initialization for loongarch ++// processor dependent initialization for LoongArch ++ ++extern void reg_mask_init(); + +void Compile::pd_compiler2_init() { + guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++ reg_mask_init(); +} diff --git a/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp new file mode 100644 -index 0000000000..33ff315261 +index 0000000000..653d95806b --- /dev/null +++ b/src/hotspot/cpu/loongarch/codeBuffer_loongarch.hpp @@ -0,0 +1,35 @@ @@ -12147,8 +12196,8 @@ index 0000000000..33ff315261 + * + */ + -+#ifndef CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP ++#define CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP + +private: + void pd_initialize() {} @@ -12156,16 +12205,16 @@ index 0000000000..33ff315261 +public: + void flush_bundle(bool start_new_bundle) {} + -+#endif // CPU_LOONGARCH_VM_CODEBUFFER_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_CODEBUFFER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp new file mode 100644 -index 0000000000..ec86955854 +index 0000000000..d063d5d93e --- /dev/null +++ b/src/hotspot/cpu/loongarch/compiledIC_loongarch.cpp @@ -0,0 +1,148 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -12212,7 +12261,7 @@ index 0000000000..ec86955854 + // That's why we must use the macroassembler to generate a stub. 
+ MacroAssembler _masm(&cbuf); + -+ address base = __ start_a_stub(Compile::MAX_stubs_size); ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); + if (base == NULL) return NULL; // CodeBuffer::expand failed + // static stub relocation stores the instruction address of the call + @@ -12313,7 +12362,7 @@ index 0000000000..ec86955854 +#endif // !PRODUCT diff --git a/src/hotspot/cpu/loongarch/copy_loongarch.hpp b/src/hotspot/cpu/loongarch/copy_loongarch.hpp new file mode 100644 -index 0000000000..c97d7ba90a +index 0000000000..54b847a736 --- /dev/null +++ b/src/hotspot/cpu/loongarch/copy_loongarch.hpp @@ -0,0 +1,77 @@ @@ -12342,8 +12391,8 @@ index 0000000000..c97d7ba90a + * + */ + -+#ifndef CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_COPY_LOONGARCH_HPP ++#define CPU_LOONGARCH_COPY_LOONGARCH_HPP + +// Inline functions for memory copy and fill. + @@ -12393,10 +12442,10 @@ index 0000000000..c97d7ba90a + (void)memset(to, 0, count); +} + -+#endif //CPU_LOONGARCH_VM_COPY_LOONGARCH_HPP ++#endif //CPU_LOONGARCH_COPY_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp b/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp new file mode 100644 -index 0000000000..343a1b6c12 +index 0000000000..e4a92d1035 --- /dev/null +++ b/src/hotspot/cpu/loongarch/depChecker_loongarch.cpp @@ -0,0 +1,30 @@ @@ -12429,16 +12478,16 @@ index 0000000000..343a1b6c12 +#include "compiler/disassembler.hpp" +#include "depChecker_loongarch.hpp" + -+// Nothing to do on loongarch ++// Nothing to do on LoongArch diff --git a/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp b/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp new file mode 100644 -index 0000000000..26398f7c66 +index 0000000000..29c292a74a --- /dev/null +++ b/src/hotspot/cpu/loongarch/depChecker_loongarch.hpp @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -12461,15 +12510,15 @@ index 0000000000..26398f7c66 + * + */ + -+#ifndef CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP ++#define CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP + -+// Nothing to do on LOONGARCH ++// Nothing to do on LoongArch + -+#endif // CPU_LOONGARCH_VM_DEPCHECKER_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_DEPCHECKER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp new file mode 100644 -index 0000000000..ccd89e8d6d +index 0000000000..04359bc172 --- /dev/null +++ b/src/hotspot/cpu/loongarch/disassembler_loongarch.hpp @@ -0,0 +1,37 @@ @@ -12498,8 +12547,8 @@ index 0000000000..ccd89e8d6d + * + */ + -+#ifndef CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP + + static int pd_instruction_alignment() { + return sizeof(int); @@ -12509,10 +12558,10 @@ index 0000000000..ccd89e8d6d + return "gpr-names=64"; + } + -+#endif // CPU_LOONGARCH_VM_DISASSEMBLER_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_DISASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.cpp b/src/hotspot/cpu/loongarch/frame_loongarch.cpp new file mode 100644 -index 0000000000..bfeccffd35 +index 0000000000..9b4f3b88d4 --- /dev/null +++ b/src/hotspot/cpu/loongarch/frame_loongarch.cpp @@ -0,0 +1,690 @@ @@ -12911,11 +12960,11 @@ index 0000000000..bfeccffd35 + // Since the interpreter always saves FP if we record where it is then + // we don't have to always save FP on entry and exit to c2 compiled + // code, on entry will be enough. -+#ifdef COMPILER2 ++#ifdef COMPILER2_OR_JVMCI + if (map->update_map()) { + update_map_with_saved_link(map, (intptr_t**) addr_at(java_frame_link_offset)); + } -+#endif /* COMPILER2 */ ++#endif // COMPILER2_OR_JVMCI + return frame(sender_sp, unextended_sp, link(), sender_pc()); +} + @@ -12945,7 +12994,7 @@ index 0000000000..bfeccffd35 +//------------------------------------------------------------------------------ +// frame::adjust_unextended_sp +void frame::adjust_unextended_sp() { -+ // On LOONGARCH, sites calling method handle intrinsics and lambda forms are treated ++ // On LoongArch, sites calling method handle intrinsics and lambda forms are treated + // as any other call site. Therefore, no special action is needed when we are + // returning to any of these call sites. 
+ @@ -13208,7 +13257,7 @@ index 0000000000..bfeccffd35 +#endif diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.hpp new file mode 100644 -index 0000000000..fb3040c458 +index 0000000000..b16389b3a3 --- /dev/null +++ b/src/hotspot/cpu/loongarch/frame_loongarch.hpp @@ -0,0 +1,171 @@ @@ -13237,8 +13286,8 @@ index 0000000000..fb3040c458 + * + */ + -+#ifndef CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_HPP ++#define CPU_LOONGARCH_FRAME_LOONGARCH_HPP + +#include "runtime/synchronizer.hpp" + @@ -13382,16 +13431,16 @@ index 0000000000..fb3040c458 + + static jint interpreter_frame_expression_stack_direction() { return -1; } + -+#endif // CPU_LOONGARCH_VM_FRAME_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_FRAME_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp new file mode 100644 -index 0000000000..8039789726 +index 0000000000..1ddc038eea --- /dev/null +++ b/src/hotspot/cpu/loongarch/frame_loongarch.inline.hpp -@@ -0,0 +1,246 @@ +@@ -0,0 +1,252 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -13414,8 +13463,8 @@ index 0000000000..8039789726 + * + */ + -+#ifndef CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP -+#define CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP ++#ifndef CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP + +#include "code/codeCache.hpp" +#include "code/vmreg.inline.hpp" @@ -13534,6 +13583,12 @@ index 0000000000..8039789726 + return (intptr_t*) *(intptr_t **)addr_at(native_frame_link_offset); +} + ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = is_java_frame() ? (intptr_t **)addr_at(java_frame_link_offset) ++ : (intptr_t **)addr_at(native_frame_link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ +inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } + +// Return address: @@ -13634,7 +13689,7 @@ index 0000000000..8039789726 + *((oop*) map->location(V0->as_VMReg())) = obj; +} + -+#endif // CPU_LOONGARCH_VM_FRAME_LOONGARCH_INLINE_HPP ++#endif // CPU_LOONGARCH_FRAME_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/gc/g1/g1BarrierSetAssembler_loongarch.cpp new file mode 100644 index 0000000000..2cdf3dddb7 @@ -14907,10 +14962,10 @@ index 0000000000..18a6f92bc2 +#endif // CPU_LOONGARCH_GC_SHARED_MODREFBARRIERSETASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp new file mode 100644 -index 0000000000..1276316760 +index 0000000000..dc21d001cc --- /dev/null +++ b/src/hotspot/cpu/loongarch/globalDefinitions_loongarch.hpp -@@ -0,0 +1,61 @@ +@@ -0,0 +1,53 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
@@ -14936,9 +14991,9 @@ index 0000000000..1276316760 + * + */ + -+#ifndef CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP -+// Size of LOONGARCH Instructions ++#ifndef CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP ++// Size of LoongArch Instructions +const int BytesPerInstWord = 4; + +const int StackAlignmentInBytes = (2*wordSize); @@ -14952,14 +15007,6 @@ index 0000000000..1276316760 +#define SUPPORTS_NATIVE_CX8 + +// FIXME: LA -+// According to the ARMv8 ARM, "Concurrent modification and execution -+// of instructions can lead to the resulting instruction performing -+// any behavior that can be achieved by executing any sequence of -+// instructions that can be executed from the same Exception level, -+// except where the instruction before modification and the -+// instruction after modification is a B, BL, NOP, BKPT, SVC, HVC, or -+// SMC instruction." -+// +// This makes the games we play when patching difficult, so when we +// come across an access that needs patching we deoptimize. There are +// ways we can avoid this, but these would slow down C1-compiled code @@ -14971,13 +15018,13 @@ index 0000000000..1276316760 + +#define THREAD_LOCAL_POLL + -+#endif // CPU_LOONGARCH_VM_GLOBALDEFINITIONS_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_GLOBALDEFINITIONS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/globals_loongarch.hpp b/src/hotspot/cpu/loongarch/globals_loongarch.hpp new file mode 100644 -index 0000000000..d4c18dfd87 +index 0000000000..e6b758b554 --- /dev/null +++ b/src/hotspot/cpu/loongarch/globals_loongarch.hpp -@@ -0,0 +1,132 @@ +@@ -0,0 +1,109 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. @@ -15003,8 +15050,8 @@ index 0000000000..d4c18dfd87 + * + */ + -+#ifndef CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP ++#define CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP + +#include "utilities/globalDefinitions.hpp" +#include "utilities/macros.hpp" @@ -15020,14 +15067,7 @@ index 0000000000..d4c18dfd87 +define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast + +define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. -+// See 4827828 for this change. There is no globals_core_i486.hpp. I can't -+// assign a different value for C2 without touching a number of files. Use -+// #ifdef to minimize the change as it's late in Mantis. -- FIXME. -+// c1 doesn't have this problem because the fix to 4858033 assures us -+// the the vep is aligned at CodeEntryAlignment whereas c2 only aligns -+// the uep and the vep doesn't get real alignment but just slops on by -+// only assured that the entry instruction meets the 5 byte size requirement. 
-+define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, CodeEntryAlignment, 16); +define_pd_global(intx, OptoLoopAlignment, 16); +define_pd_global(intx, InlineFrequencyCount, 100); +define_pd_global(intx, InlineSmallCode, 2000); @@ -15084,32 +15124,16 @@ index 0000000000..d4c18dfd87 + product(bool, UseLASX, false, \ + "Use LASX 256-bit vector instructions") \ + \ -+ product(intx, UseSyncLevel, 10000, \ -+ "The sync level on Loongson CPUs" \ -+ "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ -+ "UseSyncLevel == 4000, 101, maybe for GS464V" \ -+ "UseSyncLevel == 3000, 001, maybe for GS464V" \ -+ "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ -+ "UseSyncLevel == 1000, 110, maybe for GS464") \ -+ \ + product(bool, UseBarriersForVolatile, false, \ + "Use memory barriers to implement volatile accesses") \ + \ -+ develop(bool, UseBoundCheckInstruction, false, \ -+ "Use bound check instruction") \ -+ \ -+ product(intx, SetFSFOFN, 999, \ -+ "Set the FS/FO/FN bits in FCSR" \ -+ "999 means FS/FO/FN will not be changed" \ -+ "=XYZ, with X:FS, Y:FO, Z:FN, X, Y and Z in 0=off, 1=on") \ -+ \ + product(bool, UseCRC32, false, \ + "Use CRC32 instructions for CRC32 computation") \ + \ + product(bool, UseActiveCoresMP, false, \ + "Eliminate barriers for single active cpu") + -+#endif // CPU_LOONGARCH_VM_GLOBALS_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_GLOBALS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp b/src/hotspot/cpu/loongarch/icBuffer_loongarch.cpp new file mode 100644 index 0000000000..7b97694827 @@ -15258,13 +15282,13 @@ index 0000000000..1ae7e5376c +} diff --git a/src/hotspot/cpu/loongarch/icache_loongarch.hpp b/src/hotspot/cpu/loongarch/icache_loongarch.hpp new file mode 100644 -index 0000000000..15e45cb350 +index 0000000000..3a180549fc --- /dev/null +++ b/src/hotspot/cpu/loongarch/icache_loongarch.hpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -15287,8 +15311,8 @@ index 0000000000..15e45cb350 + * + */ + -+#ifndef CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_ICACHE_LOONGARCH_HPP ++#define CPU_LOONGARCH_ICACHE_LOONGARCH_HPP + +// Interface for updating the instruction cache. Whenever the VM modifies +// code, part of the processor instruction cache potentially has to be flushed. @@ -15302,13 +15326,13 @@ index 0000000000..15e45cb350 + }; +}; + -+#endif // CPU_LOONGARCH_VM_ICACHE_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_ICACHE_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp new file mode 100644 -index 0000000000..23abf7b891 +index 0000000000..53a06ba7fd --- /dev/null +++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch.hpp -@@ -0,0 +1,276 @@ +@@ -0,0 +1,281 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
@@ -15334,8 +15358,8 @@ index 0000000000..23abf7b891 + * + */ + -+#ifndef CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP -+#define CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP ++#ifndef CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP + +#include "asm/assembler.hpp" +#include "asm/macroAssembler.hpp" @@ -15345,6 +15369,7 @@ index 0000000000..23abf7b891 + +// This file specializes the assember with interpreter-specific macros + ++typedef ByteSize (*OffsetFunction)(uint); + +class InterpreterMacroAssembler: public MacroAssembler { +#ifndef CC_INTERP @@ -15544,6 +15569,10 @@ index 0000000000..23abf7b891 + Register reg2, int start_row, + Label& done, bool is_virtual_call); + ++ void record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset); + void update_mdp_by_offset(Register mdp_in, int offset_of_offset); + void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); + void update_mdp_by_constant(Register mdp_in, int constant); @@ -15584,16 +15613,16 @@ index 0000000000..23abf7b891 + void notify_method_exit(TosState state, NotifyMethodExitMode mode); +}; + -+#endif // CPU_LOONGARCH_VM_INTERP_MASM_LOONGARCH_64_HPP ++#endif // CPU_LOONGARCH_INTERP_MASM_LOONGARCH_64_HPP diff --git a/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp new file mode 100644 -index 0000000000..85ef3d5535 +index 0000000000..c533a57652 --- /dev/null +++ b/src/hotspot/cpu/loongarch/interp_masm_loongarch_64.cpp -@@ -0,0 +1,2019 @@ +@@ -0,0 +1,2043 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -15630,6 +15659,7 @@ index 0000000000..85ef3d5535 +#include "prims/jvmtiThreadState.hpp" +#include "runtime/basicLock.hpp" +#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" +#include "runtime/safepointMechanism.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/thread.inline.hpp" @@ -15682,7 +15712,7 @@ index 0000000000..85ef3d5535 + int number_of_arguments) { + // interpreter specific + // -+ // Note: No need to save/restore bcp & locals (r13 & r14) pointer ++ // Note: No need to save/restore bcp & locals pointer + // since these are callee saved registers and no blocking/ + // GC can happen in leaf calls. + // Further Note: DO NOT save/restore bcp/locals. If a caller has @@ -16119,9 +16149,9 @@ index 0000000000..85ef3d5535 + + +// The following two routines provide a hook so that an implementation -+// can schedule the dispatch in two parts. loongarch64 does not do this. ++// can schedule the dispatch in two parts. LoongArch64 does not do this. 
+void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { -+ // Nothing loongarch64 specific to be done here ++ // Nothing LoongArch64 specific to be done here +} + +void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { @@ -16215,7 +16245,7 @@ index 0000000000..85ef3d5535 + + +void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { -+ // load next bytecode (load before advancing r13 to prevent AGI) ++ // load next bytecode + ld_bu(Rnext, BCP, step); + increment(BCP, step); + dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); @@ -16510,10 +16540,10 @@ index 0000000000..85ef3d5535 + save_bcp(); // Save in case of exception + + // Convert from BasicObjectLock structure to object and BasicLock structure -+ // Store the BasicLock address into %T2 ++ // Store the BasicLock address into tmp_reg + addi_d(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); + -+ // Load oop into scr_reg(%T1) ++ // Load oop into scr_reg + ld_d(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); + // free entry + st_d(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); @@ -16915,14 +16945,36 @@ index 0000000000..85ef3d5535 + if (is_virtual_call) { + increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); + } -+ return; ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset())); ++ } ++#endif // INCLUDE_JVMCI ++ } else { ++ int non_profiled_offset = -1; ++ if (is_virtual_call) { ++ non_profiled_offset = in_bytes(CounterData::count_offset()); ++ } ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()); ++ } ++#endif // INCLUDE_JVMCI ++ ++ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, ++ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); + } ++} + -+ int last_row = VirtualCallData::row_limit() - 1; ++void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset) { ++ int last_row = total_rows - 1; + assert(start_row <= last_row, "must be work left to do"); -+ // Test this row for both the receiver and for null. ++ // Test this row for both the item and for null. + // Take any of three different outcomes: -+ // 1. found receiver => increment count and goto done ++ // 1. found item => increment count and goto done + // 2. found null => keep looking for case 1, maybe allocate this cell + // 3. found something else => keep looking for cases 1 and 2 + // Case 3 is handled by a recursive call. @@ -16930,59 +16982,60 @@ index 0000000000..85ef3d5535 + Label next_test; + bool test_for_null_also = (row == start_row); + -+ // See if the receiver is receiver[n]. -+ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); -+ test_mdp_data_at(mdp, recvr_offset, receiver, ++ // See if the receiver is item[n]. ++ int item_offset = in_bytes(item_offset_fn(row)); ++ test_mdp_data_at(mdp, item_offset, item, + (test_for_null_also ? reg2 : noreg), + next_test); -+ // (Reg2 now contains the receiver from the CallData.) ++ // (Reg2 now contains the item from the CallData.) + -+ // The receiver is receiver[n]. Increment count[n]. 
-+ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); ++ // The receiver is item[n]. Increment count[n]. ++ int count_offset = in_bytes(item_count_offset_fn(row)); + increment_mdp_data_at(mdp, count_offset); -+ beq(R0, R0, done); ++ b(done); + bind(next_test); + + if (test_for_null_also) { + Label found_null; -+ // Failed the equality check on receiver[n]... Test for null. ++ // Failed the equality check on item[n]... Test for null. + if (start_row == last_row) { + // The only thing left to do is handle the null case. -+ if (is_virtual_call) { -+ beq(reg2, R0, found_null); -+ // Receiver did not match any saved receiver and there is no empty row for it. ++ if (non_profiled_offset >= 0) { ++ beqz(reg2, found_null); ++ // Item did not match any saved item and there is no empty row for it. + // Increment total counter to indicate polymorphic case. -+ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); -+ beq(R0, R0, done); ++ increment_mdp_data_at(mdp, non_profiled_offset); ++ b(done); + bind(found_null); + } else { -+ bne(reg2, R0, done); ++ bnez(reg2, done); + } + break; + } + // Since null is rare, make it be the branch-taken case. -+ beq(reg2, R0, found_null); ++ beqz(reg2, found_null); + + // Put all the "Case 3" tests here. -+ record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); ++ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, ++ item_offset_fn, item_count_offset_fn, non_profiled_offset); + -+ // Found a null. Keep searching for a matching receiver, ++ // Found a null. Keep searching for a matching item, + // but remember that this is an empty (unused) slot. + bind(found_null); + } + } + -+ // In the fall-through case, we found no matching receiver, but we -+ // observed the receiver[start_row] is NULL. ++ // In the fall-through case, we found no matching item, but we ++ // observed the item[start_row] is NULL. + -+ // Fill in the receiver field and increment the count. -+ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); -+ set_mdp_data_at(mdp, recvr_offset, receiver); -+ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); ++ // Fill in the item field and increment the count. ++ int item_offset = in_bytes(item_offset_fn(start_row)); ++ set_mdp_data_at(mdp, item_offset, item); ++ int count_offset = in_bytes(item_count_offset_fn(start_row)); + li(reg2, DataLayout::counter_increment); + set_mdp_data_at(mdp, count_offset, reg2); + if (start_row > 0) { -+ beq(R0, R0, done); ++ b(done); + } +} + @@ -17455,7 +17508,7 @@ index 0000000000..85ef3d5535 +} + +void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { -+ guarantee(T4 == tmp1, "You are reqired to use T4 as the index register for LOONGARCH !"); ++ guarantee(T4 == tmp1, "You are reqired to use T4 as the index register for LoongArch !"); + + if (ProfileInterpreter && MethodData::profile_parameters()) { + Label profile_continue, done; @@ -17612,13 +17665,13 @@ index 0000000000..85ef3d5535 +} diff --git a/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp new file mode 100644 -index 0000000000..052eb997e4 +index 0000000000..d53d951a16 --- /dev/null +++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch.hpp -@@ -0,0 +1,66 @@ +@@ -0,0 +1,62 @@ +/* + * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. 
All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -17641,10 +17694,11 @@ index 0000000000..052eb997e4 + * + */ + -+#ifndef CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP ++#define CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP + -+#include "memory/allocation.hpp" ++// This is included in the middle of class Interpreter. ++// Do not include files here. + +// native method calls + @@ -17665,12 +17719,7 @@ index 0000000000..052eb997e4 + + public: + // Creation -+ SignatureHandlerGenerator(methodHandle method, CodeBuffer* buffer) : NativeSignatureIterator(method) { -+ _masm = new MacroAssembler(buffer); -+ _num_int_args = (method->is_static() ? 1 : 0); -+ _num_fp_args = 0; -+ _stack_offset = 0; -+ } ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); + + // Code generation + void generate(uint64_t fingerprint); @@ -17681,13 +17730,13 @@ index 0000000000..052eb997e4 + static Register temp(); +}; + -+#endif // CPU_LOONGARCH_VM_INTERPRETERRT_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_INTERPRETERRT_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp new file mode 100644 -index 0000000000..3b75424f05 +index 0000000000..e2f31997b7 --- /dev/null +++ b/src/hotspot/cpu/loongarch/interpreterRT_loongarch_64.cpp -@@ -0,0 +1,266 @@ +@@ -0,0 +1,273 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. @@ -17738,6 +17787,13 @@ index 0000000000..3b75424f05 +#define T8 RT8 + +// Implementation of SignatureHandlerGenerator ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++ _num_int_args = (method->is_static() ? 1 : 0); ++ _num_fp_args = 0; ++ _stack_offset = 0; ++} + +void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { + __ ld_d(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); @@ -17956,13 +18012,13 @@ index 0000000000..3b75424f05 +IRT_END diff --git a/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp new file mode 100644 -index 0000000000..a2a86c5ead +index 0000000000..6814fa44a0 --- /dev/null +++ b/src/hotspot/cpu/loongarch/javaFrameAnchor_loongarch.hpp @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -17985,8 +18041,8 @@ index 0000000000..a2a86c5ead + * + */ + -+#ifndef CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP ++#define CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP + +private: + @@ -18046,7 +18102,7 @@ index 0000000000..a2a86c5ead + // Assert (last_Java_sp == NULL || fp == NULL) + void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } + -+#endif // CPU_LOONGARCH_VM_JAVAFRAMEANCHOR_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_JAVAFRAMEANCHOR_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp b/src/hotspot/cpu/loongarch/jniFastGetField_loongarch_64.cpp new file mode 100644 index 0000000000..114e47b053 @@ -18220,7 +18276,7 @@ index 0000000000..114e47b053 +} diff --git a/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp new file mode 100644 -index 0000000000..dc838b3531 +index 0000000000..b281f86372 --- /dev/null +++ b/src/hotspot/cpu/loongarch/jniTypes_loongarch.hpp @@ -0,0 +1,144 @@ @@ -18249,8 +18305,8 @@ index 0000000000..dc838b3531 + * + */ + -+#ifndef CPU_LOOGNARCH_VM_JNITYPES_LOOGNARCH_HPP -+#define CPU_LOOGNARCH_VM_JNITYPES_LOOGNARCH_HPP ++#ifndef CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP ++#define CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP + +#include "jni.h" +#include "memory/allocation.hpp" @@ -18367,7 +18423,212 @@ index 0000000000..dc838b3531 +#undef _JNI_SLOT_OFFSET +}; + -+#endif // CPU_LOOGNARCH_VM_JNITYPES_LOOGNARCH_HPP ++#endif // CPU_LOONGARCH_JNITYPES_LOONGARCH_HPP +diff --git a/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp +new file mode 100644 +index 0000000000..ea481c7fa6 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/jvmciCodeInstaller_loongarch.cpp +@@ -0,0 +1,199 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "jvmci/jvmciCodeInstaller.hpp" ++#include "jvmci/jvmciRuntime.hpp" ++#include "jvmci/jvmciCompilerToVM.hpp" ++#include "jvmci/jvmciJavaClasses.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_loongarch.inline.hpp" ++ ++jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) { ++ if (inst->is_int_branch() || inst->is_float_branch()) { ++ return pc_offset + NativeInstruction::nop_instruction_size; ++ } else if (inst->is_call()) { ++ return pc_offset + NativeCall::instruction_size; ++ } else if (inst->is_far_call()) { ++ return pc_offset + NativeFarCall::instruction_size; ++ } else if (inst->is_jump()) { ++ return pc_offset + NativeGeneralJump::instruction_size; ++ } else if (inst->is_lu12iw_lu32id()) { ++ // match LoongArch64TestAssembler.java emitCall ++ // lu12i_w; lu32i_d; jirl ++ return pc_offset + 3 * NativeInstruction::nop_instruction_size; ++ } else { ++ JVMCI_ERROR_0("unsupported type of instruction for call site"); ++ } ++ return 0; ++} ++ ++void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ Handle obj(THREAD, HotSpotObjectConstantImpl::object(constant)); ++ jobject value = JNIHandles::make_local(obj()); ++ if (HotSpotObjectConstantImpl::compressed(constant)) { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ move->set_data((intptr_t)(CompressedOops::encode(cast_to_oop(cast_from_oop
(obj()))))); ++ int oop_index = _oop_recorder->find_index(value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ _instructions->relocate(pc, rspec, Assembler::narrow_oop_operand); ++ } else { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ move->set_data((intptr_t)(cast_from_oop
(obj()))); ++ int oop_index = _oop_recorder->find_index(value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ _instructions->relocate(pc, rspec); ++ } ++} ++ ++void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ if (HotSpotMetaspaceConstantImpl::compressed(constant)) { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ narrowKlass narrowOop = record_narrow_metadata_reference(_instructions, pc, constant, CHECK); ++ move->set_data((intptr_t) narrowOop); ++ TRACE_jvmci_3("relocating (narrow metaspace constant) at " PTR_FORMAT "/0x%x", p2i(pc), narrowOop); ++ } else { ++ NativeMovConstReg* move = nativeMovConstReg_at(pc); ++ void* reference = record_metadata_reference(_instructions, pc, constant, CHECK); ++ move->set_data((intptr_t) reference); ++ TRACE_jvmci_3("relocating (metaspace constant) at " PTR_FORMAT "/" PTR_FORMAT, p2i(pc), p2i(reference)); ++ } ++} ++ ++void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) { ++ address pc = _instructions->start() + pc_offset; ++ NativeInstruction* inst = nativeInstruction_at(pc); ++ if (inst->is_pcaddu12i_add()) { ++ address dest = _constants->start() + data_offset; ++ _instructions->relocate(pc, section_word_Relocation::spec((address) dest, CodeBuffer::SECT_CONSTS)); ++ TRACE_jvmci_3("relocating at " PTR_FORMAT " (+%d) with destination at %d", p2i(pc), pc_offset, data_offset); ++ } else { ++ JVMCI_ERROR("unknown load or move instruction at " PTR_FORMAT, p2i(pc)); ++ } ++} ++ ++void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) { ++ address pc = (address) inst; ++ if (inst->is_call()) { ++ NativeCall* call = nativeCall_at(pc); ++ call->set_destination((address) foreign_call_destination); ++ _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_far_call()) { ++ NativeFarCall* call = nativeFarCall_at(pc); ++ call->set_destination((address) foreign_call_destination); ++ _instructions->relocate(call->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_jump()) { ++ NativeGeneralJump* jump = nativeGeneralJump_at(pc); ++ jump->set_jump_destination((address) foreign_call_destination); ++ _instructions->relocate(jump->instruction_address(), runtime_call_Relocation::spec()); ++ } else if (inst->is_lu12iw_lu32id()) { ++ // match emitCall of LoongArch64TestAssembler.java ++ // lu12i_w; lu32i_d; jirl ++ MacroAssembler::pd_patch_instruction((address)inst, (address)foreign_call_destination); ++ } else { ++ JVMCI_ERROR("unknown call or jump instruction at " PTR_FORMAT, p2i(pc)); ++ } ++ TRACE_jvmci_3("relocating (foreign call) at " PTR_FORMAT, p2i(inst)); ++} ++ ++void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &cbuf, Handle hotspot_method, jint pc_offset, TRAPS) { ++#ifdef ASSERT ++ Method* method = NULL; ++ // we need to check, this might also be an unresolved method ++ if (hotspot_method->is_a(HotSpotResolvedJavaMethodImpl::klass())) { ++ method = getMethodFromHotSpotMethod(hotspot_method()); ++ } ++#endif ++ switch (_next_call_type) { ++ case INLINE_INVOKE: ++ break; ++ case INVOKEVIRTUAL: ++ case INVOKEINTERFACE: { ++ assert(!method->is_static(), "cannot call static method with invokeinterface"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), 
virtual_call_Relocation::spec(_invoke_mark_pc)); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_virtual_call_stub()); ++ break; ++ } ++ case INVOKESTATIC: { ++ assert(method->is_static(), "cannot call non-static method with invokestatic"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), relocInfo::static_call_type); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_static_call_stub()); ++ break; ++ } ++ case INVOKESPECIAL: { ++ assert(!method->is_static(), "cannot call static method with invokespecial"); ++ NativeCall* call = nativeCall_at(_instructions->start() + pc_offset); ++ _instructions->relocate(call->instruction_address(), relocInfo::opt_virtual_call_type); ++ call->trampoline_jump(cbuf, SharedRuntime::get_resolve_opt_virtual_call_stub()); ++ break; ++ } ++ default: ++ JVMCI_ERROR("invalid _next_call_type value"); ++ break; ++ } ++} ++ ++void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) { ++ switch (mark) { ++ case POLL_NEAR: ++ JVMCI_ERROR("unimplemented"); ++ break; ++ case POLL_FAR: ++ _instructions->relocate(pc, relocInfo::poll_type); ++ break; ++ case POLL_RETURN_NEAR: ++ JVMCI_ERROR("unimplemented"); ++ break; ++ case POLL_RETURN_FAR: ++ _instructions->relocate(pc, relocInfo::poll_return_type); ++ break; ++ default: ++ JVMCI_ERROR("invalid mark value"); ++ break; ++ } ++} ++ ++// convert JVMCI register indices (as used in oop maps) to HotSpot registers ++VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) { ++ if (jvmci_reg < RegisterImpl::number_of_registers) { ++ return as_Register(jvmci_reg)->as_VMReg(); ++ } else { ++ jint floatRegisterNumber = jvmci_reg - RegisterImpl::number_of_registers; ++ if (floatRegisterNumber >= 0 && floatRegisterNumber < FloatRegisterImpl::number_of_registers) { ++ return as_FloatRegister(floatRegisterNumber)->as_VMReg(); ++ } ++ JVMCI_ERROR_NULL("invalid register number: %d", jvmci_reg); ++ } ++} ++ ++bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) { ++ return !hotspotRegister->is_FloatRegister(); ++} diff --git a/src/hotspot/cpu/loongarch/loongarch.ad b/src/hotspot/cpu/loongarch/loongarch.ad new file mode 100644 index 0000000000..80dff0c762 @@ -18401,10 +18662,10 @@ index 0000000000..80dff0c762 + diff --git a/src/hotspot/cpu/loongarch/loongarch_64.ad b/src/hotspot/cpu/loongarch/loongarch_64.ad new file mode 100644 -index 0000000000..3c65f7518d +index 0000000000..a5fb5f7b85 --- /dev/null +++ b/src/hotspot/cpu/loongarch/loongarch_64.ad -@@ -0,0 +1,14043 @@ +@@ -0,0 +1,13906 @@ +// +// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. +// Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. @@ -18452,10 +18713,10 @@ index 0000000000..3c65f7518d + +// General Registers +// Integer Registers -+ reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); ++ reg_def R0 ( NS, NS, Op_RegI, 0, R0->as_VMReg()); ++ reg_def R0_H ( NS, NS, Op_RegI, 0, R0->as_VMReg()->next()); + reg_def RA ( NS, NS, Op_RegI, 1, RA->as_VMReg()); + reg_def RA_H ( NS, NS, Op_RegI, 1, RA->as_VMReg()->next()); -+ // TODO: LA + reg_def TP ( NS, NS, Op_RegI, 2, TP->as_VMReg()); + reg_def TP_H ( NS, NS, Op_RegI, 2, TP->as_VMReg()->next()); + reg_def SP ( NS, NS, Op_RegI, 3, SP->as_VMReg()); @@ -18519,293 +18780,293 @@ index 0000000000..3c65f7518d + + +// Floating/Vector registers. 
-+reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg() ); -+reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next() ); -+reg_def F0_J ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(2) ); -+reg_def F0_K ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(3) ); -+reg_def F0_L ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(4) ); -+reg_def F0_M ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(5) ); -+reg_def F0_N ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(6) ); -+reg_def F0_O ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(7) ); -+ -+reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg() ); -+reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next() ); -+reg_def F1_J ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(2) ); -+reg_def F1_K ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(3) ); -+reg_def F1_L ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(4) ); -+reg_def F1_M ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(5) ); -+reg_def F1_N ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(6) ); -+reg_def F1_O ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(7) ); -+ -+reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg() ); -+reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next() ); -+reg_def F2_J ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(2) ); -+reg_def F2_K ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(3) ); -+reg_def F2_L ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(4) ); -+reg_def F2_M ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(5) ); -+reg_def F2_N ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(6) ); -+reg_def F2_O ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(7) ); -+ -+reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg() ); -+reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next() ); -+reg_def F3_J ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(2) ); -+reg_def F3_K ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(3) ); -+reg_def F3_L ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(4) ); -+reg_def F3_M ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(5) ); -+reg_def F3_N ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(6) ); -+reg_def F3_O ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(7) ); -+ -+reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg() ); -+reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next() ); -+reg_def F4_J ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(2) ); -+reg_def F4_K ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(3) ); -+reg_def F4_L ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(4) ); -+reg_def F4_M ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(5) ); -+reg_def F4_N ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(6) ); -+reg_def F4_O ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(7) ); -+ -+reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg() ); -+reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next() ); -+reg_def F5_J ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(2) ); -+reg_def F5_K ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(3) ); -+reg_def F5_L ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(4) ); -+reg_def F5_M ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(5) ); -+reg_def F5_N ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(6) ); -+reg_def F5_O ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(7) ); -+ -+reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg() ); -+reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next() ); -+reg_def F6_J ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(2) ); -+reg_def F6_K ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(3) ); -+reg_def F6_L ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(4) ); -+reg_def F6_M ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(5) ); -+reg_def F6_N ( SOC, SOC, Op_RegF, 6, 
F6->as_VMReg()->next(6) ); -+reg_def F6_O ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(7) ); -+ -+reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg() ); -+reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next() ); -+reg_def F7_J ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(2) ); -+reg_def F7_K ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(3) ); -+reg_def F7_L ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(4) ); -+reg_def F7_M ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(5) ); -+reg_def F7_N ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(6) ); -+reg_def F7_O ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(7) ); -+ -+reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg() ); -+reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next() ); -+reg_def F8_J ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(2) ); -+reg_def F8_K ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(3) ); -+reg_def F8_L ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(4) ); -+reg_def F8_M ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(5) ); -+reg_def F8_N ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(6) ); -+reg_def F8_O ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(7) ); -+ -+reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg() ); -+reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next() ); -+reg_def F9_J ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(2) ); -+reg_def F9_K ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(3) ); -+reg_def F9_L ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(4) ); -+reg_def F9_M ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(5) ); -+reg_def F9_N ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(6) ); -+reg_def F9_O ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(7) ); -+ -+reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg() ); -+reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next() ); -+reg_def F10_J ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(2) ); -+reg_def F10_K ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(3) ); -+reg_def F10_L ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(4) ); -+reg_def F10_M ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(5) ); -+reg_def F10_N ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(6) ); -+reg_def F10_O ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(7) ); -+ -+reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg() ); -+reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next() ); -+reg_def F11_J ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(2) ); -+reg_def F11_K ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(3) ); -+reg_def F11_L ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(4) ); -+reg_def F11_M ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(5) ); -+reg_def F11_N ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(6) ); -+reg_def F11_O ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(7) ); -+ -+reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg() ); -+reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next() ); -+reg_def F12_J ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(2) ); -+reg_def F12_K ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(3) ); -+reg_def F12_L ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(4) ); -+reg_def F12_M ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(5) ); -+reg_def F12_N ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(6) ); -+reg_def F12_O ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(7) ); -+ -+reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg() ); -+reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next() ); -+reg_def F13_J ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(2) ); -+reg_def F13_K ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(3) ); 
-+reg_def F13_L ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(4) ); -+reg_def F13_M ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(5) ); -+reg_def F13_N ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(6) ); -+reg_def F13_O ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(7) ); -+ -+reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg() ); -+reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next() ); -+reg_def F14_J ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(2) ); -+reg_def F14_K ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(3) ); -+reg_def F14_L ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(4) ); -+reg_def F14_M ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(5) ); -+reg_def F14_N ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(6) ); -+reg_def F14_O ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(7) ); -+ -+reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg() ); -+reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next() ); -+reg_def F15_J ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(2) ); -+reg_def F15_K ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(3) ); -+reg_def F15_L ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(4) ); -+reg_def F15_M ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(5) ); -+reg_def F15_N ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(6) ); -+reg_def F15_O ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(7) ); -+ -+reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg() ); -+reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next() ); -+reg_def F16_J ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(2) ); -+reg_def F16_K ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(3) ); -+reg_def F16_L ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(4) ); -+reg_def F16_M ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(5) ); -+reg_def F16_N ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(6) ); -+reg_def F16_O ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(7) ); -+ -+reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg() ); -+reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next() ); -+reg_def F17_J ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(2) ); -+reg_def F17_K ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(3) ); -+reg_def F17_L ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(4) ); -+reg_def F17_M ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(5) ); -+reg_def F17_N ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(6) ); -+reg_def F17_O ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(7) ); -+ -+reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg() ); -+reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next() ); -+reg_def F18_J ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(2) ); -+reg_def F18_K ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(3) ); -+reg_def F18_L ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(4) ); -+reg_def F18_M ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(5) ); -+reg_def F18_N ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(6) ); -+reg_def F18_O ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(7) ); -+ -+reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg() ); -+reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next() ); -+reg_def F19_J ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(2) ); -+reg_def F19_K ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(3) ); -+reg_def F19_L ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(4) ); -+reg_def F19_M ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(5) ); -+reg_def F19_N ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(6) ); -+reg_def F19_O ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(7) ); -+ -+reg_def F20 ( 
SOC, SOC, Op_RegF, 20, F20->as_VMReg() ); -+reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next() ); -+reg_def F20_J ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(2) ); -+reg_def F20_K ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(3) ); -+reg_def F20_L ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(4) ); -+reg_def F20_M ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(5) ); -+reg_def F20_N ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(6) ); -+reg_def F20_O ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(7) ); -+ -+reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg() ); -+reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next() ); -+reg_def F21_J ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(2) ); -+reg_def F21_K ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(3) ); -+reg_def F21_L ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(4) ); -+reg_def F21_M ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(5) ); -+reg_def F21_N ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(6) ); -+reg_def F21_O ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(7) ); -+ -+reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg() ); -+reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next() ); -+reg_def F22_J ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(2) ); -+reg_def F22_K ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(3) ); -+reg_def F22_L ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(4) ); -+reg_def F22_M ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(5) ); -+reg_def F22_N ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(6) ); -+reg_def F22_O ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(7) ); -+ -+reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg() ); -+reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next() ); -+reg_def F23_J ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(2) ); -+reg_def F23_K ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(3) ); -+reg_def F23_L ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(4) ); -+reg_def F23_M ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(5) ); -+reg_def F23_N ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(6) ); -+reg_def F23_O ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(7) ); -+ -+reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg() ); -+reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next() ); -+reg_def F24_J ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(2) ); -+reg_def F24_K ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(3) ); -+reg_def F24_L ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(4) ); -+reg_def F24_M ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(5) ); -+reg_def F24_N ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(6) ); -+reg_def F24_O ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(7) ); -+ -+reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg() ); -+reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next() ); -+reg_def F25_J ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(2) ); -+reg_def F25_K ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(3) ); -+reg_def F25_L ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(4) ); -+reg_def F25_M ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(5) ); -+reg_def F25_N ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(6) ); -+reg_def F25_O ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(7) ); -+ -+reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg() ); -+reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next() ); -+reg_def F26_J ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(2) ); -+reg_def F26_K ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(3) ); -+reg_def F26_L ( SOC, SOC, Op_RegF, 26, 
F26->as_VMReg()->next(4) ); -+reg_def F26_M ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(5) ); -+reg_def F26_N ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(6) ); -+reg_def F26_O ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(7) ); -+ -+reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg() ); -+reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next() ); -+reg_def F27_J ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(2) ); -+reg_def F27_K ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(3) ); -+reg_def F27_L ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(4) ); -+reg_def F27_M ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(5) ); -+reg_def F27_N ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(6) ); -+reg_def F27_O ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(7) ); -+ -+reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg() ); -+reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next() ); -+reg_def F28_J ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(2) ); -+reg_def F28_K ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(3) ); -+reg_def F28_L ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(4) ); -+reg_def F28_M ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(5) ); -+reg_def F28_N ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(6) ); -+reg_def F28_O ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(7) ); -+ -+reg_def F29 ( SOC, SOC, Op_RegF, 29, F29->as_VMReg() ); -+reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next() ); -+reg_def F29_J ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(2) ); -+reg_def F29_K ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(3) ); -+reg_def F29_L ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(4) ); -+reg_def F29_M ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(5) ); -+reg_def F29_N ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(6) ); -+reg_def F29_O ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(7) ); -+ -+reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg() ); -+reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next() ); -+reg_def F30_J ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(2) ); -+reg_def F30_K ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(3) ); -+reg_def F30_L ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(4) ); -+reg_def F30_M ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(5) ); -+reg_def F30_N ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(6) ); -+reg_def F30_O ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(7) ); -+ -+reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg() ); -+reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next() ); -+reg_def F31_J ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(2) ); -+reg_def F31_K ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(3) ); -+reg_def F31_L ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(4) ); -+reg_def F31_M ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(5) ); -+reg_def F31_N ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(6) ); -+reg_def F31_O ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(7) ); ++ reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg() ); ++ reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next() ); ++ reg_def F0_J ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(2) ); ++ reg_def F0_K ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(3) ); ++ reg_def F0_L ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(4) ); ++ reg_def F0_M ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(5) ); ++ reg_def F0_N ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(6) ); ++ reg_def F0_O ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next(7) ); ++ ++ reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg() ); ++ reg_def F1_H ( 
SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next() ); ++ reg_def F1_J ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(2) ); ++ reg_def F1_K ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(3) ); ++ reg_def F1_L ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(4) ); ++ reg_def F1_M ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(5) ); ++ reg_def F1_N ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(6) ); ++ reg_def F1_O ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next(7) ); ++ ++ reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg() ); ++ reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next() ); ++ reg_def F2_J ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(2) ); ++ reg_def F2_K ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(3) ); ++ reg_def F2_L ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(4) ); ++ reg_def F2_M ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(5) ); ++ reg_def F2_N ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(6) ); ++ reg_def F2_O ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next(7) ); ++ ++ reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg() ); ++ reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next() ); ++ reg_def F3_J ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(2) ); ++ reg_def F3_K ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(3) ); ++ reg_def F3_L ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(4) ); ++ reg_def F3_M ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(5) ); ++ reg_def F3_N ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(6) ); ++ reg_def F3_O ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next(7) ); ++ ++ reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg() ); ++ reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next() ); ++ reg_def F4_J ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(2) ); ++ reg_def F4_K ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(3) ); ++ reg_def F4_L ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(4) ); ++ reg_def F4_M ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(5) ); ++ reg_def F4_N ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(6) ); ++ reg_def F4_O ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next(7) ); ++ ++ reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg() ); ++ reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next() ); ++ reg_def F5_J ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(2) ); ++ reg_def F5_K ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(3) ); ++ reg_def F5_L ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(4) ); ++ reg_def F5_M ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(5) ); ++ reg_def F5_N ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(6) ); ++ reg_def F5_O ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next(7) ); ++ ++ reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg() ); ++ reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next() ); ++ reg_def F6_J ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(2) ); ++ reg_def F6_K ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(3) ); ++ reg_def F6_L ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(4) ); ++ reg_def F6_M ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(5) ); ++ reg_def F6_N ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(6) ); ++ reg_def F6_O ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next(7) ); ++ ++ reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg() ); ++ reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next() ); ++ reg_def F7_J ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(2) ); ++ reg_def F7_K ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(3) ); ++ reg_def F7_L ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(4) ); ++ reg_def F7_M ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(5) ); ++ reg_def F7_N ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(6) ); ++ 
reg_def F7_O ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next(7) ); ++ ++ reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg() ); ++ reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next() ); ++ reg_def F8_J ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(2) ); ++ reg_def F8_K ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(3) ); ++ reg_def F8_L ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(4) ); ++ reg_def F8_M ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(5) ); ++ reg_def F8_N ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(6) ); ++ reg_def F8_O ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next(7) ); ++ ++ reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg() ); ++ reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next() ); ++ reg_def F9_J ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(2) ); ++ reg_def F9_K ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(3) ); ++ reg_def F9_L ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(4) ); ++ reg_def F9_M ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(5) ); ++ reg_def F9_N ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(6) ); ++ reg_def F9_O ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next(7) ); ++ ++ reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg() ); ++ reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next() ); ++ reg_def F10_J ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(2) ); ++ reg_def F10_K ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(3) ); ++ reg_def F10_L ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(4) ); ++ reg_def F10_M ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(5) ); ++ reg_def F10_N ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(6) ); ++ reg_def F10_O ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next(7) ); ++ ++ reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg() ); ++ reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next() ); ++ reg_def F11_J ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(2) ); ++ reg_def F11_K ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(3) ); ++ reg_def F11_L ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(4) ); ++ reg_def F11_M ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(5) ); ++ reg_def F11_N ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(6) ); ++ reg_def F11_O ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next(7) ); ++ ++ reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg() ); ++ reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next() ); ++ reg_def F12_J ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(2) ); ++ reg_def F12_K ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(3) ); ++ reg_def F12_L ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(4) ); ++ reg_def F12_M ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(5) ); ++ reg_def F12_N ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(6) ); ++ reg_def F12_O ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next(7) ); ++ ++ reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg() ); ++ reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next() ); ++ reg_def F13_J ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(2) ); ++ reg_def F13_K ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(3) ); ++ reg_def F13_L ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(4) ); ++ reg_def F13_M ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(5) ); ++ reg_def F13_N ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(6) ); ++ reg_def F13_O ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next(7) ); ++ ++ reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg() ); ++ reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next() ); ++ reg_def F14_J ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(2) ); ++ reg_def F14_K ( SOC, SOC, 
Op_RegF, 14, F14->as_VMReg()->next(3) ); ++ reg_def F14_L ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(4) ); ++ reg_def F14_M ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(5) ); ++ reg_def F14_N ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(6) ); ++ reg_def F14_O ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next(7) ); ++ ++ reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg() ); ++ reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next() ); ++ reg_def F15_J ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(2) ); ++ reg_def F15_K ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(3) ); ++ reg_def F15_L ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(4) ); ++ reg_def F15_M ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(5) ); ++ reg_def F15_N ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(6) ); ++ reg_def F15_O ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next(7) ); ++ ++ reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg() ); ++ reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next() ); ++ reg_def F16_J ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(2) ); ++ reg_def F16_K ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(3) ); ++ reg_def F16_L ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(4) ); ++ reg_def F16_M ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(5) ); ++ reg_def F16_N ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(6) ); ++ reg_def F16_O ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next(7) ); ++ ++ reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg() ); ++ reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next() ); ++ reg_def F17_J ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(2) ); ++ reg_def F17_K ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(3) ); ++ reg_def F17_L ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(4) ); ++ reg_def F17_M ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(5) ); ++ reg_def F17_N ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(6) ); ++ reg_def F17_O ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next(7) ); ++ ++ reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg() ); ++ reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next() ); ++ reg_def F18_J ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(2) ); ++ reg_def F18_K ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(3) ); ++ reg_def F18_L ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(4) ); ++ reg_def F18_M ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(5) ); ++ reg_def F18_N ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(6) ); ++ reg_def F18_O ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next(7) ); ++ ++ reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg() ); ++ reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next() ); ++ reg_def F19_J ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(2) ); ++ reg_def F19_K ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(3) ); ++ reg_def F19_L ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(4) ); ++ reg_def F19_M ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(5) ); ++ reg_def F19_N ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(6) ); ++ reg_def F19_O ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next(7) ); ++ ++ reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg() ); ++ reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next() ); ++ reg_def F20_J ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(2) ); ++ reg_def F20_K ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(3) ); ++ reg_def F20_L ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(4) ); ++ reg_def F20_M ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(5) ); ++ reg_def F20_N ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(6) 
); ++ reg_def F20_O ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next(7) ); ++ ++ reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg() ); ++ reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next() ); ++ reg_def F21_J ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(2) ); ++ reg_def F21_K ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(3) ); ++ reg_def F21_L ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(4) ); ++ reg_def F21_M ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(5) ); ++ reg_def F21_N ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(6) ); ++ reg_def F21_O ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next(7) ); ++ ++ reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg() ); ++ reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next() ); ++ reg_def F22_J ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(2) ); ++ reg_def F22_K ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(3) ); ++ reg_def F22_L ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(4) ); ++ reg_def F22_M ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(5) ); ++ reg_def F22_N ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(6) ); ++ reg_def F22_O ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next(7) ); ++ ++ reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg() ); ++ reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next() ); ++ reg_def F23_J ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(2) ); ++ reg_def F23_K ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(3) ); ++ reg_def F23_L ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(4) ); ++ reg_def F23_M ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(5) ); ++ reg_def F23_N ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(6) ); ++ reg_def F23_O ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next(7) ); ++ ++ reg_def F24 ( SOC, SOE, Op_RegF, 24, F24->as_VMReg() ); ++ reg_def F24_H ( SOC, SOE, Op_RegF, 24, F24->as_VMReg()->next() ); ++ reg_def F24_J ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(2) ); ++ reg_def F24_K ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(3) ); ++ reg_def F24_L ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(4) ); ++ reg_def F24_M ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(5) ); ++ reg_def F24_N ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(6) ); ++ reg_def F24_O ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next(7) ); ++ ++ reg_def F25 ( SOC, SOE, Op_RegF, 25, F25->as_VMReg() ); ++ reg_def F25_H ( SOC, SOE, Op_RegF, 25, F25->as_VMReg()->next() ); ++ reg_def F25_J ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(2) ); ++ reg_def F25_K ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(3) ); ++ reg_def F25_L ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(4) ); ++ reg_def F25_M ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(5) ); ++ reg_def F25_N ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(6) ); ++ reg_def F25_O ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next(7) ); ++ ++ reg_def F26 ( SOC, SOE, Op_RegF, 26, F26->as_VMReg() ); ++ reg_def F26_H ( SOC, SOE, Op_RegF, 26, F26->as_VMReg()->next() ); ++ reg_def F26_J ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(2) ); ++ reg_def F26_K ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(3) ); ++ reg_def F26_L ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(4) ); ++ reg_def F26_M ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(5) ); ++ reg_def F26_N ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(6) ); ++ reg_def F26_O ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next(7) ); ++ ++ reg_def F27 ( SOC, SOE, Op_RegF, 27, F27->as_VMReg() ); ++ reg_def F27_H ( SOC, SOE, Op_RegF, 27, F27->as_VMReg()->next() ); ++ reg_def F27_J ( SOC, SOC, Op_RegF, 27, 
F27->as_VMReg()->next(2) ); ++ reg_def F27_K ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(3) ); ++ reg_def F27_L ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(4) ); ++ reg_def F27_M ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(5) ); ++ reg_def F27_N ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(6) ); ++ reg_def F27_O ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next(7) ); ++ ++ reg_def F28 ( SOC, SOE, Op_RegF, 28, F28->as_VMReg() ); ++ reg_def F28_H ( SOC, SOE, Op_RegF, 28, F28->as_VMReg()->next() ); ++ reg_def F28_J ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(2) ); ++ reg_def F28_K ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(3) ); ++ reg_def F28_L ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(4) ); ++ reg_def F28_M ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(5) ); ++ reg_def F28_N ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(6) ); ++ reg_def F28_O ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next(7) ); ++ ++ reg_def F29 ( SOC, SOE, Op_RegF, 29, F29->as_VMReg() ); ++ reg_def F29_H ( SOC, SOE, Op_RegF, 29, F29->as_VMReg()->next() ); ++ reg_def F29_J ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(2) ); ++ reg_def F29_K ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(3) ); ++ reg_def F29_L ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(4) ); ++ reg_def F29_M ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(5) ); ++ reg_def F29_N ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(6) ); ++ reg_def F29_O ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next(7) ); ++ ++ reg_def F30 ( SOC, SOE, Op_RegF, 30, F30->as_VMReg() ); ++ reg_def F30_H ( SOC, SOE, Op_RegF, 30, F30->as_VMReg()->next() ); ++ reg_def F30_J ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(2) ); ++ reg_def F30_K ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(3) ); ++ reg_def F30_L ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(4) ); ++ reg_def F30_M ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(5) ); ++ reg_def F30_N ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(6) ); ++ reg_def F30_O ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next(7) ); ++ ++ reg_def F31 ( SOC, SOE, Op_RegF, 31, F31->as_VMReg() ); ++ reg_def F31_H ( SOC, SOE, Op_RegF, 31, F31->as_VMReg()->next() ); ++ reg_def F31_J ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(2) ); ++ reg_def F31_K ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(3) ); ++ reg_def F31_L ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(4) ); ++ reg_def F31_M ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(5) ); ++ reg_def F31_N ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(6) ); ++ reg_def F31_O ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next(7) ); + + +// ---------------------------- @@ -18840,7 +19101,13 @@ index 0000000000..3c65f7518d + S8, S8_H + RA, RA_H, + SP, SP_H, // stack_pointer -+ FP, FP_H // frame_pointer ++ FP, FP_H, // frame_pointer ++ ++ // non-allocatable registers ++ T7, T7_H, ++ TP, TP_H, ++ RX, RX_H, ++ R0, R0_H, + ); + +// F23 is scratch reg @@ -18874,7 +19141,11 @@ index 0000000000..3c65f7518d + F28, F28_H, F28_J, F28_K, F28_L, F28_M, F28_N, F28_O, + F29, F29_H, F29_J, F29_K, F29_L, F29_M, F29_N, F29_O, + F30, F30_H, F30_J, F30_K, F30_L, F30_M, F30_N, F30_O, -+ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O); ++ F31, F31_H, F31_J, F31_K, F31_L, F31_M, F31_N, F31_O, ++ ++ // non-allocatable registers ++ F23, F23_H, F23_J, F23_K, F23_L, F23_M, F23_N, F23_O, ++ ); + +reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 ); +reg_class s0_reg( S0 ); @@ -18938,10 +19209,10 @@ index 0000000000..3c65f7518d + +//reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, A7, A6, A5, A4, 
T5, A3, A2, A1, A0, T0 ); + -+reg_class int_reg( ++reg_class all_reg32( + S8, + S7, -+ /* S5, S5_heapbase */ ++ S5, /* S5_heapbase */ + /* S6, S6 TREG */ + S4, + S3, @@ -18956,6 +19227,7 @@ index 0000000000..3c65f7518d + T3, + T2, + T1, ++ T0, + A7, + A6, + A5, @@ -18963,35 +19235,17 @@ index 0000000000..3c65f7518d + A3, + A2, + A1, -+ A0, -+ T0 ); ++ A0 ); ++ ++reg_class int_reg %{ ++ return _ANY_REG32_mask; ++%} + +reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, T6, T5, T0 ); + -+reg_class p_reg( -+ S8, S8_H, -+ S7, S7_H, -+ S0, S0_H, -+ S1, S1_H, -+ S2, S2_H, -+ S4, S4_H, -+ S3, S3_H, -+ T8, T8_H, -+ T6, T6_H, -+ T5, T5_H, -+ T2, T2_H, -+ T3, T3_H, -+ T1, T1_H, -+ A7, A7_H, -+ A6, A6_H, -+ A5, A5_H, -+ A4, A4_H, -+ A3, A3_H, -+ A2, A2_H, -+ A1, A1_H, -+ A0, A0_H, -+ T0, T0_H -+ ); ++reg_class p_reg %{ ++ return _PTR_REG_mask; ++%} + +reg_class no_T8_p_reg( + S7, S7_H, @@ -19027,11 +19281,11 @@ index 0000000000..3c65f7518d + T0, T0_H + ); + -+reg_class long_reg( ++reg_class all_reg( + S8, S8_H, + S7, S7_H, + /* S6, S6_H, S6 TREG */ -+ /* S5, S5_H, S5_heapbase */ ++ S5, S5_H, /* S5_heapbase */ + S4, S4_H, + S3, S3_H, + S2, S2_H, @@ -19045,6 +19299,7 @@ index 0000000000..3c65f7518d + T3, T3_H, + T2, T2_H, + T1, T1_H, ++ T0, T0_H, + A7, A7_H, + A6, A6_H, + A5, A5_H, @@ -19052,11 +19307,14 @@ index 0000000000..3c65f7518d + A3, A3_H, + A2, A2_H, + A1, A1_H, -+ A0, A0_H, -+ T0, T0_H ++ A0, A0_H + ); + + ++reg_class long_reg %{ ++ return _ANY_REG_mask; ++%} ++ +// Floating point registers. +// F31 are not used as temporary registers in D2I +reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F24, F25, F26, F27, F28, F29, F30, F31); @@ -19212,6 +19470,10 @@ index 0000000000..3c65f7518d +// To keep related declarations/definitions/uses close together, +// we switch between source %{ }% and source_hpp %{ }% freely as needed. + ++extern RegMask _ANY_REG32_mask; ++extern RegMask _ANY_REG_mask; ++extern RegMask _PTR_REG_mask; ++ +class CallStubImpl { + + //-------------------------------------------------------------- @@ -19288,6 +19550,22 @@ index 0000000000..3c65f7518d +#define T7 RT7 +#define T8 RT8 + ++RegMask _ANY_REG32_mask; ++RegMask _ANY_REG_mask; ++RegMask _PTR_REG_mask; ++ ++void reg_mask_init() { ++ _ANY_REG32_mask = _ALL_REG32_mask; ++ _ANY_REG_mask = _ALL_REG_mask; ++ _PTR_REG_mask = _ALL_REG_mask; ++ ++ if (UseCompressedOops && (Universe::narrow_ptrs_base() != NULL)) { ++ _ANY_REG32_mask.Remove(OptoReg::as_OptoReg(r28->as_VMReg())); ++ _ANY_REG_mask.SUBTRACT(_S5_LONG_REG_mask); ++ _PTR_REG_mask.SUBTRACT(_S5_LONG_REG_mask); ++ } ++} ++ +// Emit exception handler code. +// Stuff framesize into a register and call a VM stub routine. +int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { @@ -19363,7 +19641,7 @@ index 0000000000..3c65f7518d +bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { + const int safety_zone = 3 * BytesPerInstWord; + int offs = offset - br_size + 4; -+ // To be conservative on LOONGARCH ++ // To be conservative on LoongArch + // branch node should be end with: + // branch inst + offs = (offs < 0 ? offs - safety_zone : offs + safety_zone) >> 2; @@ -19487,7 +19765,7 @@ index 0000000000..3c65f7518d + return MIN2(size,max_size); +} + -+// LOONGARCH supports misaligned vectors store/load? FIXME ++// LoongArch supports misaligned vectors store/load? 
FIXME +const bool Matcher::misaligned_vectors_ok() { + return false; + //return !AlignVector; // can be changed by flag @@ -19570,7 +19848,7 @@ index 0000000000..3c65f7518d + return FP_REG_mask(); +} + -+// LOONGARCH doesn't support AES intrinsics ++// LoongArch doesn't support AES intrinsics +const bool Matcher::pass_original_key_for_aes() { + return false; +} @@ -20268,7 +20546,7 @@ index 0000000000..3c65f7518d +} + +uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { -+ return MachNode::size(ra_); // too many variables; just compute it the hard way fujie debug ++ return MachNode::size(ra_); // too many variables; just compute it the hard way +} + +int MachEpilogNode::reloc() const { @@ -20315,9 +20593,6 @@ index 0000000000..3c65f7518d + } +} + -+ -+//static int sizeof_FFree_Float_Stack_All = -1; -+ +int MachCallRuntimeNode::ret_addr_offset() { + // pcaddu18i + // jirl @@ -21683,26 +21958,6 @@ index 0000000000..3c65f7518d + interface(REG_INTER); +%} + -+/* -+operand mV0RegI() %{ -+ constraint(ALLOC_IN_RC(v0_reg)); -+ match(RegI); -+ match(mRegI); -+ -+ format %{ "V0" %} -+ interface(REG_INTER); -+%} -+ -+operand mV1RegI() %{ -+ constraint(ALLOC_IN_RC(v1_reg)); -+ match(RegI); -+ match(mRegI); -+ -+ format %{ "V1" %} -+ interface(REG_INTER); -+%} -+*/ -+ +operand mRegN() %{ + constraint(ALLOC_IN_RC(int_reg)); + match(RegN); @@ -21747,17 +22002,6 @@ index 0000000000..3c65f7518d + interface(REG_INTER); +%} + -+/* -+operand t9_RegN() %{ -+ constraint(ALLOC_IN_RC(t9_reg)); -+ match(RegN); -+ match(mRegN); -+ -+ format %{ %} -+ interface(REG_INTER); -+%} -+*/ -+ +operand a0_RegN() %{ + constraint(ALLOC_IN_RC(a0_reg)); + match(RegN); @@ -21902,26 +22146,6 @@ index 0000000000..3c65f7518d + interface(REG_INTER); +%} + -+/* -+operand v0_RegN() %{ -+ constraint(ALLOC_IN_RC(v0_reg)); -+ match(RegN); -+ match(mRegN); -+ -+ format %{ %} -+ interface(REG_INTER); -+%} -+ -+operand v1_RegN() %{ -+ constraint(ALLOC_IN_RC(v1_reg)); -+ match(RegN); -+ match(mRegN); -+ -+ format %{ %} -+ interface(REG_INTER); -+%} -+*/ -+ +// Pointer Register +operand mRegP() %{ + constraint(ALLOC_IN_RC(p_reg)); @@ -22070,19 +22294,6 @@ index 0000000000..3c65f7518d + interface(REG_INTER); +%} + -+/* -+operand t9_RegP() -+%{ -+ constraint(ALLOC_IN_RC(t9_long_reg)); -+ match(RegP); -+ match(mRegP); -+ match(no_T8_mRegP); -+ -+ format %{ %} -+ interface(REG_INTER); -+%} -+*/ -+ +operand a0_RegP() +%{ + constraint(ALLOC_IN_RC(a0_long_reg)); @@ -22194,24 +22405,6 @@ index 0000000000..3c65f7518d + interface(REG_INTER); +%} + -+/* -+operand mSPRegP(mRegP reg) %{ -+ constraint(ALLOC_IN_RC(sp_reg)); -+ match(reg); -+ -+ format %{ "SP" %} -+ interface(REG_INTER); -+%} -+ -+operand mFPRegP(mRegP reg) %{ -+ constraint(ALLOC_IN_RC(fp_reg)); -+ match(reg); -+ -+ format %{ "FP" %} -+ interface(REG_INTER); -+%} -+*/ -+ +operand mRegL() %{ + constraint(ALLOC_IN_RC(long_reg)); + match(RegL); @@ -23856,7 +24049,7 @@ index 0000000000..3c65f7518d + effect(USE labl); + + ins_cost(300); -+ format %{ "J$cop $labl #loongarch uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_long" %} + + ins_encode %{ + Label* L = $labl$$label; @@ -24316,7 +24509,7 @@ index 0000000000..3c65f7518d + effect(USE labl); + + ins_cost(300); -+ format %{ "J$cop $labl #loongarch uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ format %{ "J$cop $labl #LoongArch uses T0 as equivalent to eflag @jmpCon_flags_short" %} + + ins_encode %{ + Label &L = *($labl$$label); @@ -26159,7 
+26352,7 @@ index 0000000000..3c65f7518d +instruct addP_reg_reg(mRegP dst, mRegP src1, mRegLorI2L src2) %{ + match(Set dst (AddP src1 src2)); + -+ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg" %} ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_reg" %} + + ins_encode %{ + Register dst = $dst$$Register; @@ -26189,7 +26382,7 @@ index 0000000000..3c65f7518d +instruct addP_reg_imm12(mRegP dst, mRegP src1, immL12 src2) %{ + match(Set dst (AddP src1 src2)); + -+ format %{ "daddi $dst, $src1, $src2 #@addP_reg_imm12" %} ++ format %{ "ADD $dst, $src1, $src2 #@addP_reg_imm12" %} + ins_encode %{ + Register src1 = $src1$$Register; + long src2 = $src2$$constant; @@ -26416,27 +26609,6 @@ index 0000000000..3c65f7518d + ins_pipe( ialu_mult ); +%} + -+instruct maddI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2, mRegI src3) %{ -+ match(Set dst (AddI (MulI src1 src2) src3)); -+ -+ ins_cost(999); -+ format %{ "madd $dst, $src1 * $src2 + $src3 #@maddI_Reg_Reg" %} -+ ins_encode %{ -+ //TODO: LA -+ guarantee(0, "LA not implemented yet"); -+#if 0 -+ Register src1 = $src1$$Register; -+ Register src2 = $src2$$Register; -+ Register src3 = $src3$$Register; -+ Register dst = $dst$$Register; -+ -+ __ mul_w(dst, src1, src2); -+ __ add_w(dst, dst, src3); -+#endif -+ %} -+ ins_pipe( ialu_mult ); -+%} -+ +instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ + match(Set dst (DivI src1 src2)); + @@ -26463,7 +26635,6 @@ index 0000000000..3c65f7518d + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + -+ /* Here do we need to trap an exception manually ? */ + __ fdiv_s(dst, src1, src2); + %} + ins_pipe( pipe_slow ); @@ -26479,7 +26650,6 @@ index 0000000000..3c65f7518d + FloatRegister src2 = $src2$$FloatRegister; + FloatRegister dst = $dst$$FloatRegister; + -+ /* Here do we need to trap an exception manually ? */ + __ fdiv_d(dst, src1, src2); + %} + ins_pipe( pipe_slow ); @@ -28174,7 +28344,7 @@ index 0000000000..3c65f7518d +instruct prefetchAlloc(memory mem) %{ + match(PrefetchAllocation mem); + ins_cost(125); -+ format %{ "pref $mem\t# Prefetch allocation @ prefetchAlloc" %} ++ format %{ "preld $mem\t# Prefetch allocation @ prefetchAlloc" %} + ins_encode %{ + int base = $mem$$base; + int index = $mem$$index; @@ -28484,7 +28654,7 @@ index 0000000000..3c65f7518d + match(Set dst src); + + ins_cost(125); -+ format %{ "lwc1 $dst, $src\t# float stk @ loadSSF" %} ++ format %{ "fld_s $dst, $src\t# float stk @ loadSSF" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSF) !"); + __ fld_s($dst$$FloatRegister, SP, $src$$disp); @@ -28497,7 +28667,7 @@ index 0000000000..3c65f7518d + match(Set dst src); + + ins_cost(100); -+ format %{ "swc1 $dst, $src\t# float stk @ storeSSF" %} ++ format %{ "fst_s $dst, $src\t# float stk @ storeSSF" %} + ins_encode %{ + guarantee( Assembler::is_simm($dst$$disp, 12), "disp too long (storeSSF) !"); + __ fst_s($src$$FloatRegister, SP, $dst$$disp); @@ -28511,7 +28681,7 @@ index 0000000000..3c65f7518d + match(Set dst src); + + ins_cost(125); -+ format %{ "ldc1 $dst, $src\t# double stk @ loadSSD" %} ++ format %{ "fld_d $dst, $src\t# double stk @ loadSSD" %} + ins_encode %{ + guarantee( Assembler::is_simm($src$$disp, 12), "disp too long (loadSSD) !"); + __ fld_d($dst$$FloatRegister, SP, $src$$disp); @@ -28581,8 +28751,10 @@ index 0000000000..3c65f7518d + // Use the following format syntax + format %{ "ILLTRAP ;#@ShouldNotReachHere" %} + ins_encode %{ -+ // Here we should emit illtrap! 
-+ __ brk(18); ++ if (is_reachable()) { ++ // Here we should emit illtrap! ++ __ stop("ShouldNotReachHere"); ++ } + %} + ins_pipe( pipe_jump ); +%} @@ -28964,30 +29136,6 @@ index 0000000000..3c65f7518d +%} + +//----------Max and Min-------------------------------------------------------- -+// Min Instructions -+//// -+// *** Min and Max using the conditional move are slower than the -+// *** branch version on a Pentium III. -+// // Conditional move for min -+//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ -+// effect( USE_DEF op2, USE op1, USE cr ); -+// format %{ "CMOVlt $op2,$op1\t! min" %} -+// opcode(0x4C,0x0F); -+// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); -+// ins_pipe( pipe_cmov_reg ); -+//%} -+// -+//// Min Register with Register (P6 version) -+//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ -+// predicate(VM_Version::supports_cmov() ); -+// match(Set op2 (MinI op1 op2)); -+// ins_cost(200); -+// expand %{ -+// eFlagsReg cr; -+// compI_eReg(cr,op1,op2); -+// cmovI_reg_lt(op2,op1,cr); -+// %} -+//%} + +// Min Register with Register (generic version) +instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ @@ -29009,30 +29157,6 @@ index 0000000000..3c65f7518d + ins_pipe( pipe_slow ); +%} + -+// Max Register with Register -+// *** Min and Max using the conditional move are slower than the -+// *** branch version on a Pentium III. -+// // Conditional move for max -+//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ -+// effect( USE_DEF op2, USE op1, USE cr ); -+// format %{ "CMOVgt $op2,$op1\t! max" %} -+// opcode(0x4F,0x0F); -+// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); -+// ins_pipe( pipe_cmov_reg ); -+//%} -+// -+// // Max Register with Register (P6 version) -+//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ -+// predicate(VM_Version::supports_cmov() ); -+// match(Set op2 (MaxI op1 op2)); -+// ins_cost(200); -+// expand %{ -+// eFlagsReg cr; -+// compI_eReg(cr,op1,op2); -+// cmovI_reg_gt(op2,op1,cr); -+// %} -+//%} -+ +// Max Register with Register (generic version) +instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ + match(Set dst (MaxI dst src)); @@ -32450,10 +32574,10 @@ index 0000000000..3c65f7518d + diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp new file mode 100644 -index 0000000000..f8670f5081 +index 0000000000..74cda82963 --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.cpp -@@ -0,0 +1,4750 @@ +@@ -0,0 +1,4531 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. 
@@ -32491,6 +32615,7 @@ index 0000000000..f8670f5081 +#include "interpreter/interpreter.hpp" +#include "memory/resourceArea.hpp" +#include "memory/universe.hpp" ++#include "nativeInst_loongarch.hpp" +#include "prims/methodHandles.hpp" +#include "runtime/biasedLocking.hpp" +#include "runtime/interfaceSupport.inline.hpp" @@ -32504,6 +32629,7 @@ index 0000000000..f8670f5081 + +#ifdef COMPILER2 +#include "opto/compile.hpp" ++#include "opto/intrinsicnode.hpp" +#endif + +#define T0 RT0 @@ -32595,6 +32721,24 @@ index 0000000000..f8670f5081 + MacroAssembler masm(&cb); + masm.pcaddi(as_Register(low(stub_inst, 5)), offs); + return; ++ } else if (high(stub_inst, 7) == pcaddu12i_op) { ++ // pc-relative ++ jlong offs = target - branch; ++ guarantee(is_simm(offs, 32), "Not signed 32-bit offset"); ++ jint si12, si20; ++ jint& stub_instNext = *(jint*)(branch+4); ++ split_simm32(offs, si12, si20); ++ CodeBuffer cb(branch, 2 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.pcaddu12i(as_Register(low(stub_inst, 5)), si20); ++ masm.addi_d(as_Register(low((stub_instNext), 5)), as_Register(low((stub_instNext) >> 5, 5)), si12); ++ return; ++ } else if (high(stub_inst, 7) == lu12i_w_op) { ++ // long call (absolute) ++ CodeBuffer cb(branch, 3 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.call_long(target); ++ return; + } + + stub_inst = patched_branch(target - branch, stub_inst, 0); @@ -33013,7 +33157,7 @@ index 0000000000..f8670f5081 + + // The bias pattern is present in the object's header. Need to check + // whether the bias owner and the epoch are both still current. -+ // Note that because there is no current thread register on MIPS we ++ // Note that because there is no current thread register on LA we + // need to store off the mark word we read out of the object to + // avoid reloading it and needing to recheck invariants below. This + // store is unfortunate but it makes the overall code shorter and @@ -33415,55 +33559,6 @@ index 0000000000..f8670f5081 + popad(); +} + -+void MacroAssembler::print_reg(Register reg) { -+ void * cur_pc = pc(); -+ pushad(); -+ -+ li(A0, (long)reg->name()); -+ if (reg == SP) -+ addi_d(A1, SP, wordSize * 23); //23 registers saved in pushad() -+ else if (reg == A0) -+ ld_d(A1, SP, wordSize * 19); //A0 has been modified by li(A0, (long)reg->name()). Ugly Code! 
-+ else -+ move(A1, reg); -+ li(A2, (long)cur_pc); -+ push(S2); -+ li(AT, -(StackAlignmentInBytes)); -+ move(S2, SP); // use S2 as a sender SP holder -+ andr(SP, SP, AT); // align stack as required by ABI -+ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_reg_with_pc),relocInfo::runtime_call_type); -+ move(SP, S2); // use S2 as a sender SP holder -+ pop(S2); -+ popad(); -+} -+ -+void MacroAssembler::print_reg(FloatRegister reg) { -+ //TODO: LA -+ guarantee(0, "LA not implemented yet"); -+#if 0 -+ void * cur_pc = pc(); -+ pushad(); -+ li(A0, (long)reg->name()); -+ push(S2); -+ move(AT, -(StackAlignmentInBytes)); -+ move(S2, SP); // use S2 as a sender SP holder -+ andr(SP, SP, AT); // align stack as required by ABI -+ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_str),relocInfo::runtime_call_type); -+ move(SP, S2); // use S2 as a sender SP holder -+ pop(S2); -+ popad(); -+ -+ pushad(); -+ move(FP, SP); -+ move(AT, -(StackAlignmentInBytes)); -+ andr(SP , SP , AT); -+ mov_d(F12, reg); -+ call(CAST_FROM_FN_PTR(address, SharedRuntime::print_double),relocInfo::runtime_call_type); -+ move(SP, FP); -+ popad(); -+#endif -+} -+ +void MacroAssembler::increment(Register reg, int imm) { + if (!imm) return; + if (is_simm(imm, 12)) { @@ -34211,8 +34306,6 @@ index 0000000000..f8670f5081 + + // make sure klass is 'reasonable' + // add for compressedoops -+ reinit_heapbase(); -+ // add for compressedoops + load_klass(SCR2, A1); + beqz(SCR2, error); // if klass is NULL it is broken + // return if everything seems ok @@ -34405,6 +34498,7 @@ index 0000000000..f8670f5081 + guarantee(0, "LA not implemented yet"); +} + ++#ifdef COMPILER2 +// Fast_Lock and Fast_Unlock used by C2 + +// Because the transitions from emitted code to the runtime @@ -34785,6 +34879,7 @@ index 0000000000..f8670f5081 + if (EmitSync & 32768) { nop() ; } + } +} ++#endif // COMPILER2 + +void MacroAssembler::align(int modulus) { + while (offset() % modulus != 0) nop(); @@ -35161,7 +35256,6 @@ index 0000000000..f8670f5081 + // Cannot assert, unverified entry point counts instructions (see .ad file) + // vtableStubs also counts instructions in pd_code_size_limit. + // Also do not verify_oop as this is called by verify_oop. -+ //lea(dst, Address(S5_heapbase, src, Address::times_8, 0)); + if (Universe::narrow_oop_shift() != 0) { + assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); + if (Universe::narrow_oop_base() != NULL) { @@ -35438,7 +35532,6 @@ index 0000000000..f8670f5081 + // Skip to start of data. + addi_d(temp_reg, temp_reg, Array::base_offset_in_bytes()); + -+ // OpenJDK8 never compresses klass pointers in secondary-super array. 
+ Label Loop, subtype; + bind(Loop); + beq(temp2_reg, R0, *L_failure); @@ -35606,216 +35699,7 @@ index 0000000000..f8670f5081 + ld_d(method_result, AT, base + vtableEntry::method_offset_in_bytes()); +} + -+void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) { -+ switch (type) { -+ case T_LONG: -+ st_ptr(src_reg, tmp_reg, disp); -+ break; -+ case T_ARRAY: -+ case T_OBJECT: -+ if (UseCompressedOops && !wide) { -+ st_w(src_reg, tmp_reg, disp); -+ } else { -+ st_ptr(src_reg, tmp_reg, disp); -+ } -+ break; -+ case T_ADDRESS: -+ st_ptr(src_reg, tmp_reg, disp); -+ break; -+ case T_INT: -+ st_w(src_reg, tmp_reg, disp); -+ break; -+ case T_CHAR: -+ case T_SHORT: -+ st_h(src_reg, tmp_reg, disp); -+ break; -+ case T_BYTE: -+ case T_BOOLEAN: -+ st_b(src_reg, tmp_reg, disp); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) { -+ Register tmp_reg = T4; -+ Register index_reg = addr.index(); -+ if (index_reg == NOREG) { -+ tmp_reg = NOREG; -+ } -+ -+ int scale = addr.scale(); -+ if (tmp_reg != NOREG && scale >= 0) { -+ slli_d(tmp_reg, index_reg, scale); -+ } -+ -+ int disp = addr.disp(); -+ bool disp_is_simm16 = true; -+ if (!Assembler::is_simm16(disp)) { -+ disp_is_simm16 = false; -+ } -+ -+ Register base_reg = addr.base(); -+ if (tmp_reg != NOREG) { -+ assert_different_registers(tmp_reg, base_reg, index_reg); -+ } -+ -+ if (tmp_reg != NOREG) { -+ add_d(tmp_reg, base_reg, tmp_reg); -+ if (!disp_is_simm16) { -+ li(tmp_reg, disp); -+ add_d(tmp_reg, base_reg, tmp_reg); -+ } -+ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); -+ } else { -+ if (!disp_is_simm16) { -+ tmp_reg = T4; -+ assert_different_registers(tmp_reg, base_reg); -+ li(tmp_reg, disp); -+ add_d(tmp_reg, base_reg, tmp_reg); -+ } -+ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); -+ } -+} -+ -+void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) { -+ switch (type) { -+ case T_DOUBLE: -+ fst_d(src_reg, tmp_reg, disp); -+ break; -+ case T_FLOAT: -+ fst_s(src_reg, tmp_reg, disp); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) { -+ Register tmp_reg = T4; -+ Register index_reg = addr.index(); -+ if (index_reg == NOREG) { -+ tmp_reg = NOREG; -+ } -+ -+ int scale = addr.scale(); -+ if (tmp_reg != NOREG && scale >= 0) { -+ slli_d(tmp_reg, index_reg, scale); -+ } -+ -+ int disp = addr.disp(); -+ bool disp_is_simm16 = true; -+ if (!Assembler::is_simm16(disp)) { -+ disp_is_simm16 = false; -+ } -+ -+ Register base_reg = addr.base(); -+ if (tmp_reg != NOREG) { -+ assert_different_registers(tmp_reg, base_reg, index_reg); -+ } -+ -+ if (tmp_reg != NOREG) { -+ add_d(tmp_reg, base_reg, tmp_reg); -+ if (!disp_is_simm16) { -+ li(tmp_reg, disp); -+ add_d(tmp_reg, base_reg, tmp_reg); -+ } -+ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); -+ } else { -+ if (!disp_is_simm16) { -+ tmp_reg = T4; -+ assert_different_registers(tmp_reg, base_reg); -+ li(tmp_reg, disp); -+ add_d(tmp_reg, base_reg, tmp_reg); -+ } -+ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type); -+ } -+} -+ -+void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) { -+ switch (type) { -+ case T_LONG: -+ ld_ptr(dst_reg, tmp_reg, disp); -+ break; -+ case T_ARRAY: -+ case T_OBJECT: -+ if (UseCompressedOops && !wide) { -+ ld_wu(dst_reg, tmp_reg, disp); -+ } else { -+ ld_ptr(dst_reg, tmp_reg, disp); -+ } -+ break; -+ case T_ADDRESS: -+ if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) { -+ ld_wu(dst_reg, tmp_reg, disp); -+ } else { -+ ld_ptr(dst_reg, tmp_reg, disp); -+ } -+ break; -+ case T_INT: -+ ld_w(dst_reg, tmp_reg, disp); -+ break; -+ case T_CHAR: -+ ld_hu(dst_reg, tmp_reg, disp); -+ break; -+ case T_SHORT: -+ ld_h(dst_reg, tmp_reg, disp); -+ break; -+ case T_BYTE: -+ case T_BOOLEAN: -+ ld_b(dst_reg, tmp_reg, disp); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) { -+ int code_offset = 0; -+ Register tmp_reg = T4; -+ Register index_reg = addr.index(); -+ if (index_reg == NOREG) { -+ tmp_reg = NOREG; -+ } -+ int scale = addr.scale(); -+ if (tmp_reg != NOREG && scale >= 0) { -+ slli_d(tmp_reg, index_reg, scale); -+ } -+ -+ int disp = addr.disp(); -+ bool disp_is_simm16 = true; -+ if (!Assembler::is_simm16(disp)) { -+ disp_is_simm16 = false; -+ } -+ -+ Register base_reg = addr.base(); -+ if (tmp_reg != NOREG) { -+ assert_different_registers(tmp_reg, base_reg, index_reg); -+ } -+ -+ if (tmp_reg != NOREG) { -+ add_d(tmp_reg, base_reg, tmp_reg); -+ if (!disp_is_simm16) { -+ li(tmp_reg, disp); -+ add_d(tmp_reg, base_reg, tmp_reg); -+ } -+ code_offset = offset(); -+ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); -+ } else { -+ if (!disp_is_simm16) { -+ tmp_reg = T4; -+ assert_different_registers(tmp_reg, base_reg); -+ li(tmp_reg, disp); -+ add_d(tmp_reg, base_reg, tmp_reg); -+ } -+ code_offset = offset(); -+ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); -+ } -+ -+ return code_offset; -+} -+ ++#ifdef COMPILER2 +// Compare strings, used for char[] and byte[]. 
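++// The result follows String.compareTo conventions: negative, zero or positive
++// according to the lexicographic order of the two arrays, with the shorter
++// array ordered first when one is a prefix of the other. Illustrative scalar
++// equivalent (assuming cnt1/cnt2 carry the element counts):
++//   for (int i = 0; i < min(cnt1, cnt2); i++)
++//     if (str1[i] != str2[i]) return str1[i] - str2[i];
++//   return cnt1 - cnt2;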
+void MacroAssembler::string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, @@ -35908,63 +35792,7 @@ index 0000000000..f8670f5081 + + bind(True); +} -+ -+void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) { -+ switch (type) { -+ case T_DOUBLE: -+ fld_d(dst_reg, tmp_reg, disp); -+ break; -+ case T_FLOAT: -+ fld_s(dst_reg, tmp_reg, disp); -+ break; -+ default: -+ ShouldNotReachHere(); -+ } -+} -+ -+int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) { -+ int code_offset = 0; -+ Register tmp_reg = T4; -+ Register index_reg = addr.index(); -+ if (index_reg == NOREG) { -+ tmp_reg = NOREG; -+ } -+ -+ int scale = addr.scale(); -+ if (tmp_reg != NOREG && scale >= 0) { -+ slli_d(tmp_reg, index_reg, scale); -+ } -+ -+ int disp = addr.disp(); -+ bool disp_is_simm16 = true; -+ if (!Assembler::is_simm16(disp)) { -+ disp_is_simm16 = false; -+ } -+ Register base_reg = addr.base(); -+ if (tmp_reg != NOREG) { -+ assert_different_registers(tmp_reg, base_reg, index_reg); -+ } -+ -+ if (tmp_reg != NOREG) { -+ add_d(tmp_reg, base_reg, tmp_reg); -+ if (!disp_is_simm16) { -+ li(tmp_reg, disp); -+ add_d(tmp_reg, base_reg, tmp_reg); -+ } -+ code_offset = offset(); -+ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); -+ } else { -+ if (!disp_is_simm16) { -+ tmp_reg = T4; -+ assert_different_registers(tmp_reg, base_reg); -+ li(tmp_reg, disp); -+ add_d(tmp_reg, base_reg, tmp_reg); -+ } -+ code_offset = offset(); -+ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); -+ } -+ return code_offset; -+} ++#endif // COMPILER2 + +void MacroAssembler::load_byte_map_base(Register reg) { + jbyte *byte_map_base = @@ -36751,6 +36579,7 @@ index 0000000000..f8670f5081 + } +} + ++#ifdef COMPILER2 +void MacroAssembler::reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode) { + switch (type) { + case T_BYTE: @@ -36956,6 +36785,7 @@ index 0000000000..f8670f5081 + ShouldNotReachHere(); + } +} ++#endif // COMPILER2 + +/** + * Emits code to update CRC-32 with a byte value according to constants in table @@ -37105,6 +36935,7 @@ index 0000000000..f8670f5081 + bind(L_exit); +} + ++#ifdef COMPILER2 +void MacroAssembler::cmp_branch_short(int flag, Register op1, Register op2, Label& L, bool is_signed) { + + switch(flag) { @@ -37192,6 +37023,7 @@ index 0000000000..f8670f5081 + Unimplemented(); + } +} ++#endif // COMPILER2 + +void MacroAssembler::membar(Membar_mask_bits hint){ + address prev = pc() - NativeInstruction::sync_instruction_size; @@ -37204,12 +37036,85 @@ index 0000000000..f8670f5081 + dbar(hint); + } +} ++ ++// Code for BigInteger::mulAdd intrinsic ++// out = A0 ++// in = A1 ++// offset = A2 (already out.length-offset) ++// len = A3 ++// k = A4 ++// ++// pseudo code from java implementation: ++// long kLong = k & LONG_MASK; ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j = len - 1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k) { ++ Label L_tail_loop, L_unroll, L_end; ++ ++ move(SCR2, out); ++ move(out, R0); // should clear out ++ bge(R0, len, L_end); ++ ++ alsl_d(offset, offset, SCR2, LogBytesPerInt - 1); ++ 
alsl_d(in, len, in, LogBytesPerInt - 1); ++ ++ const int unroll = 16; ++ li(SCR2, unroll); ++ blt(len, SCR2, L_tail_loop); ++ ++ bind(L_unroll); ++ ++ addi_d(in, in, -unroll * BytesPerInt); ++ addi_d(offset, offset, -unroll * BytesPerInt); ++ ++ for (int i = unroll - 1; i >= 0; i--) { ++ ld_wu(SCR1, in, i * BytesPerInt); ++ mulw_d_wu(SCR1, SCR1, k); ++ add_d(out, out, SCR1); // out as scratch ++ ld_wu(SCR1, offset, i * BytesPerInt); ++ add_d(SCR1, SCR1, out); ++ st_w(SCR1, offset, i * BytesPerInt); ++ srli_d(out, SCR1, 32); // keep carry ++ } ++ ++ sub_w(len, len, SCR2); ++ bge(len, SCR2, L_unroll); ++ ++ bge(R0, len, L_end); // check tail ++ ++ bind(L_tail_loop); ++ ++ addi_d(in, in, -BytesPerInt); ++ ld_wu(SCR1, in, 0); ++ mulw_d_wu(SCR1, SCR1, k); ++ add_d(out, out, SCR1); // out as scratch ++ ++ addi_d(offset, offset, -BytesPerInt); ++ ld_wu(SCR1, offset, 0); ++ add_d(SCR1, SCR1, out); ++ st_w(SCR1, offset, 0); ++ ++ srli_d(out, SCR1, 32); // keep carry ++ ++ addi_w(len, len, -1); ++ blt(R0, len, L_tail_loop); ++ ++ bind(L_end); ++} ++ diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp new file mode 100644 -index 0000000000..48d4ad07fd +index 0000000000..d49cf2e680 --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.hpp -@@ -0,0 +1,848 @@ +@@ -0,0 +1,820 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. @@ -37235,8 +37140,8 @@ index 0000000000..48d4ad07fd + * + */ + -+#ifndef CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP ++#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP + +#include "asm/assembler.hpp" +#include "runtime/rtmLocking.hpp" @@ -37591,9 +37496,6 @@ index 0000000000..48d4ad07fd + static void debug(char* msg/*, RegistersForDebugging* regs*/); + static void debug64(char* msg, int64_t pc, int64_t regs[]); + -+ void print_reg(Register reg); -+ void print_reg(FloatRegister reg); -+ + void untested() { stop("untested"); } + + void unimplemented(const char* what = ""); @@ -37663,34 +37565,6 @@ index 0000000000..48d4ad07fd +#endif + + -+ // Arithmetics -+ // Regular vs. 
d* versions -+#if 0 -+ inline void addu_long(Register rd, Register rs, Register rt) { -+ //TODO: LA -+ guarantee(0, "LA not implemented yet"); -+ add_d(rd, rs, rt); -+ } -+#endif -+ inline void addu_long(Register rd, Register rs, long imm32_64) { -+ //TODO: LA -+ guarantee(0, "LA not implemented yet"); -+#if 0 -+ addi_d(rd, rs, imm32_64); -+#endif -+ } -+ -+ void round_to(Register reg, int modulus) { -+ //TODO: LA -+ guarantee(0, "LA not implemented yet"); -+#if 0 -+ assert_different_registers(reg, AT); -+ increment(reg, modulus - 1); -+ move(AT, - modulus); -+ andr(reg, reg, AT); -+#endif -+ } -+ + // the follow two might use AT register, be sure you have no meanful data in AT before you call them + void increment(Register reg, int imm); + void decrement(Register reg, int imm); @@ -37764,6 +37638,9 @@ index 0000000000..48d4ad07fd + void patchable_call(address target, address call_size = 0); + + // Floating ++ void generate_dsin_dcos(bool isCos, address npio2_hw, address two_over_pi, ++ address pio2, address dsin_coef, address dcos_coef); ++ + // Data + + // Load and store values by size and signed-ness @@ -37799,10 +37676,6 @@ index 0000000000..48d4ad07fd + // convert big endian integer to little endian integer + void swap(Register reg); + -+ // implement the x86 instruction semantic -+ // if c_reg == *dest then *dest <= x_reg -+ // else c_reg <= *dest -+ // the AT indicate if xchg occurred, 1 for xchged, else 0 + void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, + bool retold, bool barrier); + void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, @@ -37812,7 +37685,6 @@ index 0000000000..48d4ad07fd + void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, + bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL); + -+ //pop & push, added by aoqi + void extend_sign(Register rh, Register rl) { /*stop("extend_sign");*/ guarantee(0, "LA not implemented yet");} + void neg(Register reg) { /*dsubu(reg, R0, reg);*/ guarantee(0, "LA not implemented yet");} + void push (Register reg) { addi_d(SP, SP, -8); st_d (reg, SP, 0); } @@ -37839,15 +37711,6 @@ index 0000000000..48d4ad07fd + void mov_metadata(Register dst, Metadata* obj); + void mov_metadata(Address dst, Metadata* obj); + -+ void store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide); -+ void store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type); -+ void store_for_type(Register src_reg, Address addr, BasicType type = T_INT, bool wide = false); -+ void store_for_type(FloatRegister src_reg, Address addr, BasicType type = T_INT); -+ void load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide); -+ void load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type); -+ int load_for_type(Register dst_reg, Address addr, BasicType type = T_INT, bool wide = false); -+ int load_for_type(FloatRegister dst_reg, Address addr, BasicType type = T_INT); -+ + // Load the base of the cardtable byte map into reg. + void load_byte_map_base(Register reg); + @@ -37872,6 +37735,7 @@ index 0000000000..48d4ad07fd + //FIXME + void empty_FPU_stack(){/*need implemented*/}; + ++#ifdef COMPILER2 + // Compare strings. 
+ void string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, @@ -37881,12 +37745,13 @@ index 0000000000..48d4ad07fd + void arrays_equals(Register str1, Register str2, + Register cnt, Register tmp1, Register tmp2, Register result, + bool is_char); ++#endif + + // method handles (JSR 292) + Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); + + -+// LA added: ++ // LA added: + void jr (Register reg) { jirl(R0, reg, 0); } + void jalr(Register reg) { jirl(RA, reg, 0); } + void nop () { andi(R0, R0, 0); } @@ -37949,6 +37814,10 @@ index 0000000000..48d4ad07fd + code()->clear_last_insn(); + } + ++ // Code for java.math.BigInteger::mulAdd intrinsic. ++ void mul_add(Register out, Register in, Register offset, ++ Register len, Register k); ++ +#undef VIRTUAL + +public: @@ -37995,8 +37864,10 @@ index 0000000000..48d4ad07fd + loadstore_t(reg, base, index, scale, disp, type); + } + ++#ifdef COMPILER2 + void reduce(Register dst, Register src, FloatRegister vsrc, FloatRegister tmp1, FloatRegister tmp2, BasicType type, int opcode, int vector_size); + void reduce(FloatRegister dst, FloatRegister src, FloatRegister vsrc, FloatRegister tmp, BasicType type, int opcode, int vector_size); ++#endif + +private: + template @@ -38013,9 +37884,15 @@ index 0000000000..48d4ad07fd + void loadstore(FloatRegister reg, Register base, int disp, int type); + void loadstore(FloatRegister reg, Register base, Register disp, int type); + ++#ifdef COMPILER2 + void reduce_ins_v(FloatRegister vec1, FloatRegister vec2, FloatRegister vec3, BasicType type, int opcode); + void reduce_ins_r(Register reg1, Register reg2, Register reg3, BasicType type, int opcode); + void reduce_ins_f(FloatRegister reg1, FloatRegister reg2, FloatRegister reg3, BasicType type, int opcode); ++#endif ++ void generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef); ++ void generate_kernel_cos(FloatRegister x, address dcos_coef); ++ void generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2); ++ void generate__kernel_rem_pio2(address two_over_pi, address pio2); +}; + +/** @@ -38057,16 +37934,16 @@ index 0000000000..48d4ad07fd + Label _branches; +}; + -+#endif // CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp new file mode 100644 -index 0000000000..f98d93174f +index 0000000000..49302590c3 --- /dev/null +++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch.inline.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2017, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -38089,24 +37966,1655 @@ index 0000000000..f98d93174f + * + */ + -+#ifndef CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP -+#define CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++#ifndef CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP + +#include "asm/assembler.inline.hpp" +#include "asm/macroAssembler.hpp" +#include "asm/codeBuffer.hpp" +#include "code/codeCache.hpp" + -+#endif // CPU_LOONGARCH_VM_MACROASSEMBLER_LOONGARCH_INLINE_HPP ++#endif // CPU_LOONGARCH_MACROASSEMBLER_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp +new file mode 100644 +index 0000000000..3ed4c36651 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/macroAssembler_loongarch_trig.cpp +@@ -0,0 +1,1625 @@ ++/* Copyright (c) 2018, 2020, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "macroAssembler_loongarch.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T4 RT4 ++#define T5 RT5 ++#define T6 RT6 ++#define T7 RT7 ++#define T8 RT8 ++ ++// The following code is a optimized version of fdlibm sin/cos implementation ++// (C code is in share/runtime/sharedRuntimeTrig.cpp) adapted for LOONGARCH64. ++ ++// Please refer to sin/cos approximation via polynomial and ++// trigonometric argument reduction techniques to the following literature: ++// ++// [1] Muller, Jean-Michel, Nicolas Brisebarre, Florent De Dinechin, ++// Claude-Pierre Jeannerod, Vincent Lefevre, Guillaume Melquiond, ++// Nathalie Revol, Damien Stehlé, and Serge Torres: ++// Handbook of floating-point arithmetic. ++// Springer Science & Business Media, 2009. ++// [2] K. C. Ng ++// Argument Reduction for Huge Arguments: Good to the Last Bit ++// July 13, 1992, SunPro ++// ++// HOW TO READ THIS CODE: ++// This code consists of several functions. Each function has following header: ++// 1) Description ++// 2) C-pseudo code with differences from fdlibm marked by comments starting ++// with "NOTE". 
Check unmodified fdlibm code in ++// share/runtime/SharedRuntimeTrig.cpp ++// 3) Brief textual description of changes between fdlibm and current ++// implementation along with optimization notes (if applicable) ++// 4) Assumptions, input and output ++// 5) (Optional) additional notes about intrinsic implementation ++// Each function is separated in blocks which follow the pseudo-code structure ++// ++// HIGH-LEVEL ALGORITHM DESCRIPTION: ++// - entry point: generate_dsin_dcos(...); ++// - check corner cases: NaN, INF, tiny argument. ++// - check if |x| < Pi/4. Then approximate sin/cos via polynomial (kernel_sin/kernel_cos) ++// -- else proceed to argument reduction routine (__ieee754_rem_pio2) and ++// use reduced argument to get result via kernel_sin/kernel_cos ++// ++// HIGH-LEVEL CHANGES BETWEEN INTRINSICS AND FDLIBM: ++// 1) two_over_pi table fdlibm representation is int[], while intrinsic version ++// has these int values converted to double representation to load converted ++// double values directly (see stubRoutines_aarch4::_two_over_pi) ++// 2) Several loops are unrolled and vectorized: see comments in code after ++// labels: SKIP_F_LOAD, RECOMP_FOR1_CHECK, RECOMP_FOR2 ++// 3) fdlibm npio2_hw table now has "prefix" with constants used in ++// calculation. These constants are loaded from npio2_hw table instead of ++// constructing it in code (see stubRoutines_loongarch64.cpp) ++// 4) Polynomial coefficients for sin and cos are moved to table sin_coef ++// and cos_coef to use the same optimization as in 3). It allows to load most of ++// required constants via single instruction ++// ++// ++// ++///* __ieee754_rem_pio2(x,y) ++// * ++// * returns the remainder of x rem pi/2 in y[0]+y[1] (i.e. like x div pi/2) ++// * x is input argument, y[] is hi and low parts of reduced argument (x) ++// * uses __kernel_rem_pio2() ++// */ ++// // use tables(see stubRoutines_loongarch64.cpp): two_over_pi and modified npio2_hw ++// ++// BEGIN __ieee754_rem_pio2 PSEUDO CODE ++// ++//static int __ieee754_rem_pio2(double x, double *y) { ++// double z,w,t,r,fn; ++// double tx[3]; ++// int e0,i,j,nx,n,ix,hx,i0; ++// ++// i0 = ((*(int*)&two24A)>>30)^1; /* high word index */ ++// hx = *(i0+(int*)&x); /* high word of x */ ++// ix = hx&0x7fffffff; ++// if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ ++// if(hx>0) { ++// z = x - pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z - pio2_1t; ++// y[1] = (z-y[0])-pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z -= pio2_2; ++// y[0] = z - pio2_2t; ++// y[1] = (z-y[0])-pio2_2t; ++// } ++// return 1; ++// } else { /* negative x */ ++// z = x + pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z + pio2_1t; ++// y[1] = (z-y[0])+pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z += pio2_2; ++// y[0] = z + pio2_2t; ++// y[1] = (z-y[0])+pio2_2t; ++// } ++// return -1; ++// } ++// } ++// if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ ++// t = fabsd(x); ++// n = (int) (t*invpio2+half); ++// fn = (double)n; ++// r = t-fn*pio2_1; ++// w = fn*pio2_1t; /* 1st round good to 85 bit */ ++// // NOTE: y[0] = r-w; is moved from if/else below to be before "if" ++// y[0] = r-w; ++// if(n<32&&ix!=npio2_hw[n-1]) { ++// // y[0] = r-w; /* quick check no cancellation */ // NOTE: moved earlier ++// } else { ++// j = ix>>20; ++// // y[0] = r-w; // NOTE: moved earlier ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>16) { /* 2nd iteration needed, good to 118 */ ++// 
t = r; ++// w = fn*pio2_2; ++// r = t-w; ++// w = fn*pio2_2t-((t-r)-w); ++// y[0] = r-w; ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>49) { /* 3rd iteration need, 151 bits acc */ ++// t = r; /* will cover all possible cases */ ++// w = fn*pio2_3; ++// r = t-w; ++// w = fn*pio2_3t-((t-r)-w); ++// y[0] = r-w; ++// } ++// } ++// } ++// y[1] = (r-y[0])-w; ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// else return n; ++// } ++// /* ++// * all other (large) arguments ++// */ ++// // NOTE: this check is removed, because it was checked in dsin/dcos ++// // if(ix>=0x7ff00000) { /* x is inf or NaN */ ++// // y[0]=y[1]=x-x; return 0; ++// // } ++// /* set z = scalbn(|x|,ilogb(x)-23) */ ++// *(1-i0+(int*)&z) = *(1-i0+(int*)&x); ++// e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */ ++// *(i0+(int*)&z) = ix - (e0<<20); ++// ++// // NOTE: "for" loop below in unrolled. See comments in asm code ++// for(i=0;i<2;i++) { ++// tx[i] = (double)((int)(z)); ++// z = (z-tx[i])*two24A; ++// } ++// ++// tx[2] = z; ++// nx = 3; ++// ++// // NOTE: while(tx[nx-1]==zeroA) nx--; is unrolled. See comments in asm code ++// while(tx[nx-1]==zeroA) nx--; /* skip zero term */ ++// ++// n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// return n; ++//} ++// ++// END __ieee754_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic for __ieee754_rem_pio2: ++// 1. INF/NaN check for huge argument is removed in comparison with fdlibm ++// code, because this check is already done in dcos/dsin code ++// 2. Most constants are now loaded from table instead of direct initialization ++// 3. Two loops are unrolled ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// 2. Assume SCR1 = 0x3fe921fb00000000 (~ PI/4) ++// 3. Assume ix = A3 ++// Input and output: ++// 1. Input: X = A0 ++// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. See comments in code about their usage ++void MacroAssembler::generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2) { ++ const int64_t PIO2_1t = 0x3DD0B4611A626331ULL; ++ const int64_t PIO2_2 = 0x3DD0B4611A600000ULL; ++ const int64_t PIO2_2t = 0x3BA3198A2E037073ULL; ++ Label X_IS_NEGATIVE, X_IS_MEDIUM_OR_LARGE, X_IS_POSITIVE_LONG_PI, LARGE_ELSE, ++ REDUCTION_DONE, X_IS_MEDIUM_BRANCH_DONE, X_IS_LARGE, NX_SET, ++ X_IS_NEGATIVE_LONG_PI; ++ Register X = A0, n = A2, ix = A3, jv = A4, tmp5 = A5, jx = A6, ++ tmp3 = A7, iqBase = T0, ih = T1, i = T2; ++ FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, ++ vt = FT1, v24 = FT8, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v31 = FT15; ++ ++ push2(S0, S1); ++ ++ // initializing constants first ++ li(SCR1, 0x3ff921fb54400000); // PIO2_1 ++ li(SCR2, 0x4002d97c); // 3*PI/4 high word ++ movgr2fr_d(v1, SCR1); // v1 = PIO2_1 ++ bge(ix, SCR2, X_IS_MEDIUM_OR_LARGE); ++ ++ block_comment("if(ix<0x4002d97c) {... 
/* |x| ~< 3pi/4 */ "); { ++ blt(X, R0, X_IS_NEGATIVE); ++ ++ block_comment("if(hx>0) {"); { ++ fsub_d(v2, v0, v1); // v2 = z = x - pio2_1 ++ srli_d(SCR1, SCR1, 32); ++ li(n, 1); ++ beq(ix, SCR1, X_IS_POSITIVE_LONG_PI); ++ ++ block_comment("case: hx > 0 && ix!=0x3ff921fb {"); { /* 33+53 bit pi is good enough */ ++ li(SCR2, PIO2_1t); ++ movgr2fr_d(v27, SCR2); ++ fsub_d(v4, v2, v27); // v4 = y[0] = z - pio2_1t; ++ fsub_d(v5, v2, v4); ++ fsub_d(v5, v5, v27); // v5 = y[1] = (z-y[0])-pio2_1t ++ b(REDUCTION_DONE); ++ } ++ ++ block_comment("case: hx > 0 &*& ix==0x3ff921fb {"); { /* near pi/2, use 33+33+53 bit pi */ ++ bind(X_IS_POSITIVE_LONG_PI); ++ li(SCR1, PIO2_2); ++ li(SCR2, PIO2_2t); ++ movgr2fr_d(v27, SCR1); ++ movgr2fr_d(v6, SCR2); ++ fsub_d(v2, v2, v27); // z-= pio2_2 ++ fsub_d(v4, v2, v6); // y[0] = z - pio2_2t ++ fsub_d(v5, v2, v4); ++ fsub_d(v5, v5, v6); // v5 = (z - y[0]) - pio2_2t ++ b(REDUCTION_DONE); ++ } ++ } ++ ++ block_comment("case: hx <= 0)"); { ++ bind(X_IS_NEGATIVE); ++ fadd_d(v2, v0, v1); // v2 = z = x + pio2_1 ++ srli_d(SCR1, SCR1, 32); ++ li(n, -1); ++ beq(ix, SCR1, X_IS_NEGATIVE_LONG_PI); ++ ++ block_comment("case: hx <= 0 && ix!=0x3ff921fb) {"); { /* 33+53 bit pi is good enough */ ++ li(SCR2, PIO2_1t); ++ movgr2fr_d(v27, SCR2); ++ fadd_d(v4, v2, v27); // v4 = y[0] = z + pio2_1t; ++ fsub_d(v5, v2, v4); ++ fadd_d(v5, v5, v27); // v5 = y[1] = (z-y[0]) + pio2_1t ++ b(REDUCTION_DONE); ++ } ++ ++ block_comment("case: hx <= 0 && ix==0x3ff921fb"); { /* near pi/2, use 33+33+53 bit pi */ ++ bind(X_IS_NEGATIVE_LONG_PI); ++ li(SCR1, PIO2_2); ++ li(SCR2, PIO2_2t); ++ movgr2fr_d(v27, SCR1); ++ movgr2fr_d(v6, SCR2); ++ fadd_d(v2, v2, v27); // z += pio2_2 ++ fadd_d(v4, v2, v6); // y[0] = z + pio2_2t ++ fsub_d(v5, v2, v4); ++ fadd_d(v5, v5, v6); // v5 = (z - y[0]) + pio2_2t ++ b(REDUCTION_DONE); ++ } ++ } ++ } ++ bind(X_IS_MEDIUM_OR_LARGE); ++ li(SCR1, 0x413921fb); ++ blt(SCR1, ix, X_IS_LARGE); // ix < = 0x413921fb ? ++ ++ block_comment("|x| ~<= 2^19*(pi/2), medium size"); { ++ li(ih, npio2_hw); ++ fld_d(v4, ih, 0); ++ fld_d(v5, ih, 8); ++ fld_d(v6, ih, 16); ++ fld_d(v7, ih, 24); ++ fabs_d(v31, v0); // v31 = t = |x| ++ addi_d(ih, ih, 64); ++ fmadd_d(v2, v31, v5, v4); // v2 = t * invpio2 + half (invpio2 = 53 bits of 2/pi, half = 0.5) ++ ftintrz_w_d(vt, v2); // n = (int) v2 ++ movfr2gr_s(n, vt); ++ vfrintrz_d(v2, v2); ++ fnmsub_d(v3, v2, v6, v31); // v3 = r = t - fn * pio2_1 ++ fmul_d(v26, v2, v7); // v26 = w = fn * pio2_1t ++ fsub_d(v4, v3, v26); // y[0] = r - w. Calculated before branch ++ li(SCR1, 32); ++ blt(SCR1, n, LARGE_ELSE); ++ addi_w(tmp5, n, -1); // tmp5 = n - 1 ++ alsl_d(tmp5, tmp5, ih, 2 - 1); ++ ld_w(jv, tmp5, 0); ++ bne(ix, jv, X_IS_MEDIUM_BRANCH_DONE); ++ ++ block_comment("else block for if(n<32&&ix!=npio2_hw[n-1])"); { ++ bind(LARGE_ELSE); ++ movfr2gr_d(jx, v4); ++ srli_d(tmp5, ix, 20); // j = ix >> 20 ++ slli_d(jx, jx, 1); ++ srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++ sub_d(tmp3, tmp5, tmp3); ++ ++ block_comment("if(i>16)"); { ++ li(SCR1, 16); ++ bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); ++ // i > 16. 
2nd iteration needed ++ fld_d(v6, ih, -32); ++ fld_d(v7, ih, -24); ++ fmov_d(v28, v3); // t = r ++ fmul_d(v29, v2, v6); // w = v29 = fn * pio2_2 ++ fsub_d(v3, v28, v29); // r = t - w ++ fsub_d(v31, v28, v3); // v31 = (t - r) ++ fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) ++ fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_2t - ((t - r) - w) ++ fsub_d(v4, v3, v26); // y[0] = r - w ++ movfr2gr_d(jx, v4); ++ slli_d(jx, jx, 1); ++ srli_d(tmp3, jx, 32 + 20 + 1); // r7 = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++ sub_d(tmp3, tmp5, tmp3); ++ ++ block_comment("if(i>49)"); { ++ li(SCR1, 49); ++ bge(SCR1, tmp3, X_IS_MEDIUM_BRANCH_DONE); ++ // 3rd iteration need, 151 bits acc ++ fld_d(v6, ih, -16); ++ fld_d(v7, ih, -8); ++ fmov_d(v28, v3); // save "r" ++ fmul_d(v29, v2, v6); // v29 = fn * pio2_3 ++ fsub_d(v3, v28, v29); // r = r - w ++ fsub_d(v31, v28, v3); // v31 = (t - r) ++ fsub_d(v31, v29, v31); // v31 = w - (t - r) = - ((t - r) - w) ++ fmadd_d(v26, v2, v7, v31); // v26 = w = fn*pio2_3t - ((t - r) - w) ++ fsub_d(v4, v3, v26); // y[0] = r - w ++ } ++ } ++ } ++ block_comment("medium x tail"); { ++ bind(X_IS_MEDIUM_BRANCH_DONE); ++ fsub_d(v5, v3, v4); // v5 = y[1] = (r - y[0]) ++ fsub_d(v5, v5, v26); // v5 = y[1] = (r - y[0]) - w ++ blt(R0, X, REDUCTION_DONE); ++ fneg_d(v4, v4); ++ sub_w(n, R0, n); ++ fneg_d(v5, v5); ++ b(REDUCTION_DONE); ++ } ++ } ++ ++ block_comment("all other (large) arguments"); { ++ bind(X_IS_LARGE); ++ srli_d(SCR1, ix, 20); // ix >> 20 ++ li(tmp5, 0x4170000000000000); ++ addi_w(SCR1, SCR1, -1046); // e0 ++ movgr2fr_d(v24, tmp5); // init two24A value ++ slli_w(jv, SCR1, 20); // ix - (e0<<20) ++ sub_w(jv, ix, jv); ++ slli_d(jv, jv, 32); ++ addi_w(SCR2, SCR1, -3); ++ bstrins_d(jv, X, 31, 0); // jv = z ++ li(i, 24); ++ movgr2fr_d(v26, jv); // v26 = z ++ ++ block_comment("unrolled for(i=0;i<2;i++) {tx[i] = (double)((int)(z));z = (z-tx[i])*two24A;}"); { ++ // tx[0,1,2] = v6,v7,v26 ++ vfrintrz_d(v6, v26); // v6 = (double)((int)v26) ++ div_w(jv, SCR2, i); // jv = (e0 - 3)/24 ++ fsub_d(v26, v26, v6); ++ addi_d(SP, SP, -560); ++ fmul_d(v26, v26, v24); ++ vfrintrz_d(v7, v26); // v7 = (double)((int)v26) ++ li(jx, 2); // calculate jx as nx - 1, which is initially 2. Not a part of unrolled loop ++ fsub_d(v26, v26, v7); ++ } ++ ++ block_comment("nx calculation with unrolled while(tx[nx-1]==zeroA) nx--;"); { ++ vxor_v(vt, vt, vt); ++ fcmp_cne_d(FCC0, v26, vt); // if NE then jx == 2. else it's 1 or 0 ++ addi_d(iqBase, SP, 480); // base of iq[] ++ fmul_d(v3, v26, v24); ++ bcnez(FCC0, NX_SET); ++ fcmp_cne_d(FCC0, v7, vt); // v7 == 0 => jx = 0. Else jx = 1 ++ movcf2gr(jx, FCC0); ++ } ++ bind(NX_SET); ++ generate__kernel_rem_pio2(two_over_pi, pio2); ++ // now we have y[0] = v4, y[1] = v5 and n = r2 ++ bge(X, R0, REDUCTION_DONE); ++ fneg_d(v4, v4); ++ fneg_d(v5, v5); ++ sub_w(n, R0, n); ++ } ++ bind(REDUCTION_DONE); ++ ++ pop2(S0, S1); ++} ++ ++///* ++// * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) ++// * double x[],y[]; int e0,nx,prec; int ipio2[]; ++// * ++// * __kernel_rem_pio2 return the last three digits of N with ++// * y = x - N*pi/2 ++// * so that |y| < pi/2. ++// * ++// * The method is to compute the integer (mod 8) and fraction parts of ++// * (2/pi)*x without doing the full multiplication. In general we ++// * skip the part of the product that are known to be a huge integer ( ++// * more accurately, = 0 mod 8 ). Thus the number of operations are ++// * independent of the exponent of the input. 
++// * ++// * NOTE: 2/pi int representation is converted to double ++// * // (2/pi) is represented by an array of 24-bit integers in ipio2[]. ++// * ++// * Input parameters: ++// * x[] The input value (must be positive) is broken into nx ++// * pieces of 24-bit integers in double precision format. ++// * x[i] will be the i-th 24 bit of x. The scaled exponent ++// * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 ++// * match x's up to 24 bits. ++// * ++// * Example of breaking a double positive z into x[0]+x[1]+x[2]: ++// * e0 = ilogb(z)-23 ++// * z = scalbn(z,-e0) ++// * for i = 0,1,2 ++// * x[i] = floor(z) ++// * z = (z-x[i])*2**24 ++// * ++// * ++// * y[] ouput result in an array of double precision numbers. ++// * The dimension of y[] is: ++// * 24-bit precision 1 ++// * 53-bit precision 2 ++// * 64-bit precision 2 ++// * 113-bit precision 3 ++// * The actual value is the sum of them. Thus for 113-bit ++// * precsion, one may have to do something like: ++// * ++// * long double t,w,r_head, r_tail; ++// * t = (long double)y[2] + (long double)y[1]; ++// * w = (long double)y[0]; ++// * r_head = t+w; ++// * r_tail = w - (r_head - t); ++// * ++// * e0 The exponent of x[0] ++// * ++// * nx dimension of x[] ++// * ++// * prec an interger indicating the precision: ++// * 0 24 bits (single) ++// * 1 53 bits (double) ++// * 2 64 bits (extended) ++// * 3 113 bits (quad) ++// * ++// * NOTE: ipio2[] array below is converted to double representation ++// * //ipio2[] ++// * // integer array, contains the (24*i)-th to (24*i+23)-th ++// * // bit of 2/pi after binary point. The corresponding ++// * // floating value is ++// * ++// * ipio2[i] * 2^(-24(i+1)). ++// * ++// * Here is the description of some local variables: ++// * ++// * jk jk+1 is the initial number of terms of ipio2[] needed ++// * in the computation. The recommended value is 2,3,4, ++// * 6 for single, double, extended,and quad. ++// * ++// * jz local integer variable indicating the number of ++// * terms of ipio2[] used. ++// * ++// * jx nx - 1 ++// * ++// * jv index for pointing to the suitable ipio2[] for the ++// * computation. In general, we want ++// * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 ++// * is an integer. Thus ++// * e0-3-24*jv >= 0 or (e0-3)/24 >= jv ++// * Hence jv = max(0,(e0-3)/24). ++// * ++// * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. ++// * ++// * q[] double array with integral value, representing the ++// * 24-bits chunk of the product of x and 2/pi. ++// * ++// * q0 the corresponding exponent of q[0]. Note that the ++// * exponent for q[i] would be q0-24*i. ++// * ++// * PIo2[] double precision array, obtained by cutting pi/2 ++// * into 24 bits chunks. ++// * ++// * f[] ipio2[] in floating point ++// * ++// * iq[] integer array by breaking up q[] in 24-bits chunk. ++// * ++// * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] ++// * ++// * ih integer. If >0 it indicates q[] is >= 0.5, hence ++// * it also indicates the *sign* of the result. ++// * ++// */ ++// ++// Use PIo2 table(see stubRoutines_loongarch64.cpp) ++// ++// BEGIN __kernel_rem_pio2 PSEUDO CODE ++// ++//static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, /* NOTE: converted to double */ const double *ipio2 // const int *ipio2) { ++// int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; ++// double z,fw,f[20],fq[20],q[20]; ++// ++// /* initialize jk*/ ++// // jk = init_jk[prec]; // NOTE: prec==2 for double. jk is always 4. 
++// jp = jk; // NOTE: always 4 ++// ++// /* determine jx,jv,q0, note that 3>q0 */ ++// jx = nx-1; ++// jv = (e0-3)/24; if(jv<0) jv=0; ++// q0 = e0-24*(jv+1); ++// ++// /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ ++// j = jv-jx; m = jx+jk; ++// ++// // NOTE: split into two for-loops: one with zeroB and one with ipio2[j]. It ++// // allows the use of wider loads/stores ++// for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; //(double) ipio2[j]; ++// ++// // NOTE: unrolled and vectorized "for". See comments in asm code ++// /* compute q[0],q[1],...q[jk] */ ++// for (i=0;i<=jk;i++) { ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++// } ++// ++// jz = jk; ++//recompute: ++// /* distill q[] into iq[] reversingly */ ++// for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++// fw = (double)((int)(twon24* z)); ++// iq[i] = (int)(z-two24B*fw); ++// z = q[j-1]+fw; ++// } ++// ++// /* compute n */ ++// z = scalbnA(z,q0); /* actual value of z */ ++// z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ ++// n = (int) z; ++// z -= (double)n; ++// ih = 0; ++// if(q0>0) { /* need iq[jz-1] to determine n */ ++// i = (iq[jz-1]>>(24-q0)); n += i; ++// iq[jz-1] -= i<<(24-q0); ++// ih = iq[jz-1]>>(23-q0); ++// } ++// else if(q0==0) ih = iq[jz-1]>>23; ++// else if(z>=0.5) ih=2; ++// ++// if(ih>0) { /* q > 0.5 */ ++// n += 1; carry = 0; ++// for(i=0;i0) { /* rare case: chance is 1 in 12 */ ++// switch(q0) { ++// case 1: ++// iq[jz-1] &= 0x7fffff; break; ++// case 2: ++// iq[jz-1] &= 0x3fffff; break; ++// } ++// } ++// if(ih==2) { ++// z = one - z; ++// if(carry!=0) z -= scalbnA(one,q0); ++// } ++// } ++// ++// /* check if recomputation is needed */ ++// if(z==zeroB) { ++// j = 0; ++// for (i=jz-1;i>=jk;i--) j |= iq[i]; ++// if(j==0) { /* need recomputation */ ++// for(k=1;iq[jk-k]==0;k++); /* k = no. of terms needed */ ++// ++// for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */ ++// f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++// q[i] = fw; ++// } ++// jz += k; ++// goto recompute; ++// } ++// } ++// ++// /* chop off zero terms */ ++// if(z==0.0) { ++// jz -= 1; q0 -= 24; ++// while(iq[jz]==0) { jz--; q0-=24;} ++// } else { /* break z into 24-bit if necessary */ ++// z = scalbnA(z,-q0); ++// if(z>=two24B) { ++// fw = (double)((int)(twon24*z)); ++// iq[jz] = (int)(z-two24B*fw); ++// jz += 1; q0 += 24; ++// iq[jz] = (int) fw; ++// } else iq[jz] = (int) z ; ++// } ++// ++// /* convert integer "bit" chunk to floating-point value */ ++// fw = scalbnA(one,q0); ++// for(i=jz;i>=0;i--) { ++// q[i] = fw*(double)iq[i]; fw*=twon24; ++// } ++// ++// /* compute PIo2[0,...,jp]*q[jz,...,0] */ ++// for(i=jz;i>=0;i--) { ++// for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++// fq[jz-i] = fw; ++// } ++// ++// // NOTE: switch below is eliminated, because prec is always 2 for doubles ++// /* compress fq[] into y[] */ ++// //switch(prec) { ++// //case 0: ++// // fw = 0.0; ++// // for (i=jz;i>=0;i--) fw += fq[i]; ++// // y[0] = (ih==0)? fw: -fw; ++// // break; ++// //case 1: ++// //case 2: ++// fw = 0.0; ++// for (i=jz;i>=0;i--) fw += fq[i]; ++// y[0] = (ih==0)? fw: -fw; ++// fw = fq[0]-fw; ++// for (i=1;i<=jz;i++) fw += fq[i]; ++// y[1] = (ih==0)? 
fw: -fw; ++// // break; ++// //case 3: /* painful */ ++// // for (i=jz;i>0;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (i=jz;i>1;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; ++// // if(ih==0) { ++// // y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; ++// // } else { ++// // y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; ++// // } ++// //} ++// return n&7; ++//} ++// ++// END __kernel_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. One loop is unrolled and vectorized (see comments in code) ++// 2. One loop is split into 2 loops (see comments in code) ++// 3. Non-double code is removed(last switch). Sevaral variables became ++// constants because of that (see comments in code) ++// 4. Use of jx, which is nx-1 instead of nx ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// Input and output: ++// 1. Input: X = A0, jx == nx - 1 == A6, e0 == SCR1 ++// 2. Return n in A2, y[0] == y0 == FA4, y[1] == y1 == FA5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. See comments in code about their usage ++void MacroAssembler::generate__kernel_rem_pio2(address two_over_pi, address pio2) { ++ Label Q_DONE, JX_IS_0, JX_IS_2, COMP_INNER_LOOP, RECOMP_FOR2, Q0_ZERO_CMP_LT, ++ RECOMP_CHECK_DONE_NOT_ZERO, Q0_ZERO_CMP_DONE, COMP_FOR, Q0_ZERO_CMP_EQ, ++ INIT_F_ZERO, RECOMPUTE, IH_FOR_INCREMENT, IH_FOR_STORE, RECOMP_CHECK_DONE, ++ Z_IS_LESS_THAN_TWO24B, Z_IS_ZERO, FW_Y1_NO_NEGATION, ++ RECOMP_FW_UPDATED, Z_ZERO_CHECK_DONE, FW_FOR1, IH_AFTER_SWITCH, IH_HANDLED, ++ CONVERTION_FOR, FW_Y0_NO_NEGATION, FW_FOR1_DONE, FW_FOR2, FW_FOR2_DONE, ++ IH_FOR, SKIP_F_LOAD, RECOMP_FOR1, RECOMP_FIRST_FOR, INIT_F_COPY, ++ RECOMP_FOR1_CHECK; ++ Register tmp2 = A1, n = A2, jv = A4, tmp5 = A5, jx = A6, ++ tmp3 = A7, iqBase = T0, ih = T1, i = T2, tmp1 = T3, ++ jz = S0, j = T5, twoOverPiBase = T6, tmp4 = S1, qBase = T8; ++ FloatRegister v0 = FA0, v1 = FA1, v2 = FA2, v3 = FA3, v4 = FA4, v5 = FA5, v6 = FA6, v7 = FA7, ++ vt = FT1, v17 = FT2, v18 = FT3, v19 = FT4, v20 = FT5, v21 = FT6, v22 = FT7, v24 = FT8, ++ v25 = FT9, v26 = FT10, v27 = FT11, v28 = FT12, v29 = FT13, v30 = FT14, v31 = FT15; ++ // jp = jk == init_jk[prec] = init_jk[2] == {2,3,4,6}[2] == 4 ++ // jx = nx - 1 ++ li(twoOverPiBase, two_over_pi); ++ slti(SCR2, jv, 0); ++ addi_w(tmp4, jx, 4); // tmp4 = m = jx + jk = jx + 4. jx is in {0,1,2} so m is in [4,5,6] ++ masknez(jv, jv, SCR2); ++ if (UseLASX) ++ xvxor_v(v26, v26, v26); ++ else ++ vxor_v(v26, v26, v26); ++ addi_w(tmp5, jv, 1); // jv+1 ++ sub_w(j, jv, jx); ++ addi_d(qBase, SP, 320); // base of q[] ++ mul_w(SCR2, i, tmp5); // q0 = e0-24*(jv+1) ++ sub_w(SCR1, SCR1, SCR2); ++ // use double f[20], fq[20], q[20], iq[20] on stack, which is ++ // (20 + 20 + 20) x 8 + 20 x 4 = 560 bytes. From lower to upper addresses it ++ // will contain f[20], fq[20], q[20], iq[20] ++ // now initialize f[20] indexes 0..m (inclusive) ++ // for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j]; ++ move(tmp5, SP); ++ ++ block_comment("for(i=0;i<=m;i++,j++) f[i] = (j<0)? 
zeroB : /* NOTE: converted to double */ ipio2[j]; // (double) ipio2[j];"); { ++ xorr(i, i, i); ++ bge(j, R0, INIT_F_COPY); ++ bind(INIT_F_ZERO); ++ if (UseLASX) { ++ xvst(v26, tmp5, 0); ++ } else { ++ vst(v26, tmp5, 0); ++ vst(v26, tmp5, 16); ++ } ++ addi_d(tmp5, tmp5, 32); ++ addi_w(i, i, 4); ++ addi_w(j, j, 4); ++ blt(j, R0, INIT_F_ZERO); ++ sub_w(i, i, j); ++ move(j, R0); ++ bind(INIT_F_COPY); ++ alsl_d(tmp1, j, twoOverPiBase, 3 - 1); // ipio2[j] start address ++ if (UseLASX) { ++ xvld(v18, tmp1, 0); ++ xvld(v19, tmp1, 32); ++ } else { ++ vld(v18, tmp1, 0); ++ vld(v19, tmp1, 16); ++ vld(v20, tmp1, 32); ++ vld(v21, tmp1, 48); ++ } ++ alsl_d(tmp5, i, SP, 3 - 1); ++ if (UseLASX) { ++ xvst(v18, tmp5, 0); ++ xvst(v19, tmp5, 32); ++ } else { ++ vst(v18, tmp5, 0); ++ vst(v19, tmp5, 16); ++ vst(v20, tmp5, 32); ++ vst(v21, tmp5, 48); ++ } ++ } ++ // v18..v21 can actually contain f[0..7] ++ beqz(i, SKIP_F_LOAD); // i == 0 => f[i] == f[0] => already loaded ++ if (UseLASX) { ++ xvld(v18, SP, 0); // load f[0..7] ++ xvld(v19, SP, 32); ++ } else { ++ vld(v18, SP, 0); // load f[0..7] ++ vld(v19, SP, 16); ++ vld(v20, SP, 32); ++ vld(v21, SP, 48); ++ } ++ bind(SKIP_F_LOAD); ++ // calculate 2^q0 and 2^-q0, which we'll need further. ++ // q0 is exponent. So, calculate biased exponent(q0+1023) ++ sub_w(tmp4, R0, SCR1); ++ addi_w(tmp5, SCR1, 1023); ++ addi_w(tmp4, tmp4, 1023); ++ // Unroll following for(s) depending on jx in [0,1,2] ++ // for (i=0;i<=jk;i++) { ++ // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++ // } ++ // Unrolling for jx == 0 case: ++ // q[0] = x[0] * f[0] ++ // q[1] = x[0] * f[1] ++ // q[2] = x[0] * f[2] ++ // q[3] = x[0] * f[3] ++ // q[4] = x[0] * f[4] ++ // ++ // Vectorization for unrolled jx == 0 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[0] ++ // {q[2], q[3]} = {f[2], f[3]} * x[0] ++ // q[4] = f[4] * x[0] ++ // ++ // Unrolling for jx == 1 case: ++ // q[0] = x[0] * f[1] + x[1] * f[0] ++ // q[1] = x[0] * f[2] + x[1] * f[1] ++ // q[2] = x[0] * f[3] + x[1] * f[2] ++ // q[3] = x[0] * f[4] + x[1] * f[3] ++ // q[4] = x[0] * f[5] + x[1] * f[4] ++ // ++ // Vectorization for unrolled jx == 1 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[1] ++ // {q[2], q[3]} = {f[2], f[3]} * x[1] ++ // q[4] = f[4] * x[1] ++ // {q[0], q[1]} += {f[1], f[2]} * x[0] ++ // {q[2], q[3]} += {f[3], f[4]} * x[0] ++ // q[4] += f[5] * x[0] ++ // ++ // Unrolling for jx == 2 case: ++ // q[0] = x[0] * f[2] + x[1] * f[1] + x[2] * f[0] ++ // q[1] = x[0] * f[3] + x[1] * f[2] + x[2] * f[1] ++ // q[2] = x[0] * f[4] + x[1] * f[3] + x[2] * f[2] ++ // q[3] = x[0] * f[5] + x[1] * f[4] + x[2] * f[3] ++ // q[4] = x[0] * f[6] + x[1] * f[5] + x[2] * f[4] ++ // ++ // Vectorization for unrolled jx == 2 case: ++ // {q[0], q[1]} = {f[0], f[1]} * x[2] ++ // {q[2], q[3]} = {f[2], f[3]} * x[2] ++ // q[4] = f[4] * x[2] ++ // {q[0], q[1]} += {f[1], f[2]} * x[1] ++ // {q[2], q[3]} += {f[3], f[4]} * x[1] ++ // q[4] += f[5] * x[1] ++ // {q[0], q[1]} += {f[2], f[3]} * x[0] ++ // {q[2], q[3]} += {f[4], f[5]} * x[0] ++ // q[4] += f[6] * x[0] ++ block_comment("unrolled and vectorized computation of q[0]..q[jk]"); { ++ li(SCR2, 1); ++ slli_d(tmp5, tmp5, 52); // now it's 2^q0 double value ++ slli_d(tmp4, tmp4, 52); // now it's 2^-q0 double value ++ if (UseLASX) ++ xvpermi_d(v6, v6, 0); ++ else ++ vreplvei_d(v6, v6, 0); ++ blt(jx, SCR2, JX_IS_0); ++ addi_d(i, SP, 8); ++ if (UseLASX) { ++ xvld(v26, i, 0); // load f[1..4] ++ xvpermi_d(v3, v3, 0); ++ xvpermi_d(v7, v7, 0); ++ xvpermi_d(v20, v19, 85); ++ xvpermi_d(v21, v19, 170); ++ } else { ++ 
vld(v26, i, 0); // load f[1..4] ++ vld(v27, i, 16); ++ vreplvei_d(v3, v3, 0); ++ vreplvei_d(v7, v7, 0); ++ vreplvei_d(vt, v20, 1); ++ vreplvei_d(v21, v21, 0); ++ } ++ blt(SCR2, jx, JX_IS_2); ++ // jx == 1 ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v7); // f[0,3] * x[1] ++ fmul_d(v30, v19, v7); // f[4] * x[1] ++ xvfmadd_d(v28, v26, v6, v28); ++ fmadd_d(v30, v6, v20, v30); // v30 += f[5] * x[0] ++ } else { ++ vfmul_d(v28, v18, v7); // f[0,1] * x[1] ++ vfmul_d(v29, v19, v7); // f[2,3] * x[1] ++ fmul_d(v30, v20, v7); // f[4] * x[1] ++ vfmadd_d(v28, v26, v6, v28); ++ vfmadd_d(v29, v27, v6, v29); ++ fmadd_d(v30, v6, vt, v30); // v30 += f[5] * x[0] ++ } ++ b(Q_DONE); ++ bind(JX_IS_2); ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v3); // f[0,3] * x[2] ++ fmul_d(v30, v19, v3); // f[4] * x[2] ++ xvfmadd_d(v28, v26, v7, v28); ++ fmadd_d(v30, v7, v20, v30); // v30 += f[5] * x[1] ++ xvpermi_q(v18, v19, 3); ++ xvfmadd_d(v28, v18, v6, v28); ++ } else { ++ vfmul_d(v28, v18, v3); // f[0,1] * x[2] ++ vfmul_d(v29, v19, v3); // f[2,3] * x[2] ++ fmul_d(v30, v20, v3); // f[4] * x[2] ++ vfmadd_d(v28, v26, v7, v28); ++ vfmadd_d(v29, v27, v7, v29); ++ fmadd_d(v30, v7, vt, v30); // v30 += f[5] * x[1] ++ vfmadd_d(v28, v19, v6, v28); ++ vfmadd_d(v29, v20, v6, v29); ++ } ++ fmadd_d(v30, v6, v21, v30); // v30 += f[6] * x[0] ++ b(Q_DONE); ++ bind(JX_IS_0); ++ if (UseLASX) { ++ xvfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ fmul_d(v30, v19, v6); // f[4] * x[0] ++ } else { ++ vfmul_d(v28, v18, v6); // f[0,1] * x[0] ++ vfmul_d(v29, v19, v6); // f[2,3] * x[0] ++ fmul_d(v30, v20, v6); // f[4] * x[0] ++ } ++ bind(Q_DONE); ++ if (UseLASX) { ++ xvst(v28, qBase, 0); // save calculated q[0]...q[jk] ++ } else { ++ vst(v28, qBase, 0); // save calculated q[0]...q[jk] ++ vst(v29, qBase, 16); ++ } ++ fst_d(v30, qBase, 32); ++ } ++ li(i, 0x3E70000000000000); ++ li(jz, 4); ++ movgr2fr_d(v17, i); // v17 = twon24 ++ movgr2fr_d(v30, tmp5); // 2^q0 ++ vldi(v21, -960); // 0.125 (0x3fc0000000000000) ++ vldi(v20, -992); // 8.0 (0x4020000000000000) ++ movgr2fr_d(v22, tmp4); // 2^-q0 ++ ++ block_comment("recompute loop"); { ++ bind(RECOMPUTE); ++ // for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++ // fw = (double)((int)(twon24* z)); ++ // iq[i] = (int)(z-two24A*fw); ++ // z = q[j-1]+fw; ++ // } ++ block_comment("distill q[] into iq[] reversingly"); { ++ xorr(i, i, i); ++ move(j, jz); ++ alsl_d(tmp2, jz, qBase, 3 - 1); // q[jz] address ++ fld_d(v18, tmp2, 0); // z = q[j] and moving address to q[j-1] ++ addi_d(tmp2, tmp2, -8); ++ bind(RECOMP_FIRST_FOR); ++ fld_d(v27, tmp2, 0); ++ addi_d(tmp2, tmp2, -8); ++ fmul_d(v29, v17, v18); // twon24*z ++ vfrintrz_d(v29, v29); // (double)(int) ++ fnmsub_d(v28, v24, v29, v18); // v28 = z-two24A*fw ++ ftintrz_w_d(vt, v28); // (int)(z-two24A*fw) ++ alsl_d(SCR2, i, iqBase, 2 - 1); ++ fst_s(vt, SCR2, 0); ++ fadd_d(v18, v27, v29); ++ addi_w(i, i, 1); ++ addi_w(j, j, -1); ++ blt(R0, j, RECOMP_FIRST_FOR); ++ } ++ // compute n ++ fmul_d(v18, v18, v30); ++ fmul_d(v2, v18, v21); ++ vfrintrm_d(v2, v2); // v2 = floor(v2) == rounding towards -inf ++ fnmsub_d(v18, v2, v20, v18); // z -= 8.0*floor(z*0.125); ++ li(ih, 2); ++ vfrintrz_d(v2, v18); // v2 = (double)((int)z) ++ ftintrz_w_d(vt, v18); // n = (int) z; ++ movfr2gr_s(n, vt); ++ fsub_d(v18, v18, v2); // z -= (double)n; ++ ++ block_comment("q0-dependent initialization"); { ++ blt(SCR1, R0, Q0_ZERO_CMP_LT); // if (q0 > 0) ++ addi_w(j, jz, -1); // j = jz - 1 ++ alsl_d(SCR2, j, iqBase, 2 - 1); ++ ld_w(tmp2, SCR2, 0); // tmp2 = iq[jz-1] ++ beq(SCR1, R0, Q0_ZERO_CMP_EQ); ++ li(tmp4, 24); ++ 
sub_w(tmp4, tmp4, SCR1); // == 24 - q0 ++ srl_w(i, tmp2, tmp4); // i = iq[jz-1] >> (24-q0) ++ sll_w(tmp5, i, tmp4); ++ sub_w(tmp2, tmp2, tmp5); // iq[jz-1] -= i<<(24-q0); ++ alsl_d(SCR2, j, iqBase, 2 - 1); ++ st_w(tmp2, SCR2, 0); // store iq[jz-1] ++ addi_w(SCR2, tmp4, -1); // == 23 - q0 ++ add_w(n, n, i); // n+=i ++ srl_w(ih, tmp2, SCR2); // ih = iq[jz-1] >> (23-q0) ++ b(Q0_ZERO_CMP_DONE); ++ bind(Q0_ZERO_CMP_EQ); ++ srli_d(ih, tmp2, 23); // ih = iq[z-1] >> 23 ++ b(Q0_ZERO_CMP_DONE); ++ bind(Q0_ZERO_CMP_LT); ++ vldi(v4, -928); // 0.5 (0x3fe0000000000000) ++ fcmp_clt_d(FCC0, v18, v4); ++ movcf2gr(SCR2, FCC0); ++ masknez(ih, ih, SCR2); // if (z<0.5) ih = 0 ++ } ++ bind(Q0_ZERO_CMP_DONE); ++ bge(R0, ih, IH_HANDLED); ++ ++ block_comment("if(ih>) {"); { ++ // use rscratch2 as carry ++ ++ block_comment("for(i=0;i0) {"); { ++ bge(R0, SCR1, IH_AFTER_SWITCH); ++ // tmp3 still has iq[jz-1] value. no need to reload ++ // now, zero high tmp3 bits (rscratch1 number of bits) ++ li(j, 0xffffffff); ++ addi_w(i, jz, -1); // set i to jz-1 ++ srl_d(j, j, SCR1); ++ srli_w(tmp1, j, 8); ++ andr(tmp3, tmp3, tmp1); // we have 24-bit-based constants ++ alsl_d(tmp1, i, iqBase, 2 - 1); ++ st_w(tmp3, tmp1, 0); // save iq[jz-1] ++ } ++ bind(IH_AFTER_SWITCH); ++ li(tmp1, 2); ++ bne(ih, tmp1, IH_HANDLED); ++ ++ block_comment("if(ih==2) {"); { ++ vldi(v25, -912); // 1.0 (0x3ff0000000000000) ++ fsub_d(v18, v25, v18); // z = one - z; ++ beqz(SCR2, IH_HANDLED); ++ fsub_d(v18, v18, v30); // z -= scalbnA(one,q0); ++ } ++ } ++ bind(IH_HANDLED); ++ // check if recomputation is needed ++ vxor_v(vt, vt, vt); ++ fcmp_cne_d(FCC0, v18, vt); ++ bcnez(FCC0, RECOMP_CHECK_DONE_NOT_ZERO); ++ ++ block_comment("if(z==zeroB) {"); { ++ ++ block_comment("for (i=jz-1;i>=jk;i--) j |= iq[i];"); { ++ addi_w(i, jz, -1); ++ xorr(j, j, j); ++ b(RECOMP_FOR1_CHECK); ++ bind(RECOMP_FOR1); ++ alsl_d(tmp1, i, iqBase, 2 - 1); ++ ld_w(tmp1, tmp1, 0); ++ orr(j, j, tmp1); ++ addi_w(i, i, -1); ++ bind(RECOMP_FOR1_CHECK); ++ li(SCR2, 4); ++ bge(i, SCR2, RECOMP_FOR1); ++ } ++ bnez(j, RECOMP_CHECK_DONE); ++ ++ block_comment("if(j==0) {"); { ++ // for(k=1;iq[jk-k]==0;k++); // let's unroll it. jk == 4. So, read ++ // iq[3], iq[2], iq[1], iq[0] until non-zero value ++ ld_d(tmp1, iqBase, 0); // iq[0..3] ++ ld_d(tmp3, iqBase, 8); ++ li(j, 2); ++ masknez(tmp1, tmp1, tmp3); // set register for further consideration ++ orr(tmp1, tmp1, tmp3); ++ masknez(j, j, tmp3); // set initial k. Use j as k ++ srli_d(SCR2, tmp1, 32); ++ sltu(SCR2, R0, SCR2); ++ addi_w(i, jz, 1); ++ add_w(j, j, SCR2); ++ ++ block_comment("for(i=jz+1;i<=jz+k;i++) {...}"); { ++ add_w(jz, i, j); // i = jz+1, j = k-1. 
j+i = jz+k (which is a new jz) ++ bind(RECOMP_FOR2); ++ add_w(tmp1, jv, i); ++ alsl_d(SCR2, tmp1, twoOverPiBase, 3 - 1); ++ fld_d(v29, SCR2, 0); ++ add_w(tmp2, jx, i); ++ alsl_d(SCR2, tmp2, SP, 3 - 1); ++ fst_d(v29, SCR2, 0); ++ // f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++ // since jx = 0, 1 or 2 we can unroll it: ++ // for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++ // f[jx+i-j] == (for first iteration) f[jx+i], which is already v29 ++ alsl_d(tmp2, tmp2, SP, 3 - 1); // address of f[jx+i] ++ fld_d(v4, tmp2, -16); // load f[jx+i-2] and f[jx+i-1] ++ fld_d(v5, tmp2, -8); ++ fmul_d(v26, v6, v29); // initial fw ++ beqz(jx, RECOMP_FW_UPDATED); ++ fmadd_d(v26, v7, v5, v26); ++ li(SCR2, 1); ++ beq(jx, SCR2, RECOMP_FW_UPDATED); ++ fmadd_d(v26, v3, v4, v26); ++ bind(RECOMP_FW_UPDATED); ++ alsl_d(SCR2, i, qBase, 3 - 1); ++ fst_d(v26, SCR2, 0); // q[i] = fw; ++ addi_w(i, i, 1); ++ bge(jz, i, RECOMP_FOR2); // jz here is "old jz" + k ++ } ++ b(RECOMPUTE); ++ } ++ } ++ } ++ bind(RECOMP_CHECK_DONE); ++ // chop off zero terms ++ vxor_v(vt, vt, vt); ++ fcmp_ceq_d(FCC0, v18, vt); ++ bcnez(FCC0, Z_IS_ZERO); ++ ++ block_comment("else block of if(z==0.0) {"); { ++ bind(RECOMP_CHECK_DONE_NOT_ZERO); ++ fmul_d(v18, v18, v22); ++ fcmp_clt_d(FCC0, v18, v24); // v24 is stil two24A ++ bcnez(FCC0, Z_IS_LESS_THAN_TWO24B); ++ fmul_d(v1, v18, v17); // twon24*z ++ vfrintrz_d(v1, v1); // v1 = (double)(int)(v1) ++ fnmsub_d(v2, v24, v1, v18); ++ ftintrz_w_d(vt, v1); // (int)fw ++ movfr2gr_s(tmp3, vt); ++ ftintrz_w_d(vt, v2); // double to int ++ movfr2gr_s(tmp2, vt); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ st_w(tmp2, SCR2, 0); ++ addi_w(SCR1, SCR1, 24); ++ addi_w(jz, jz, 1); ++ st_w(tmp3, SCR2, 0); // iq[jz] = (int) fw ++ b(Z_ZERO_CHECK_DONE); ++ bind(Z_IS_LESS_THAN_TWO24B); ++ ftintrz_w_d(vt, v18); // (int)z ++ movfr2gr_s(tmp3, vt); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ st_w(tmp3, SCR2, 0); // iq[jz] = (int) z ++ b(Z_ZERO_CHECK_DONE); ++ } ++ ++ block_comment("if(z==0.0) {"); { ++ bind(Z_IS_ZERO); ++ addi_w(jz, jz, -1); ++ alsl_d(SCR2, jz, iqBase, 2 - 1); ++ ld_w(tmp1, SCR2, 0); ++ addi_w(SCR1, SCR1, -24); ++ beqz(tmp1, Z_IS_ZERO); ++ } ++ bind(Z_ZERO_CHECK_DONE); ++ // convert integer "bit" chunk to floating-point value ++ // v17 = twon24 ++ // update v30, which was scalbnA(1.0, ); ++ addi_w(tmp2, SCR1, 1023); // biased exponent ++ slli_d(tmp2, tmp2, 52); // put at correct position ++ move(i, jz); ++ movgr2fr_d(v30, tmp2); ++ ++ block_comment("for(i=jz;i>=0;i--) {q[i] = fw*(double)iq[i]; fw*=twon24;}"); { ++ bind(CONVERTION_FOR); ++ alsl_d(SCR2, i, iqBase, 2 - 1); ++ fld_s(v31, SCR2, 0); ++ vffintl_d_w(v31, v31); ++ fmul_d(v31, v31, v30); ++ alsl_d(SCR2, i, qBase, 3 - 1); ++ fst_d(v31, SCR2, 0); ++ fmul_d(v30, v30, v17); ++ addi_w(i, i, -1); ++ bge(i, R0, CONVERTION_FOR); ++ } ++ addi_d(SCR2, SP, 160); // base for fq ++ // reusing twoOverPiBase ++ li(twoOverPiBase, pio2); ++ ++ block_comment("compute PIo2[0,...,jp]*q[jz,...,0]. 
for(i=jz;i>=0;i--) {...}"); { ++ move(i, jz); ++ move(tmp2, R0); // tmp2 will keep jz - i == 0 at start ++ bind(COMP_FOR); ++ // for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++ vxor_v(v30, v30, v30); ++ alsl_d(tmp5, i, qBase, 3 - 1); // address of q[i+k] for k==0 ++ li(tmp3, 4); ++ slti(tmp4, tmp2, 5); ++ alsl_d(tmp1, i, qBase, 3 - 1); // used as q[i] address ++ masknez(tmp3, tmp3, tmp4); // min(jz - i, jp); ++ maskeqz(tmp4, tmp2, tmp4); ++ orr(tmp3, tmp3, tmp4); ++ move(tmp4, R0); // used as k ++ ++ block_comment("for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k];"); { ++ bind(COMP_INNER_LOOP); ++ alsl_d(tmp5, tmp4, tmp1, 3 - 1); ++ fld_d(v18, tmp5, 0); // q[i+k] ++ alsl_d(tmp5, tmp4, twoOverPiBase, 3 - 1); ++ fld_d(v19, tmp5, 0); // PIo2[k] ++ fmadd_d(v30, v18, v19, v30); // fw += PIo2[k]*q[i+k]; ++ addi_w(tmp4, tmp4, 1); // k++ ++ bge(tmp3, tmp4, COMP_INNER_LOOP); ++ } ++ alsl_d(tmp5, tmp2, SCR2, 3 - 1); ++ fst_d(v30, tmp5, 0); // fq[jz-i] ++ addi_d(tmp2, tmp2, 1); ++ addi_w(i, i, -1); ++ bge(i, R0, COMP_FOR); ++ } ++ ++ block_comment("switch(prec) {...}. case 2:"); { ++ // compress fq into y[] ++ // remember prec == 2 ++ ++ block_comment("for (i=jz;i>=0;i--) fw += fq[i];"); { ++ vxor_v(v4, v4, v4); ++ move(i, jz); ++ bind(FW_FOR1); ++ alsl_d(tmp5, i, SCR2, 3 - 1); ++ fld_d(v1, tmp5, 0); ++ addi_w(i, i, -1); ++ fadd_d(v4, v4, v1); ++ bge(i, R0, FW_FOR1); ++ } ++ bind(FW_FOR1_DONE); ++ // v1 contains fq[0]. so, keep it so far ++ fsub_d(v5, v1, v4); // fw = fq[0] - fw ++ beqz(ih, FW_Y0_NO_NEGATION); ++ fneg_d(v4, v4); ++ bind(FW_Y0_NO_NEGATION); ++ ++ block_comment("for (i=1;i<=jz;i++) fw += fq[i];"); { ++ li(i, 1); ++ blt(jz, i, FW_FOR2_DONE); ++ bind(FW_FOR2); ++ alsl_d(tmp5, i, SCR2, 3 - 1); ++ fld_d(v1, tmp5, 0); ++ addi_w(i, i, 1); ++ fadd_d(v5, v5, v1); ++ bge(jz, i, FW_FOR2); ++ } ++ bind(FW_FOR2_DONE); ++ beqz(ih, FW_Y1_NO_NEGATION); ++ fneg_d(v5, v5); ++ bind(FW_Y1_NO_NEGATION); ++ addi_d(SP, SP, 560); ++ } ++} ++ ++///* __kernel_sin( x, y, iy) ++// * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 ++// * Input x is assumed to be bounded by ~pi/4 in magnitude. ++// * Input y is the tail of x. ++// * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). ++// * ++// * Algorithm ++// * 1. Since sin(-x) = -sin(x), we need only to consider positive x. ++// * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. ++// * 3. sin(x) is approximated by a polynomial of degree 13 on ++// * [0,pi/4] ++// * 3 13 ++// * sin(x) ~ x + S1*x + ... + S6*x ++// * where ++// * ++// * |sin(x) 2 4 6 8 10 12 | -58 ++// * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 ++// * | x | ++// * ++// * 4. 
sin(x+y) = sin(x) + sin'(x')*y
++// * ~ sin(x) + (1-x*x/2)*y
++// * For better accuracy, let
++// * 3 2 2 2 2
++// * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6))))
++// * then 3 2
++// * sin(x) = x + (S1*x + (x *(r-y/2)+y))
++// */
++//static const double
++//S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */
++//S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */
++//S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */
++//S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */
++//S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */
++//S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
++//
++// NOTE: S1..S6 were moved into a table: StubRoutines::la::_dsin_coef
++//
++// BEGIN __kernel_sin PSEUDO CODE
++//
++//static double __kernel_sin(double x, double y, bool iy)
++//{
++// double z,r,v;
++//
++// // NOTE: not needed. moved to dsin/dcos
++// //int ix;
++// //ix = high(x)&0x7fffffff; /* high word of x */
++//
++// // NOTE: moved to dsin/dcos
++// //if(ix<0x3e400000) /* |x| < 2**-27 */
++// // {if((int)x==0) return x;} /* generate inexact */
++//
++// z = x*x;
++// v = z*x;
++// r = S2+z*(S3+z*(S4+z*(S5+z*S6)));
++// if(iy==0) return x+v*(S1+z*r);
++// else return x-((z*(half*y-v*r)-y)-v*S1);
++//}
++//
++// END __kernel_sin PSEUDO CODE
++//
++// Changes between fdlibm and intrinsic:
++// 1. Removed |x| < 2**-27 check, because it was done earlier in dsin/dcos
++// 2. Constants are now loaded from table dsin_coef
++// 3. C code parameter "int iy" was modified to "bool iyIsOne", because
++// iy is always 0 or 1. Also, iyIsOne branch was moved into
++// generation phase instead of taking it during code execution
++// Input and output:
++// 1. Input for generated function: X argument = x
++// 2. Input for generator: x = register to read argument from, iyIsOne
++// = flag indicating whether to use the low part of the argument, dsin_coef = coefficients
++// table address
++// 3. Return sin(x) value in FA0
++void MacroAssembler::generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef) {
++ FloatRegister y = FA5, z = FA6, v = FA7, r = FT0, s1 = FT1, s2 = FT2,
++ s3 = FT3, s4 = FT4, s5 = FT5, s6 = FT6, half = FT7;
++ li(SCR2, dsin_coef);
++ fld_d(s5, SCR2, 32);
++ fld_d(s6, SCR2, 40);
++ fmul_d(z, x, x); // z = x*x;
++ fld_d(s1, SCR2, 0);
++ fld_d(s2, SCR2, 8);
++ fld_d(s3, SCR2, 16);
++ fld_d(s4, SCR2, 24);
++ fmul_d(v, z, x); // v = z*x;
++
++ block_comment("calculate r = S2+z*(S3+z*(S4+z*(S5+z*S6)))"); {
++ fmadd_d(r, z, s6, s5);
++ // initialize "half" in current block to utilize 2nd FPU. However, it's
++ // not a part of this block
++ vldi(half, -928); // 0.5 (0x3fe0000000000000)
++ fmadd_d(r, z, r, s4);
++ fmadd_d(r, z, r, s3);
++ fmadd_d(r, z, r, s2);
++ }
++
++ if (!iyIsOne) {
++ // return x+v*(S1+z*r);
++ fmadd_d(s1, z, r, s1);
++ fmadd_d(FA0, v, s1, x);
++ } else {
++ // return x-((z*(half*y-v*r)-y)-v*S1);
++ fmul_d(s6, half, y); // half*y
++ fnmsub_d(s6, v, r, s6); // half*y-v*r
++ fnmsub_d(s6, z, s6, y); // y - z*(half*y-v*r) = - (z*(half*y-v*r)-y)
++ fmadd_d(s6, v, s1, s6); // - (z*(half*y-v*r)-y) + v*S1 == -((z*(half*y-v*r)-y)-v*S1)
++ fadd_d(FA0, x, s6);
++ }
++}
++
++///*
++// * __kernel_cos( x, y )
++// * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164
++// * Input x is assumed to be bounded by ~pi/4 in magnitude.
++// * Input y is the tail of x.
++// *
++// * Algorithm
++// * 1. Since cos(-x) = cos(x), we need only to consider positive x.
++// * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0.
++// * 3. 
cos(x) is approximated by a polynomial of degree 14 on ++// * [0,pi/4] ++// * 4 14 ++// * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x ++// * where the remez error is ++// * ++// * | 2 4 6 8 10 12 14 | -58 ++// * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 ++// * | | ++// * ++// * 4 6 8 10 12 14 ++// * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then ++// * cos(x) = 1 - x*x/2 + r ++// * since cos(x+y) ~ cos(x) - sin(x)*y ++// * ~ cos(x) - x*y, ++// * a correction term is necessary in cos(x) and hence ++// * cos(x+y) = 1 - (x*x/2 - (r - x*y)) ++// * For better accuracy when x > 0.3, let qx = |x|/4 with ++// * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. ++// * Then ++// * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). ++// * Note that 1-qx and (x*x/2-qx) is EXACT here, and the ++// * magnitude of the latter is at least a quarter of x*x/2, ++// * thus, reducing the rounding error in the subtraction. ++// */ ++// ++//static const double ++//C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ ++//C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ ++//C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ ++//C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ ++//C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ ++//C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ ++// ++// NOTE: C1..C6 were moved into a table: StubRoutines::la::_dcos_coef ++// ++// BEGIN __kernel_cos PSEUDO CODE ++// ++//static double __kernel_cos(double x, double y) ++//{ ++// double a,h,z,r,qx=0; ++// ++// // NOTE: ix is already initialized in dsin/dcos. Reuse value from register ++// //int ix; ++// //ix = high(x)&0x7fffffff; /* ix = |x|'s high word*/ ++// ++// // NOTE: moved to dsin/dcos ++// //if(ix<0x3e400000) { /* if x < 2**27 */ ++// // if(((int)x)==0) return one; /* generate inexact */ ++// //} ++// ++// z = x*x; ++// r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); ++// if(ix < 0x3FD33333) /* if |x| < 0.3 */ ++// return one - (0.5*z - (z*r - x*y)); ++// else { ++// if(ix > 0x3fe90000) { /* x > 0.78125 */ ++// qx = 0.28125; ++// } else { ++// set_high(&qx, ix-0x00200000); /* x/4 */ ++// set_low(&qx, 0); ++// } ++// h = 0.5*z-qx; ++// a = one-qx; ++// return a - (h - (z*r-x*y)); ++// } ++//} ++// ++// END __kernel_cos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos ++// 2. Constants are now loaded from table dcos_coef ++// Input and output: ++// 1. Input for generated function: X argument = x ++// 2. Input for generator: x = register to read argument from, dcos_coef ++// = coefficients table address ++// 3. 
Return cos(x) value in FA0 ++void MacroAssembler::generate_kernel_cos(FloatRegister x, address dcos_coef) { ++ Register ix = A3; ++ FloatRegister qx = FA1, h = FA2, a = FA3, y = FA5, z = FA6, r = FA7, C1 = FT0, ++ C2 = FT1, C3 = FT2, C4 = FT3, C5 = FT4, C6 = FT5, one = FT6, half = FT7; ++ Label IX_IS_LARGE, SET_QX_CONST, DONE, QX_SET; ++ li(SCR2, dcos_coef); ++ fld_d(C1, SCR2, 0); ++ fld_d(C2, SCR2, 8); ++ fld_d(C3, SCR2, 16); ++ fld_d(C4, SCR2, 24); ++ fld_d(C5, SCR2, 32); ++ fld_d(C6, SCR2, 40); ++ fmul_d(z, x, x); // z=x^2 ++ block_comment("calculate r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6)))))"); { ++ fmadd_d(r, z, C6, C5); ++ vldi(half, -928); // 0.5 (0x3fe0000000000000) ++ fmadd_d(r, z, r, C4); ++ fmul_d(y, x, y); ++ fmadd_d(r, z, r, C3); ++ li(SCR1, 0x3FD33333); ++ fmadd_d(r, z, r, C2); ++ fmul_d(x, z, z); // x = z^2 ++ fmadd_d(r, z, r, C1); // r = C1+z(C2+z(C4+z(C5+z*C6))) ++ } ++ // need to multiply r by z to have "final" r value ++ vldi(one, -912); // 1.0 (0x3ff0000000000000) ++ bge(ix, SCR1, IX_IS_LARGE); ++ block_comment("if(ix < 0x3FD33333) return one - (0.5*z - (z*r - x*y))"); { ++ // return 1.0 - (0.5*z - (z*r - x*y)) = 1.0 - (0.5*z + (x*y - z*r)) ++ fnmsub_d(FA0, x, r, y); ++ fmadd_d(FA0, half, z, FA0); ++ fsub_d(FA0, one, FA0); ++ b(DONE); ++ } ++ block_comment("if(ix >= 0x3FD33333)"); { ++ bind(IX_IS_LARGE); ++ li(SCR2, 0x3FE90000); ++ blt(SCR2, ix, SET_QX_CONST); ++ block_comment("set_high(&qx, ix-0x00200000); set_low(&qx, 0);"); { ++ li(SCR2, 0x00200000); ++ sub_w(SCR2, ix, SCR2); ++ slli_d(SCR2, SCR2, 32); ++ movgr2fr_d(qx, SCR2); ++ } ++ b(QX_SET); ++ bind(SET_QX_CONST); ++ block_comment("if(ix > 0x3fe90000) qx = 0.28125;"); { ++ vldi(qx, -942); // 0.28125 (0x3fd2000000000000) ++ } ++ bind(QX_SET); ++ fmsub_d(C6, x, r, y); // z*r - xy ++ fmsub_d(h, half, z, qx); // h = 0.5*z - qx ++ fsub_d(a, one, qx); // a = 1-qx ++ fsub_d(C6, h, C6); // = h - (z*r - x*y) ++ fsub_d(FA0, a, C6); ++ } ++ bind(DONE); ++} ++ ++// generate_dsin_dcos creates stub for dsin and dcos ++// Generation is done via single call because dsin and dcos code is almost the ++// same(see C code below). These functions work as follows: ++// 1) handle corner cases: |x| ~< pi/4, x is NaN or INF, |x| < 2**-27 ++// 2) perform argument reduction if required ++// 3) call kernel_sin or kernel_cos which approximate sin/cos via polynomial ++// ++// BEGIN dsin/dcos PSEUDO CODE ++// ++//dsin_dcos(jdouble x, bool isCos) { ++// double y[2],z=0.0; ++// int n, ix; ++// ++// /* High word of x. */ ++// ix = high(x); ++// ++// /* |x| ~< pi/4 */ ++// ix &= 0x7fffffff; ++// if(ix <= 0x3fe921fb) return isCos ? __kernel_cos : __kernel_sin(x,z,0); ++// ++// /* sin/cos(Inf or NaN) is NaN */ ++// else if (ix>=0x7ff00000) return x-x; ++// else if (ix<0x3e400000) { /* if ix < 2**27 */ ++// if(((int)x)==0) return isCos ? one : x; /* generate inexact */ ++// } ++// /* argument reduction needed */ ++// else { ++// n = __ieee754_rem_pio2(x,y); ++// switch(n&3) { ++// case 0: return isCos ? __kernel_cos(y[0],y[1]) : __kernel_sin(y[0],y[1], true); ++// case 1: return isCos ? -__kernel_sin(y[0],y[1],true) : __kernel_cos(y[0],y[1]); ++// case 2: return isCos ? -__kernel_cos(y[0],y[1]) : -__kernel_sin(y[0],y[1], true); ++// default: ++// return isCos ? __kernel_sin(y[0],y[1],1) : -__kernel_cos(y[0],y[1]); ++// } ++// } ++//} ++// END dsin/dcos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Moved ix < 2**27 from kernel_sin/kernel_cos into dsin/dcos ++// 2. 
Final switch uses equivalent bit checks (tbz/tbnz)
++// Input and output:
++// 1. Input for generated function: X = A0
++// 2. Input for generator: isCos = generate sin or cos, npio2_hw = address
++// of npio2_hw table, two_over_pi = address of two_over_pi table,
++// pio2 = address of pio2 table, dsin_coef = address of dsin_coef table,
++// dcos_coef = address of dcos_coef table
++// 3. Return result in FA0
++// NOTE: general purpose register names match local variable names in C code
++void MacroAssembler::generate_dsin_dcos(bool isCos, address npio2_hw,
++ address two_over_pi, address pio2,
++ address dsin_coef, address dcos_coef) {
++ Label DONE, ARG_REDUCTION, TINY_X, RETURN_SIN, EARLY_CASE;
++ Register X = A0, absX = A1, n = A2, ix = A3;
++ FloatRegister y0 = FA4, y1 = FA5;
++
++ block_comment("check |x| ~< pi/4, NaN, Inf and |x| < 2**-27 cases"); {
++ movfr2gr_d(X, FA0);
++ li(SCR2, 0x3e400000);
++ li(SCR1, 0x3fe921fb); // high word of pi/4.
++ bstrpick_d(absX, X, 62, 0); // absX
++ li(T0, 0x7ff0000000000000);
++ srli_d(ix, absX, 32); // set ix
++ blt(ix, SCR2, TINY_X); // handle tiny x (|x| < 2^-27)
++ bge(SCR1, ix, EARLY_CASE); // if(ix <= 0x3fe921fb) return
++ blt(absX, T0, ARG_REDUCTION);
++ // X is NaN or INF(i.e. 0x7FF* or 0xFFF*). Return NaN (mantissa != 0).
++ // Set last bit unconditionally to make it NaN
++ ori(T0, T0, 1);
++ movgr2fr_d(FA0, T0);
++ jr(RA);
++ }
++ block_comment("kernel_sin/kernel_cos: if(ix<0x3e400000) {}"); {
++ bind(TINY_X);
++ if (isCos) {
++ vldi(FA0, -912); // 1.0 (0x3ff0000000000000)
++ }
++ jr(RA);
++ }
++ bind(ARG_REDUCTION); /* argument reduction needed */
++ block_comment("n = __ieee754_rem_pio2(x,y);"); {
++ generate__ieee754_rem_pio2(npio2_hw, two_over_pi, pio2);
++ }
++ block_comment("switch(n&3) {case ... }"); {
++ if (isCos) {
++ srli_w(T0, n, 1);
++ xorr(absX, n, T0);
++ andi(T0, n, 1);
++ bnez(T0, RETURN_SIN);
++ } else {
++ andi(T0, n, 1);
++ beqz(T0, RETURN_SIN);
++ }
++ generate_kernel_cos(y0, dcos_coef);
++ if (isCos) {
++ andi(T0, absX, 1);
++ beqz(T0, DONE);
++ } else {
++ andi(T0, n, 2);
++ beqz(T0, DONE);
++ }
++ fneg_d(FA0, FA0);
++ jr(RA);
++ bind(RETURN_SIN);
++ generate_kernel_sin(y0, true, dsin_coef);
++ if (isCos) {
++ andi(T0, absX, 1);
++ beqz(T0, DONE);
++ } else {
++ andi(T0, n, 2);
++ beqz(T0, DONE);
++ }
++ fneg_d(FA0, FA0);
++ jr(RA);
++ }
++ bind(EARLY_CASE);
++ vxor_v(y1, y1, y1);
++ if (isCos) {
++ generate_kernel_cos(FA0, dcos_coef);
++ } else {
++ generate_kernel_sin(FA0, false, dsin_coef);
++ }
++ bind(DONE);
++ jr(RA);
++}
diff --git a/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp
new file mode 100644
-index 0000000000..3fb9dce064
+index 0000000000..e517dcd415
--- /dev/null
+++ b/src/hotspot/cpu/loongarch/methodHandles_loongarch.cpp
-@@ -0,0 +1,562 @@
+@@ -0,0 +1,564 @@
+/*
+ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
-+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved.
++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -38136,6 +39644,8 @@ index 0000000000..3fb9dce064 +#include "interpreter/interpreterRuntime.hpp" +#include "memory/allocation.inline.hpp" +#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "utilities/preserveException.hpp" + +#define __ _masm-> + @@ -38736,10 +40246,10 @@ index 0000000000..f84337424b + } diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp new file mode 100644 -index 0000000000..4d0ab4644a +index 0000000000..0ee3fba75b --- /dev/null +++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.cpp -@@ -0,0 +1,482 @@ +@@ -0,0 +1,505 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. @@ -38799,14 +40309,6 @@ index 0000000000..4d0ab4644a + ICache::invalidate_range(addr, 8); +} + -+static int illegal_instruction_bits = 0; -+ -+int NativeInstruction::illegal_instruction() { -+ //TODO: LA -+ guarantee(0, "LA not implemented yet"); -+ return 0; // mute compiler -+} -+ +bool NativeInstruction::is_int_branch() { + int op = Assembler::high(insn_word(), 6); + return op == Assembler::beqz_op || op == Assembler::bnez_op || @@ -38819,6 +40321,16 @@ index 0000000000..4d0ab4644a + return Assembler::high(insn_word(), 6) == Assembler::bccondz_op; +} + ++bool NativeInstruction::is_lu12iw_lu32id() const { ++ return Assembler::high(int_at(0), 7) == Assembler::lu12i_w_op && ++ Assembler::high(int_at(4), 7) == Assembler::lu32i_d_op; ++} ++ ++bool NativeInstruction::is_pcaddu12i_add() const { ++ return Assembler::high(int_at(0), 7) == Assembler::pcaddu12i_op && ++ Assembler::high(int_at(4), 10) == Assembler::addi_d_op; ++} ++ +bool NativeCall::is_bl() const { + return Assembler::high(int_at(0), 6) == Assembler::bl_op; +} @@ -38923,6 +40435,27 @@ index 0000000000..4d0ab4644a + ICache::invalidate_range(addr_call, instruction_size); +} + ++// Generate a trampoline for a branch to dest. If there's no need for a ++// trampoline, simply patch the call directly to dest. ++address NativeCall::trampoline_jump(CodeBuffer &cbuf, address dest) { ++ MacroAssembler a(&cbuf); ++ address stub = NULL; ++ ++ if (a.far_branches() ++ && ! is_NativeCallTrampolineStub_at()) { ++ stub = a.emit_trampoline_stub(instruction_address() - cbuf.insts()->start(), dest); ++ } ++ ++ if (stub == NULL) { ++ // If we generated no stub, patch this call directly to dest. ++ // This will happen if we don't need far branches or if there ++ // already was a trampoline. ++ set_destination(dest); ++ } ++ ++ return stub; ++} ++ +void NativeCall::print() { + tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, + p2i(instruction_address()), p2i(destination())); @@ -39224,10 +40757,10 @@ index 0000000000..4d0ab4644a +} diff --git a/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp new file mode 100644 -index 0000000000..fff7e67856 +index 0000000000..195a2df580 --- /dev/null +++ b/src/hotspot/cpu/loongarch/nativeInst_loongarch.hpp -@@ -0,0 +1,533 @@ +@@ -0,0 +1,521 @@ +/* + * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
@@ -39253,8 +40786,8 @@ index 0000000000..fff7e67856 + * + */ + -+#ifndef CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP ++#define CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP + +#include "asm/assembler.hpp" +#include "runtime/icache.hpp" @@ -39297,6 +40830,10 @@ index 0000000000..fff7e67856 + bool is_jump(); + bool is_safepoint_poll(); + ++ // Helper func for jvmci ++ bool is_lu12iw_lu32id() const; ++ bool is_pcaddu12i_add() const; ++ + // LoongArch has no instruction to generate a illegal instrucion exception? + // But `break 11` is not illegal instruction for LoongArch. + static int illegal_instruction(); @@ -39417,7 +40954,7 @@ index 0000000000..fff7e67856 + void set_destination_mt_safe(address dest, bool assert_lock = true); + + address get_trampoline(); -+ ++ address trampoline_jump(CodeBuffer &cbuf, address dest); +}; + +inline NativeCall* nativeCall_at(address address) { @@ -39443,9 +40980,12 @@ index 0000000000..fff7e67856 +class NativeFarCall: public NativeInstruction { + public: + enum loongarch_specific_constants { -+ instruction_size = 2 * BytesPerInstWord, ++ instruction_offset = 0, ++ instruction_size = 2 * BytesPerInstWord + }; + ++ address instruction_address() const { return addr_at(instruction_offset); } ++ + // We use MacroAssembler::patchable_call() for implementing a + // call-anywhere instruction. + bool is_short() const; @@ -39542,20 +41082,6 @@ index 0000000000..fff7e67856 + } +}; + -+// An interface for accessing/manipulating native moves of the form: -+// lui AT, split_high(offset) -+// addiu AT, split_low(offset) -+// add reg, reg, AT -+// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, 0 -+// [lw/sw/lwc1/swc1 dest, reg, 4] -+// or -+// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, offset -+// [lw/sw/lwc1/swc1 dest, reg, offset+4] -+// -+// Warning: These routines must be able to handle any instruction sequences -+// that are generated as a result of the load/store byte,word,long -+// macros. 
-+ +class NativeMovRegMem: public NativeInstruction { + public: + enum loongarch_specific_constants { @@ -39568,11 +41094,6 @@ index 0000000000..fff7e67856 + }; + + address instruction_address() const { return addr_at(instruction_offset); } -+ address next_instruction_address() const { -+ //TODO: LA -+ guarantee(0, "LA not implemented yet"); -+ return NULL; // mute compiler -+ } + + int num_bytes_to_end_of_patch() const { return instruction_offset + instruction_size; } + @@ -39760,10 +41281,10 @@ index 0000000000..fff7e67856 + assert(ni->is_NativeCallTrampolineStub_at(), "no call trampoline found"); + return (NativeCallTrampolineStub*)addr; +} -+#endif // CPU_LOONGARCH_VM_NATIVEINST_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_NATIVEINST_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp new file mode 100644 -index 0000000000..4703fd6b7a +index 0000000000..e9f0fc280d --- /dev/null +++ b/src/hotspot/cpu/loongarch/registerMap_loongarch.hpp @@ -0,0 +1,47 @@ @@ -39792,8 +41313,8 @@ index 0000000000..4703fd6b7a + * + */ + -+#ifndef CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP ++#define CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP + +// machine-dependent implemention for register maps + friend class frame; @@ -39813,7 +41334,7 @@ index 0000000000..4703fd6b7a + void pd_initialize() {} + void pd_initialize_from(const RegisterMap* map) {} + -+#endif // CPU_LOONGARCH_VM_REGISTERMAP_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_REGISTERMAP_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp b/src/hotspot/cpu/loongarch/register_definitions_loongarch.cpp new file mode 100644 index 0000000000..58f40b747c @@ -39925,13 +41446,13 @@ index 0000000000..58f40b747c +REGISTER_DEFINITION(FloatRegister, f31); diff --git a/src/hotspot/cpu/loongarch/register_loongarch.cpp b/src/hotspot/cpu/loongarch/register_loongarch.cpp new file mode 100644 -index 0000000000..e0ea958edf +index 0000000000..54d90167a5 --- /dev/null +++ b/src/hotspot/cpu/loongarch/register_loongarch.cpp @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -39957,9 +41478,9 @@ index 0000000000..e0ea958edf +#include "precompiled.hpp" +#include "register_loongarch.hpp" + -+const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers * RegisterImpl::max_slots_per_register; +const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + -+ 2 * FloatRegisterImpl::number_of_registers; ++ FloatRegisterImpl::number_of_registers * FloatRegisterImpl::max_slots_per_register; + + +const char* RegisterImpl::name() const { @@ -39990,7 +41511,7 @@ index 0000000000..e0ea958edf +} diff --git a/src/hotspot/cpu/loongarch/register_loongarch.hpp b/src/hotspot/cpu/loongarch/register_loongarch.hpp new file mode 100644 -index 0000000000..3ff375bd3a +index 0000000000..8d99dc9688 --- /dev/null +++ b/src/hotspot/cpu/loongarch/register_loongarch.hpp @@ -0,0 +1,428 @@ @@ -40019,8 +41540,8 @@ index 0000000000..3ff375bd3a + * + */ + -+#ifndef CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_REGISTER_LOONGARCH_HPP ++#define CPU_LOONGARCH_REGISTER_LOONGARCH_HPP + +#include "asm/register.hpp" +#include "utilities/formatBuffer.hpp" @@ -40057,7 +41578,7 @@ index 0000000000..3ff375bd3a + const char* name() const; +}; + -+// The integer registers of the LOONGARCH architecture ++// The integer registers of the LoongArch architecture +CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); + + @@ -40213,7 +41734,7 @@ index 0000000000..3ff375bd3a + return (FloatRegister)(intptr_t) encoding; +} + -+// The implementation of floating point registers for the loongarch architecture ++// The implementation of floating point registers for the LoongArch architecture +class FloatRegisterImpl: public AbstractRegisterImpl { + public: + enum { @@ -40357,7 +41878,7 @@ index 0000000000..3ff375bd3a + return (ConditionalFlagRegister)(intptr_t) encoding; +} + -+// The implementation of floating point registers for the loongarch architecture ++// The implementation of floating point registers for the LoongArch architecture +class ConditionalFlagRegisterImpl: public AbstractRegisterImpl { + public: + enum { @@ -40421,16 +41942,16 @@ index 0000000000..3ff375bd3a + static const int max_fpr; +}; + -+#endif //CPU_LOONGARCH_VM_REGISTER_LOONGARCH_HPP ++#endif //CPU_LOONGARCH_REGISTER_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp new file mode 100644 -index 0000000000..f213cf6d7c +index 0000000000..1caba43699 --- /dev/null +++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.cpp -@@ -0,0 +1,135 @@ +@@ -0,0 +1,132 @@ +/* + * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -40456,9 +41977,11 @@ index 0000000000..f213cf6d7c +#include "precompiled.hpp" +#include "asm/macroAssembler.hpp" +#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" +#include "nativeInst_loongarch.hpp" +#include "oops/compressedOops.inline.hpp" +#include "oops/oop.hpp" ++#include "oops/klass.inline.hpp" +#include "runtime/safepoint.hpp" + + @@ -40495,7 +42018,7 @@ index 0000000000..f213cf6d7c + } + } else { + // Note: Use runtime_call_type relocations for call32_operand. -+ assert(0, "call32_operand not supported in LOONGARCH64"); ++ assert(0, "call32_operand not supported in LoongArch64"); + } +} + @@ -40556,22 +42079,17 @@ index 0000000000..f213cf6d7c +void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { +} + -+/* -+void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { -+} -+*/ -+ +void metadata_Relocation::pd_fix_value(address x) { +} diff --git a/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp new file mode 100644 -index 0000000000..614c5aebaa +index 0000000000..c85ca4963f --- /dev/null +++ b/src/hotspot/cpu/loongarch/relocInfo_loongarch.hpp @@ -0,0 +1,44 @@ +/* + * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -40594,13 +42112,13 @@ index 0000000000..614c5aebaa + * + */ + -+#ifndef CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP ++#define CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP + + // machine-dependent parts of class relocInfo + private: + enum { -+ // Since LOONGARCH instructions are whole words, ++ // Since LoongArch instructions are whole words, + // the two low-order offset bits can always be discarded. + offset_unit = 4, + @@ -40612,7 +42130,7 @@ index 0000000000..614c5aebaa + + static bool mustIterateImmediateOopsInCode() { return false; } + -+#endif // CPU_LOONGARCH_VM_RELOCINFO_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_RELOCINFO_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/runtime_loongarch_64.cpp new file mode 100644 index 0000000000..334c783b37 @@ -40812,13 +42330,13 @@ index 0000000000..334c783b37 +} diff --git a/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp new file mode 100644 -index 0000000000..66026dc2d4 +index 0000000000..736ed0a85f --- /dev/null +++ b/src/hotspot/cpu/loongarch/sharedRuntime_loongarch_64.cpp -@@ -0,0 +1,3576 @@ +@@ -0,0 +1,3621 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -40848,6 +42366,7 @@ index 0000000000..66026dc2d4 +#include "code/icBuffer.hpp" +#include "code/vtableStubs.hpp" +#include "interpreter/interpreter.hpp" ++#include "nativeInst_loongarch.hpp" +#include "oops/compiledICHolder.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/vframeArray.hpp" @@ -40855,6 +42374,9 @@ index 0000000000..66026dc2d4 +#ifdef COMPILER2 +#include "opto/runtime.hpp" +#endif ++#if INCLUDE_JVMCI ++#include "jvmci/jvmciJavaClasses.hpp" ++#endif + +#include + @@ -41486,7 +43008,7 @@ index 0000000000..66026dc2d4 + // number (all values in registers) or the maximum stack slot accessed. + // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); + // Convert 4-byte stack slots to words. -+ // did LA need round? FIXME aoqi ++ // did LA need round? FIXME + comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; + // Round up to miminum stack alignment, in wordSize + comp_words_on_stack = round_to(comp_words_on_stack, 2); @@ -41507,6 +43029,18 @@ index 0000000000..66026dc2d4 + // Pre-load the register-jump target early, to schedule it better. + __ ld_d(T4, Rmethod, in_bytes(Method::from_compiled_offset())); + ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ // check if this call should be routed towards a specific entry point ++ __ ld_d(AT, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ Label no_alternative_target; ++ __ beqz(AT, no_alternative_target); ++ __ move(T4, AT); ++ __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ __ bind(no_alternative_target); ++ } ++#endif // INCLUDE_JVMCI ++ + // Now generate the shuffle code. Pick up all register args and move the + // rest through the floating point stack top. + for (int i = 0; i < total_args_passed; i++) { @@ -41519,8 +43053,7 @@ index 0000000000..66026dc2d4 + + // Pick up 0, 1 or 2 words from SP+offset. + -+ //FIXME. aoqi. just delete the assert -+ //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); + // Load in argument order going down. + int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; + // Point to interpreter value (vs. tag) @@ -41891,7 +43424,7 @@ index 0000000000..66026dc2d4 + __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); + } else { + if (dst.first() != src.first()){ -+ __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} @@ -41908,7 +43441,6 @@ index 0000000000..66026dc2d4 + + // must pass a handle. First figure out the location we use as a handle + -+ //FIXME, for LA, dst can be register + if (src.first()->is_stack()) { + // Oop is already on the stack as an argument + Register rHandle = T5; @@ -41920,14 +43452,11 @@ index 0000000000..66026dc2d4 + __ bind(nil); + if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); + else __ move( (dst.first())->as_Register(), rHandle); -+ //if dst is register -+ //FIXME, do LA need out preserve stack slots? 
-+ int offset_in_older_frame = src.first()->reg2stack() -+ + SharedRuntime::out_preserve_stack_slots(); ++ ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); + map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); + if (is_receiver) { -+ *receiver_offset = (offset_in_older_frame -+ + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; + } + } else { + // Oop is in an a register we must store it to the space we reserve @@ -41948,7 +43477,6 @@ index 0000000000..66026dc2d4 + // Store the handle parameter + if(dst.first()->is_stack())__ st_d( rHandle, SP, reg2offset_out(dst.first())); + else __ move((dst.first())->as_Register(), rHandle); -+ //if dst is register + + if (is_receiver) { + *receiver_offset = offset; @@ -41994,13 +43522,13 @@ index 0000000000..66026dc2d4 + __ ld_d(AT, FP, reg2offset_in(src.first())); + __ st_d(AT, SP, reg2offset_out(dst.first())); + } else { -+ __ ld_d( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); ++ __ ld_d(dst.first()->as_Register(), FP, reg2offset_in(src.first())); + } + } else { + if( dst.first()->is_stack()){ -+ __ st_d( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); ++ __ st_d(src.first()->as_Register(), SP, reg2offset_out(dst.first())); + } else { -+ __ move( (dst.first())->as_Register() , (src.first())->as_Register()); ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); + } + } +} @@ -42845,8 +44373,6 @@ index 0000000000..66026dc2d4 + __ move(SP, S2); // use S2 as a sender SP holder + __ pop(S2); + __ addi_d(SP, SP, wordSize); -+ //add for compressedoops -+ __ reinit_heapbase(); + // Restore any method result value + restore_native_result(masm, ret_type, stack_slots); + @@ -43038,8 +44564,6 @@ index 0000000000..66026dc2d4 + relocInfo::runtime_call_type); + __ addi_d(SP, SP, 2*wordSize); + __ move(SP, S2); -+ //add for compressedoops -+ __ reinit_heapbase(); +#ifdef ASSERT + { + Label L; @@ -43066,8 +44590,6 @@ index 0000000000..66026dc2d4 + save_native_result(masm, ret_type, stack_slots); + __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), + relocInfo::runtime_call_type); -+ //add for compressedoops -+ __ reinit_heapbase(); + restore_native_result(masm, ret_type, stack_slots); + __ b(reguard_done); + @@ -43668,8 +45190,14 @@ index 0000000000..66026dc2d4 + // allocate space for the code + ResourceMark rm; + // setup code generation tools ++ int pad = 0; ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ pad += 512; // Increase the buffer size when compiling for JVMCI ++ } ++#endif + //CodeBuffer buffer ("deopt_blob", 4000, 2048); -+ CodeBuffer buffer ("deopt_blob", 8000, 2048);//aoqi FIXME for debug ++ CodeBuffer buffer ("deopt_blob", 8000+pad, 2048); // FIXME for debug + MacroAssembler* masm = new MacroAssembler( & buffer); + int frame_size_in_words; + OopMap* map = NULL; @@ -43714,6 +45242,12 @@ index 0000000000..66026dc2d4 + __ b(cont); + + int reexecute_offset = __ pc() - start; ++#if INCLUDE_JVMCI && !defined(COMPILER1) ++ if (EnableJVMCI && UseJVMCICompiler) { ++ // JVMCI does not use this kind of deoptimization ++ __ should_not_reach_here(); ++ } ++#endif + + // Reexecute case + // return address is the pc describes what bci to do re-execute at @@ -43723,6 +45257,44 @@ index 0000000000..66026dc2d4 + __ li(reason, Deoptimization::Unpack_reexecute); + __ b(cont); + ++#if INCLUDE_JVMCI ++ Label 
after_fetch_unroll_info_call; ++ int implicit_exception_uncommon_trap_offset = 0; ++ int uncommon_trap_offset = 0; ++ ++ if (EnableJVMCI) { ++ implicit_exception_uncommon_trap_offset = __ pc() - start; ++ ++ __ ld_d(RA, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ __ st_d(R0, Address(TREG, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ ++ uncommon_trap_offset = __ pc() - start; ++ ++ // Save everything in sight. ++ (void) reg_save.save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ addi_d(SP, SP, -additional_words * wordSize); ++ // fetch_unroll_info needs to call last_java_frame() ++ Label retaddr; ++ __ set_last_Java_frame(NOREG, NOREG, retaddr); ++ ++ __ ld_w(A1, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ __ li(AT, -1); ++ __ st_w(AT, Address(TREG, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ ++ __ li(reason, (int32_t)Deoptimization::Unpack_reexecute); ++ __ move(A0, TREG); ++ __ move(A2, reason); // exec mode ++ __ call((address)Deoptimization::uncommon_trap, relocInfo::runtime_call_type); ++ __ bind(retaddr); ++ oop_maps->add_gc_map( __ pc()-start, map->deep_copy()); ++ __ addi_d(SP, SP, additional_words * wordSize); ++ ++ __ reset_last_Java_frame(false); ++ ++ __ b(after_fetch_unroll_info_call); ++ } // EnableJVMCI ++#endif // INCLUDE_JVMCI ++ + int exception_offset = __ pc() - start; + // Prolog for exception case + @@ -43806,6 +45378,12 @@ index 0000000000..66026dc2d4 +#endif + __ reset_last_Java_frame(false); + ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ __ bind(after_fetch_unroll_info_call); ++ } ++#endif ++ + // Load UnrollBlock into S7 + __ move(unroll, V0); + @@ -43888,26 +45466,6 @@ index 0000000000..66026dc2d4 + __ ld_w(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); + __ sub_d(SP, SP, AT); + -+ // Push interpreter frames in a loop -+ // -+ //Loop: -+ // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld -+ // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] -+ // 0x000000555bd82d20: daddi t2, t2, 0xfffffff0 ; t2 -= 16 -+ // 0x000000555bd82d24: daddi sp, sp, 0xfffffff0 -+ // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp -+ // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at -+ // 0x000000555bd82d30: dadd fp, sp, zero ; fp <- sp -+ // 0x000000555bd82d34: dsub sp, sp, t2 ; sp -= t2 -+ // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); -+ // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); -+ // 0x000000555bd82d40: dadd s4, sp, zero ; move(sender_sp, SP); -+ // 0x000000555bd82d44: daddi t3, t3, 0xffffffff ; count -- -+ // 0x000000555bd82d48: daddi t1, t1, 0x4 ; sizes += 4 -+ // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18 -+ // 0x000000555bd82d50: daddi t0, t0, 0x4 ; <--- error t0 += 8 -+ // -+ // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split + Label loop; + __ bind(loop); + __ ld_d(T2, sizes, 0); // Load frame size @@ -43989,6 +45547,12 @@ index 0000000000..66026dc2d4 + masm->flush(); + _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); + _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++#if INCLUDE_JVMCI ++ if (EnableJVMCI) { ++ _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); ++ 
_deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); ++ } ++#endif +} + +#ifdef COMPILER2 @@ -44311,7 +45875,7 @@ index 0000000000..66026dc2d4 + ResourceMark rm; + + //CodeBuffer buffer(name, 1000, 512); -+ //FIXME. aoqi. code_size ++ //FIXME. code_size + CodeBuffer buffer(name, 2000, 2048); + MacroAssembler* masm = new MacroAssembler(&buffer); + @@ -44361,8 +45925,7 @@ index 0000000000..66026dc2d4 + __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); + __ bne(AT, R0, pending); + // get the returned Method* -+ //FIXME, do LA need this ? -+ __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8 ++ __ get_vm_result_2(Rmethod, thread); + __ st_ptr(Rmethod, SP, reg_save.s3_offset()); + __ st_ptr(V0, SP, reg_save.t5_offset()); + reg_save.restore_live_registers(masm); @@ -44394,10 +45957,10 @@ index 0000000000..66026dc2d4 +extern "C" int SpinPause() {return 0;} diff --git a/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp new file mode 100644 -index 0000000000..f784092519 +index 0000000000..7b5c23b8b5 --- /dev/null +++ b/src/hotspot/cpu/loongarch/stubGenerator_loongarch_64.cpp -@@ -0,0 +1,4755 @@ +@@ -0,0 +1,4827 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. @@ -44462,7 +46025,6 @@ index 0000000000..f784092519 +#define T8 RT8 + +#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) -+//#define a__ ((Assembler*)_masm)-> + +//#ifdef PRODUCT +//#define BLOCK_COMMENT(str) /* nothing */ @@ -44493,7 +46055,7 @@ index 0000000000..f784092519 + // -4 [ S1 ] + // -3 [ TSR(S2) ] + // -2 [ LVP(S7) ] -+ // -1 [ BCP(S1) ] ++ // -1 [ BCP(S0) ] + // 0 [ saved fp ] <--- fp_after_call + // 1 [ return address ] + // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp @@ -44512,7 +46074,10 @@ index 0000000000..f784092519 + // [ argument word n-1 ] <--- sp + // ... + // [ argument word 0 ] ++ //-22 [ F31 ] + // ... 
++ //-15 [ F24 ] ++ //-14 [ S8 ] + //-13 [ thread ] + //-12 [ result_type ] <--- a2 + //-11 [ result ] <--- a1 @@ -44525,7 +46090,7 @@ index 0000000000..f784092519 + // -4 [ S1 ] + // -3 [ TSR(S2) ] + // -2 [ LVP(S7) ] -+ // -1 [ BCP(S1) ] ++ // -1 [ BCP(S0) ] + // 0 [ saved fp ] <--- fp_after_call + // 1 [ return address ] + // 2 [ ] <--- old sp @@ -44549,8 +46114,16 @@ index 0000000000..f784092519 + result_off = -11, + result_type_off = -12, + thread_off = -13, -+ total_off = thread_off - 1, + S8_off = -14, ++ F24_off = -15, ++ F25_off = -16, ++ F26_off = -17, ++ F27_off = -18, ++ F28_off = -19, ++ F29_off = -20, ++ F30_off = -21, ++ F31_off = -22, ++ total_off = F31_off, + }; + + address generate_call_stub(address& return_address) { @@ -44579,6 +46152,15 @@ index 0000000000..f784092519 + __ st_d(A7, FP, thread_off * wordSize); + __ st_d(S8, FP, S8_off * wordSize); + ++ __ fst_d(F24, FP, F24_off * wordSize); ++ __ fst_d(F25, FP, F25_off * wordSize); ++ __ fst_d(F26, FP, F26_off * wordSize); ++ __ fst_d(F27, FP, F27_off * wordSize); ++ __ fst_d(F28, FP, F28_off * wordSize); ++ __ fst_d(F29, FP, F29_off * wordSize); ++ __ fst_d(F30, FP, F30_off * wordSize); ++ __ fst_d(F31, FP, F31_off * wordSize); ++ + __ li(S8, (long)Interpreter::dispatch_table(itos)); + +#ifdef OPT_THREAD @@ -44670,6 +46252,15 @@ index 0000000000..f784092519 + __ ld_d(S5, FP, S5_off * wordSize); + __ ld_d(S6, FP, S6_off * wordSize); + ++ __ fld_d(F24, FP, F24_off * wordSize); ++ __ fld_d(F25, FP, F25_off * wordSize); ++ __ fld_d(F26, FP, F26_off * wordSize); ++ __ fld_d(F27, FP, F27_off * wordSize); ++ __ fld_d(F28, FP, F28_off * wordSize); ++ __ fld_d(F29, FP, F29_off * wordSize); ++ __ fld_d(F30, FP, F30_off * wordSize); ++ __ fld_d(F31, FP, F31_off * wordSize); ++ + __ leave(); + + // return @@ -44814,7 +46405,6 @@ index 0000000000..f784092519 + address generate_verify_oop() { + StubCodeMark mark(this, "StubRoutines", "verify_oop"); + address start = __ pc(); -+ __ reinit_heapbase(); + __ verify_oop_subroutine(); + address end = __ pc(); + return start; @@ -47673,6 +49263,25 @@ index 0000000000..f784092519 + return start; + } + ++ address generate_mulAdd() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "mulAdd"); ++ ++ address entry = __ pc(); ++ ++ const Register out = A0; ++ const Register in = A1; ++ const Register offset = A2; ++ const Register len = A3; ++ const Register k = A4; ++ ++ __ block_comment("Entry:"); ++ __ mul_add(out, in, offset, len, k); ++ __ jr(RA); ++ ++ return entry; ++ } ++ + // Arguments: + // + // Inputs: @@ -48320,6 +49929,18 @@ index 0000000000..f784092519 + return start; + } + ++ address generate_dsin_dcos(bool isCos) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", isCos ? 
"libmDcos" : "libmDsin"); ++ address start = __ pc(); ++ __ generate_dsin_dcos(isCos, (address)StubRoutines::la::_npio2_hw, ++ (address)StubRoutines::la::_two_over_pi, ++ (address)StubRoutines::la::_pio2, ++ (address)StubRoutines::la::_dsin_coef, ++ (address)StubRoutines::la::_dcos_coef); ++ return start; ++ } ++ + // add a function to implement SafeFetch32 and SafeFetchN + void generate_safefetch(const char* name, int size, address* entry, + address* fault_pc, address* continuation_pc) { @@ -49066,6 +50687,16 @@ index 0000000000..f784092519 + generate_throw_exception("delayed StackOverflowError throw_exception", + CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), + false); ++ ++ if (UseCRC32Intrinsics) { ++ // set table address before stub generation which use it ++ StubRoutines::_crc_table_adr = (address)StubRoutines::la::_crc_table; ++ StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); ++ } + } + + void generate_all() { @@ -49092,6 +50723,14 @@ index 0000000000..f784092519 + generate_arraycopy_stubs(); +#endif + ++ if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dsin)) { ++ StubRoutines::_dsin = generate_dsin_dcos(/* isCos = */ false); ++ } ++ ++ if (UseLSX && vmIntrinsics::is_intrinsic_available(vmIntrinsics::_dcos)) { ++ StubRoutines::_dcos = generate_dsin_dcos(/* isCos = */ true); ++ } ++ + // Safefetch stubs. + generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, + &StubRoutines::_safefetch32_fault_pc, @@ -49100,6 +50739,11 @@ index 0000000000..f784092519 + &StubRoutines::_safefetchN_fault_pc, + &StubRoutines::_safefetchN_continuation_pc); + ++#ifdef COMPILER2 ++ if (UseMulAddIntrinsic) { ++ StubRoutines::_mulAdd = generate_mulAdd(); ++ } ++ + if (UseMontgomeryMultiplyIntrinsic) { + StubCodeMark mark(this, "StubRoutines", "montgomeryMultiply"); + MontgomeryMultiplyGenerator g(_masm, false /* squaring */); @@ -49113,6 +50757,7 @@ index 0000000000..f784092519 + // because it's faster for the sizes of modulus we care about. + StubRoutines::_montgomerySquare = g.generate_multiply(); + } ++#endif + + if (UseAESIntrinsics) { + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(false); @@ -49128,16 +50773,6 @@ index 0000000000..f784092519 + if (UseSHA256Intrinsics) { + generate_sha256_implCompress("sha256_implCompress", StubRoutines::_sha256_implCompress, StubRoutines::_sha256_implCompressMB); + } -+ -+ if (UseCRC32Intrinsics) { -+ // set table address before stub generation which use it -+ StubRoutines::_crc_table_adr = (address)StubRoutines::la::_crc_table; -+ StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); -+ } -+ -+ if (UseCRC32CIntrinsics) { -+ StubRoutines::_updateBytesCRC32C = generate_updateBytesCRC32C(); -+ } + } + + public: @@ -49155,13 +50790,13 @@ index 0000000000..f784092519 +} diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp new file mode 100644 -index 0000000000..6b6373c758 +index 0000000000..0ab07e1e9e --- /dev/null +++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch.hpp -@@ -0,0 +1,60 @@ +@@ -0,0 +1,67 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -49184,8 +50819,8 @@ index 0000000000..6b6373c758 + * + */ + -+#ifndef CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP -+#define CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP ++#ifndef CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP + +// This file holds the platform specific parts of the StubRoutines +// definition. See stubRoutines.hpp for a description on how to @@ -49210,6 +50845,13 @@ index 0000000000..6b6373c758 + // stack. The variable holds that location. + static address _call_stub_compiled_return; + static juint _crc_table[]; ++ // begin trigonometric tables block. See comments in .cpp file ++ static juint _npio2_hw[]; ++ static jdouble _two_over_pi[]; ++ static jdouble _pio2[]; ++ static jdouble _dsin_coef[]; ++ static jdouble _dcos_coef[]; ++ // end trigonometric tables block + +public: + // Call back points for traps in compiled code @@ -49218,283 +50860,197 @@ index 0000000000..6b6373c758 + +}; + -+#endif // CPU_LOONGARCH_VM_STUBROUTINES_LOONGARCH_64_HPP ++#endif // CPU_LOONGARCH_STUBROUTINES_LOONGARCH_64_HPP diff --git a/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp new file mode 100644 -index 0000000000..51c627c786 +index 0000000000..1a6ea3bcde --- /dev/null +++ b/src/hotspot/cpu/loongarch/stubRoutines_loongarch_64.cpp -@@ -0,0 +1,264 @@ -+/* -+ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+#include "precompiled.hpp" -+#include "runtime/deoptimization.hpp" -+#include "runtime/frame.inline.hpp" -+#include "runtime/stubRoutines.hpp" -+#include "runtime/thread.inline.hpp" -+ -+// a description of how to extend it, see the stubRoutines.hpp file. 
-+ -+//find the last fp value -+address StubRoutines::la::_call_stub_compiled_return = NULL; -+ -+/** -+ * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h -+ */ -+ATTRIBUTE_ALIGNED(4096) juint StubRoutines::la::_crc_table[] = -+{ -+ // Table 0 -+ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, -+ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, -+ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, -+ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, -+ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, -+ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, -+ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, -+ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, -+ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, -+ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, -+ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, -+ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, -+ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, -+ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, -+ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, -+ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, -+ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, -+ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, -+ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, -+ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, -+ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, -+ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, -+ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, -+ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, -+ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, -+ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, -+ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, -+ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, -+ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, -+ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, -+ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, -+ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, -+ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, -+ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, -+ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, -+ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, -+ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, -+ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, -+ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, -+ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, -+ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, -+ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, -+ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, -+ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, -+ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, -+ 
0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, -+ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, -+ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, -+ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, -+ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, -+ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, -+ 0x2d02ef8dUL, -+ -+ // Table 1 -+ 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, -+ 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, -+ 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, -+ 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, -+ 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, -+ 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, -+ 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, -+ 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, -+ 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, -+ 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, -+ 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, -+ 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, -+ 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, -+ 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, -+ 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, -+ 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, -+ 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, -+ 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, -+ 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, -+ 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, -+ 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 0x2abcfd60UL, 0xad24e1afUL, -+ 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, -+ 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, -+ 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, -+ 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, -+ 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, -+ 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, -+ 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, -+ 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, -+ 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, -+ 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, -+ 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, -+ 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, -+ 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, -+ 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, -+ 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, -+ 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, -+ 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, -+ 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, -+ 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, -+ 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, -+ 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, -+ 0xb30ea79dUL, 
0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, -+ 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, -+ 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, -+ 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, -+ 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, -+ 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, -+ 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, -+ 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, -+ 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, -+ 0x9324fd72UL, -+ -+ // Table 2 -+ 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, -+ 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, -+ 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, -+ 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, -+ 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, -+ 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, -+ 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, -+ 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, -+ 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, -+ 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, -+ 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, -+ 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, -+ 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, -+ 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, -+ 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, -+ 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, -+ 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, -+ 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 0x636d153fUL, -+ 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, -+ 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, -+ 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, -+ 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, -+ 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, -+ 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, -+ 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, -+ 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, -+ 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, -+ 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, -+ 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, -+ 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, -+ 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, -+ 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, -+ 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, -+ 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, -+ 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, -+ 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, -+ 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, -+ 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, -+ 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, -+ 0x93e92819UL, 0x96a63e9cUL, 
0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, -+ 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, -+ 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, -+ 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, -+ 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, -+ 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, -+ 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, -+ 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, -+ 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, -+ 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, -+ 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, -+ 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, -+ 0xbe9834edUL, -+ -+ // Table 3 -+ 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, -+ 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, -+ 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, -+ 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, -+ 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, -+ 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, -+ 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, -+ 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, -+ 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, -+ 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, -+ 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, -+ 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, -+ 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, -+ 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, -+ 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, -+ 0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, -+ 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, -+ 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, -+ 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, -+ 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, -+ 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, -+ 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, -+ 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, -+ 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, -+ 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, -+ 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, -+ 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, -+ 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, -+ 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, -+ 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, -+ 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, -+ 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, -+ 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, -+ 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, -+ 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, -+ 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, -+ 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 
0x80a57623UL, 0xd8c66675UL, -+ 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, -+ 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, -+ 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, -+ 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, -+ 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, -+ 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, -+ 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, -+ 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, -+ 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, -+ 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, -+ 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, -+ 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, -+ 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, -+ 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, -+ 0xde0506f1UL, -+ // Constants for Neon CRC232 implementation -+ // k3 = 0x78ED02D5 = x^288 mod poly - bit reversed -+ // k4 = 0xED627DAE = x^256 mod poly - bit reversed -+ 0x78ED02D5UL, 0xED627DAEUL, // k4:k3 -+ 0xED78D502UL, 0x62EDAE7DUL, // byte swap -+ 0x02D578EDUL, 0x7DAEED62UL, // word swap -+ 0xD502ED78UL, 0xAE7D62EDUL, // byte swap of word swap -+}; -diff --git a/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp -new file mode 100644 -index 0000000000..05791e1a0c ---- /dev/null -+++ b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp -@@ -0,0 +1,2214 @@ +@@ -0,0 +1,178 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. 
++ ++//find the last fp value ++address StubRoutines::la::_call_stub_compiled_return = NULL; ++ ++/** ++ * crc_table[] from jdk/src/share/native/java/util/zip/zlib-1.2.5/crc32.h ++ */ ++juint StubRoutines::la::_crc_table[] = ++{ ++ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, ++ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, ++ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, ++ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, ++ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, ++ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, ++ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, ++ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, ++ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, ++ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, ++ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, ++ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, ++ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, ++ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, ++ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, ++ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, ++ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, ++ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, ++ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, ++ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, ++ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, ++ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, ++ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, ++ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, ++ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, ++ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, ++ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, ++ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, ++ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, ++ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, ++ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, ++ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, ++ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, ++ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, ++ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, ++ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, ++ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, ++ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, ++ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, ++ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, ++ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, ++ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, ++ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, ++ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, ++ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, ++ 0xd70dd2eeUL, 0x4e048354UL, 
0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, ++ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, ++ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, ++ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, ++ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, ++ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, ++ 0x2d02ef8dUL ++}; ++ ++ATTRIBUTE_ALIGNED(64) juint StubRoutines::la::_npio2_hw[] = { ++ // first, various coefficient values: 0.5, invpio2, pio2_1, pio2_1t, pio2_2, ++ // pio2_2t, pio2_3, pio2_3t ++ // This is a small optimization wich keeping double[8] values in int[] table ++ // to have less address calculation instructions ++ // ++ // invpio2: 53 bits of 2/pi (enough for cases when trigonometric argument is small) ++ // pio2_1: first 33 bit of pi/2 ++ // pio2_1t: pi/2 - pio2_1 ++ // pio2_2: second 33 bit of pi/2 ++ // pio2_2t: pi/2 - (pio2_1+pio2_2) ++ // pio2_3: third 33 bit of pi/2 ++ // pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) ++ 0x00000000, 0x3fe00000, // 0.5 ++ 0x6DC9C883, 0x3FE45F30, // invpio2 = 6.36619772367581382433e-01 ++ 0x54400000, 0x3FF921FB, // pio2_1 = 1.57079632673412561417e+00 ++ 0x1A626331, 0x3DD0B461, // pio2_1t = 6.07710050650619224932e-11 ++ 0x1A600000, 0x3DD0B461, // pio2_2 = 6.07710050630396597660e-11 ++ 0x2E037073, 0x3BA3198A, // pio2_2t = 2.02226624879595063154e-21 ++ 0x2E000000, 0x3BA3198A, // pio2_3 = 2.02226624871116645580e-21 ++ 0x252049C1, 0x397B839A, // pio2_3t = 8.47842766036889956997e-32 ++ // now, npio2_hw itself ++ 0x3FF921FB, 0x400921FB, 0x4012D97C, 0x401921FB, 0x401F6A7A, 0x4022D97C, ++ 0x4025FDBB, 0x402921FB, 0x402C463A, 0x402F6A7A, 0x4031475C, 0x4032D97C, ++ 0x40346B9C, 0x4035FDBB, 0x40378FDB, 0x403921FB, 0x403AB41B, 0x403C463A, ++ 0x403DD85A, 0x403F6A7A, 0x40407E4C, 0x4041475C, 0x4042106C, 0x4042D97C, ++ 0x4043A28C, 0x40446B9C, 0x404534AC, 0x4045FDBB, 0x4046C6CB, 0x40478FDB, ++ 0x404858EB, 0x404921FB ++}; ++ ++// Coefficients for sin(x) polynomial approximation: S1..S6. ++// See kernel_sin comments in macroAssembler_loongarch64_trig.cpp for details ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dsin_coef[] = { ++ -1.66666666666666324348e-01, // 0xBFC5555555555549 ++ 8.33333333332248946124e-03, // 0x3F8111111110F8A6 ++ -1.98412698298579493134e-04, // 0xBF2A01A019C161D5 ++ 2.75573137070700676789e-06, // 0x3EC71DE357B1FE7D ++ -2.50507602534068634195e-08, // 0xBE5AE5E68A2B9CEB ++ 1.58969099521155010221e-10 // 0x3DE5D93A5ACFD57C ++}; ++ ++// Coefficients for cos(x) polynomial approximation: C1..C6. ++// See kernel_cos comments in macroAssembler_loongarch64_trig.cpp for details ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_dcos_coef[] = { ++ 4.16666666666666019037e-02, // c0x3FA555555555554C ++ -1.38888888888741095749e-03, // 0xBF56C16C16C15177 ++ 2.48015872894767294178e-05, // 0x3EFA01A019CB1590 ++ -2.75573143513906633035e-07, // 0xBE927E4F809C52AD ++ 2.08757232129817482790e-09, // 0x3E21EE9EBDB4B1C4 ++ -1.13596475577881948265e-11 // 0xBDA8FAE9BE8838D4 ++}; ++ ++// Table of constants for 2/pi, 396 Hex digits (476 decimal) of 2/pi. ++// Used in cases of very large argument. 396 hex digits is enough to support ++// required precision. 
++// Converted to double to avoid unnecessary conversion in code ++// NOTE: table looks like original int table: {0xA2F983, 0x6E4E44,...} with ++// only (double) conversion added ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_two_over_pi[] = { ++ (double)0xA2F983, (double)0x6E4E44, (double)0x1529FC, (double)0x2757D1, (double)0xF534DD, (double)0xC0DB62, ++ (double)0x95993C, (double)0x439041, (double)0xFE5163, (double)0xABDEBB, (double)0xC561B7, (double)0x246E3A, ++ (double)0x424DD2, (double)0xE00649, (double)0x2EEA09, (double)0xD1921C, (double)0xFE1DEB, (double)0x1CB129, ++ (double)0xA73EE8, (double)0x8235F5, (double)0x2EBB44, (double)0x84E99C, (double)0x7026B4, (double)0x5F7E41, ++ (double)0x3991D6, (double)0x398353, (double)0x39F49C, (double)0x845F8B, (double)0xBDF928, (double)0x3B1FF8, ++ (double)0x97FFDE, (double)0x05980F, (double)0xEF2F11, (double)0x8B5A0A, (double)0x6D1F6D, (double)0x367ECF, ++ (double)0x27CB09, (double)0xB74F46, (double)0x3F669E, (double)0x5FEA2D, (double)0x7527BA, (double)0xC7EBE5, ++ (double)0xF17B3D, (double)0x0739F7, (double)0x8A5292, (double)0xEA6BFB, (double)0x5FB11F, (double)0x8D5D08, ++ (double)0x560330, (double)0x46FC7B, (double)0x6BABF0, (double)0xCFBC20, (double)0x9AF436, (double)0x1DA9E3, ++ (double)0x91615E, (double)0xE61B08, (double)0x659985, (double)0x5F14A0, (double)0x68408D, (double)0xFFD880, ++ (double)0x4D7327, (double)0x310606, (double)0x1556CA, (double)0x73A8C9, (double)0x60E27B, (double)0xC08C6B, ++}; ++ ++// Pi over 2 value ++ATTRIBUTE_ALIGNED(64) jdouble StubRoutines::la::_pio2[] = { ++ 1.57079625129699707031e+00, // 0x3FF921FB40000000 ++ 7.54978941586159635335e-08, // 0x3E74442D00000000 ++ 5.39030252995776476554e-15, // 0x3CF8469880000000 ++ 3.28200341580791294123e-22, // 0x3B78CC5160000000 ++ 1.27065575308067607349e-29, // 0x39F01B8380000000 ++ 1.22933308981111328932e-36, // 0x387A252040000000 ++ 2.73370053816464559624e-44, // 0x36E3822280000000 ++ 2.16741683877804819444e-51, // 0x3569F31D00000000 ++}; +diff --git a/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp +new file mode 100644 +index 0000000000..ee2e522466 +--- /dev/null ++++ b/src/hotspot/cpu/loongarch/templateInterpreterGenerator_loongarch.cpp +@@ -0,0 +1,2224 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. @@ -49525,6 +51081,7 @@ index 0000000000..05791e1a0c +#include "interpreter/bytecodeHistogram.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" +#include "interpreter/templateInterpreterGenerator.hpp" +#include "interpreter/templateTable.hpp" +#include "oops/arrayOop.hpp" @@ -49978,15 +51535,12 @@ index 0000000000..05791e1a0c +#endif // ASSERT + // Restore bcp under the assumption that the current frame is still + // interpreted -+ // FIXME: please change the func restore_bcp -+ // S0 is the conventional register for bcp + __ restore_bcp(); + + // expression stack must be empty before entering the VM if an + // exception happened + __ empty_expression_stack(); + // throw exception -+ // FIXME: why do not pass parameter thread ? 
+ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); + return entry; +} @@ -50094,6 +51648,32 @@ index 0000000000..05791e1a0c + __ st_d(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); + __ restore_bcp(); + __ restore_locals(); ++ ++#if INCLUDE_JVMCI ++ // Check if we need to take lock at entry of synchronized method. This can ++ // only occur on method entry so emit it only for vtos with step 0. ++ if (EnableJVMCI && state == vtos && step == 0) { ++ Label L; ++ __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ __ beqz(AT, L); ++ // Clear flag. ++ __ st_b(R0, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ // Take lock. ++ lock_method(); ++ __ bind(L); ++ } else { ++#ifdef ASSERT ++ if (EnableJVMCI) { ++ Label L; ++ __ ld_b(AT, Address(TREG, JavaThread::pending_monitorenter_offset())); ++ __ beqz(AT, L); ++ __ stop("unexpected pending monitor in deopt entry"); ++ __ bind(L); ++ } ++#endif ++ } ++#endif ++ + // handle exceptions + { + Label L; @@ -50680,7 +52260,7 @@ index 0000000000..05791e1a0c + Label L, Lstatic; + __ ld_d(t,method,in_bytes(Method::const_offset())); + __ ld_hu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); -+ // LOONGARCH ABI: caller does not reserve space for the register auguments. ++ // LoongArch ABI: caller does not reserve space for the register auguments. + // A0 and A1(if needed) + __ ld_w(AT, Rmethod, in_bytes(Method::access_flags_offset())); + __ andi(AT, AT, JVM_ACC_STATIC); @@ -50743,7 +52323,7 @@ index 0000000000..05791e1a0c + // + // if native function is static, and its second parameter has type length of double word, + // and first parameter has type length of word, we have to reserve one word -+ // for the first parameter, according to loongarch abi. ++ // for the first parameter, according to LoongArch abi. + // if native function is not static, and its third parameter has type length of double word, + // and second parameter has type length of word, we have to reserve one word for the second + // parameter. @@ -51612,20 +53192,6 @@ index 0000000000..05791e1a0c + generate_and_dispatch(t); +} + -+ -+/* -+//----------------------------------------------------------------------------- -+// Generation of individual instructions -+ -+// helpers for generate_and_dispatch -+ -+ -+InterpreterGenerator::InterpreterGenerator(StubQueue* code) -+ : TemplateInterpreterGenerator(code) { -+ generate_all(); // down here so it can be "virtual" -+} -+*/ -+ +//----------------------------------------------------------------------------- + +// Non-product code @@ -51711,13 +53277,13 @@ index 0000000000..05791e1a0c +#endif // !PRODUCT diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp new file mode 100644 -index 0000000000..a17ea5db5f +index 0000000000..ddb38faf44 --- /dev/null +++ b/src/hotspot/cpu/loongarch/templateTable_loongarch.hpp @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -51740,8 +53306,8 @@ index 0000000000..a17ea5db5f + * + */ + -+#ifndef CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP -+#define CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP ++#ifndef CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP ++#define CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP + + static void prepare_invoke(int byte_no, + Register method, // linked method (or i-klass) @@ -51757,16 +53323,16 @@ index 0000000000..a17ea5db5f + static void index_check(Register array, Register index); + static void index_check_without_pop(Register array, Register index); + -+#endif // CPU_LOONGARCH_VM_TEMPLATETABLE_LOONGARCH_64_HPP ++#endif // CPU_LOONGARCH_TEMPLATETABLE_LOONGARCH_64_HPP diff --git a/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp new file mode 100644 -index 0000000000..c259cb69b3 +index 0000000000..8ad7c5f76e --- /dev/null +++ b/src/hotspot/cpu/loongarch/templateTable_loongarch_64.cpp -@@ -0,0 +1,4335 @@ +@@ -0,0 +1,4147 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -51793,12 +53359,14 @@ index 0000000000..c259cb69b3 +#include "asm/macroAssembler.hpp" +#include "interpreter/interpreter.hpp" +#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" +#include "interpreter/templateTable.hpp" +#include "memory/universe.hpp" +#include "oops/methodData.hpp" +#include "oops/objArrayKlass.hpp" +#include "oops/oop.inline.hpp" +#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" +#include "runtime/sharedRuntime.hpp" +#include "runtime/stubRoutines.hpp" +#include "runtime/synchronizer.hpp" @@ -51820,7 +53388,7 @@ index 0000000000..c259cb69b3 +// Platform-dependent initialization + +void TemplateTable::pd_initialize() { -+ // No loongarch specific initialization ++ // No LoongArch specific initialization +} + +// Address computation: local variables @@ -51851,8 +53419,8 @@ index 0000000000..c259cb69b3 +static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } +static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } + -+// At top of Java expression stack which may be different than sp(). It -+// isn't for category 1 objects. ++// At top of Java expression stack which may be different than sp(). ++// It isn't for category 1 objects. 
+static inline Address at_tos () { + Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); + return tos; @@ -52307,7 +53875,6 @@ index 0000000000..c259cb69b3 +} + +// we compute the actual local variable address here -+// the x86 dont do so for it has scaled index memory access model, we dont have, so do here +void TemplateTable::locals_index(Register reg, int offset) { + __ ld_bu(reg, at_bcp(offset)); + __ slli_d(reg, reg, Address::times_8); @@ -52480,7 +54047,6 @@ index 0000000000..c259cb69b3 + // check index + Label ok; + __ ld_w(AT, array, arrayOopDesc::length_offset_in_bytes()); -+#ifndef OPT_RANGECHECK + __ bltu(index, AT, ok); + + //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 @@ -52488,99 +54054,34 @@ index 0000000000..c259cb69b3 + if (A2 != index) __ move(A2, index); + __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); + __ bind(ok); -+#else -+ __ ld_w(AT, array, arrayOopDesc::length_offset_in_bytes()); -+ __ move(A2, index); -+ __ stop("LA not implemented yet"); -+ //__ tgeu(A2, AT, 29); -+#endif +} + +void TemplateTable::iaload() { + transition(itos, itos); -+ if(UseBoundCheckInstruction) { -+ __ pop(SSR); //SSR:array FSR: index -+ __ alsl_d(FSR, FSR, SSR, 1); -+ __ addi_d(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); -+ -+ __ ld_w(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound -+ __ alsl_d(AT, AT, SSR, 1); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); -+ -+ __ warn("iaload Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gslwle(FSR, FSR, AT); -+ } else { -+ index_check(SSR, FSR); -+ __ alsl_d(FSR, FSR, SSR, 1); -+ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); -+ } ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, 1); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); +} + +void TemplateTable::laload() { + transition(itos, ltos); -+ if(UseBoundCheckInstruction) { -+ __ pop(SSR); //SSR:array FSR: index -+ __ alsl_d(FSR, FSR, SSR, Address::times_8 - 1); -+ __ addi_d(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); -+ -+ __ ld_w(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound -+ __ alsl_d(AT, AT, SSR, Address::times_8 - 1); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); -+ -+ __ warn("laload Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gsldle(FSR, FSR, AT); -+ } else { -+ index_check(SSR, FSR); -+ __ alsl_d(T4, FSR, SSR, Address::times_8 - 1); -+ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T4, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); -+ } ++ index_check(SSR, FSR); ++ __ alsl_d(T4, FSR, SSR, Address::times_8 - 1); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T4, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); +} + +void TemplateTable::faload() { + transition(itos, ftos); -+ if(UseBoundCheckInstruction) { -+ __ pop(SSR); //SSR:array FSR: index -+ __ shl(FSR, 2); -+ __ add_d(FSR, SSR, FSR); -+ __ addi_d(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); -+ -+ __ ld_w(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound -+ __ shl(AT, 2); -+ __ add_d(AT, SSR, AT); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); -+ -+ __ warn("faload Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gslwlec1(FSF, FSR, AT); -+ } else { -+ index_check(SSR, FSR); 
-+ __ shl(FSR, 2); -+ __ add_d(FSR, SSR, FSR); -+ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); -+ } ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_4 - 1); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); +} + +void TemplateTable::daload() { + transition(itos, dtos); -+ if(UseBoundCheckInstruction) { -+ __ pop(SSR); //SSR:array FSR: index -+ __ alsl_d(FSR, FSR, SSR, 2); -+ __ addi_d(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); -+ -+ __ ld_w(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound -+ __ alsl_d(AT, AT, SSR, 2); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); -+ -+ __ warn("daload Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gsldlec1(FSF, FSR, AT); -+ } else { -+ index_check(SSR, FSR); -+ __ alsl_d(T4, FSR, SSR, 2); -+ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T4, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); -+ } ++ index_check(SSR, FSR); ++ __ alsl_d(T4, FSR, SSR, 2); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T4, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); +} + +void TemplateTable::aaload() { @@ -52596,23 +54097,9 @@ index 0000000000..c259cb69b3 + +void TemplateTable::baload() { + transition(itos, itos); -+ if(UseBoundCheckInstruction) { -+ __ pop(SSR); //SSR:array FSR:index -+ __ add_d(FSR, SSR, FSR); -+ __ addi_d(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base -+ -+ __ ld_w(AT, SSR, arrayOopDesc::length_offset_in_bytes()); -+ __ add_d(AT, SSR, AT); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound -+ -+ __ warn("baload Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gslble(FSR, FSR, AT); -+ } else { -+ index_check(SSR, FSR); -+ __ add_d(FSR, SSR, FSR); -+ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); -+ } ++ index_check(SSR, FSR); ++ __ add_d(FSR, SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); +} + +void TemplateTable::caload() { @@ -52637,23 +54124,9 @@ index 0000000000..c259cb69b3 + +void TemplateTable::saload() { + transition(itos, itos); -+ if(UseBoundCheckInstruction) { -+ __ pop(SSR); //SSR:array FSR: index -+ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); -+ __ addi_d(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); -+ -+ __ ld_w(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound -+ __ alsl_d(AT, AT, SSR, Address::times_2 - 1); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT)); -+ -+ __ warn("saload Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gslhle(FSR, FSR, AT); -+ } else { -+ index_check(SSR, FSR); -+ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); -+ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); -+ } ++ index_check(SSR, FSR); ++ __ alsl_d(FSR, FSR, SSR, Address::times_2 - 1); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); +} + +void TemplateTable::iload(int n) { @@ -52831,92 +54304,36 @@ index 0000000000..c259cb69b3 +void TemplateTable::iastore() { + transition(itos, vtos); + __ 
pop_i(SSR); // T2: array SSR: index -+ if(UseBoundCheckInstruction) { -+ __ pop_ptr(T2); -+ __ alsl_d(SSR, SSR, T2, Address::times_4 - 1); -+ __ addi_d(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); // base -+ -+ __ ld_w(AT, T2, arrayOopDesc::length_offset_in_bytes()); -+ __ alsl_d(AT, AT, T2, Address::times_4 - 1); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); //bound -+ -+ __ warn("iastore Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gsswle(FSR, SSR, AT); -+ } else { -+ index_check(T2, SSR); // prefer index in SSR -+ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); -+ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); -+ } ++ index_check(T2, SSR); // prefer index in SSR ++ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); +} + +// used register T2, T3 +void TemplateTable::lastore() { + transition(ltos, vtos); + __ pop_i (T2); -+ if(UseBoundCheckInstruction) { -+ __ pop_ptr(T3); -+ __ alsl_d(T2, T2, T3, Address::times_8 - 1); -+ __ addi_d(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); // base -+ -+ __ ld_w(AT, T3, arrayOopDesc::length_offset_in_bytes()); -+ __ alsl_d(AT, AT, T3, Address::times_8 - 1); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); //bound -+ -+ __ warn("lastore Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gssdle(FSR, T2, AT); -+ } else { -+ index_check(T3, T2); -+ __ alsl_d(T3, T2, T3, Address::times_8 - 1); -+ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); -+ } ++ index_check(T3, T2); ++ __ alsl_d(T3, T2, T3, Address::times_8 - 1); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); +} + +// used register T2 +void TemplateTable::fastore() { + transition(ftos, vtos); + __ pop_i(SSR); -+ if(UseBoundCheckInstruction) { -+ __ pop_ptr(T2); -+ __ alsl_d(SSR, SSR, T2, Address::times_4 - 1); -+ __ addi_d(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); // base -+ -+ __ ld_w(AT, T2, arrayOopDesc::length_offset_in_bytes()); -+ __ alsl_d(AT, AT, T2, Address::times_4 - 1); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); //bound -+ -+ __ warn("fastore Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gsswlec1(FSF, SSR, AT); -+ } else { -+ index_check(T2, SSR); -+ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); -+ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); -+ } ++ index_check(T2, SSR); ++ __ alsl_d(T2, SSR, T2, Address::times_4 - 1); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); +} + +// used register T2, T3 +void TemplateTable::dastore() { + transition(dtos, vtos); + __ pop_i (T2); -+ if(UseBoundCheckInstruction) { -+ __ pop_ptr(T3); -+ __ alsl_d(T2, T2, T3, Address::times_8 - 1); -+ __ addi_d(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); // base -+ -+ __ ld_w(AT, T3, arrayOopDesc::length_offset_in_bytes()); -+ __ alsl_d(AT, AT, T3, Address::times_8 - 1); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); //bound -+ -+ __ warn("dastore Unimplemented yet"); -+ __ 
stop("LA not implemented yet"); -+ //__ gssdlec1(FSF, T2, AT); -+ } else { -+ index_check(T3, T2); -+ __ alsl_d(T3, T2, T3, Address::times_8 - 1); -+ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); -+ } ++ index_check(T3, T2); ++ __ alsl_d(T3, T2, T3, Address::times_8 - 1); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); +} + +// used register : T2, T3, T8 @@ -52971,61 +54388,32 @@ index 0000000000..c259cb69b3 +void TemplateTable::bastore() { + transition(itos, vtos); + __ pop_i(SSR); -+ if(UseBoundCheckInstruction) { -+ guarantee(false, "unimplemented yet!"); -+ __ pop_ptr(T2); -+ __ add_d(SSR, T2, SSR); -+ __ addi_d(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // base -+ -+ __ ld_w(AT, T2, arrayOopDesc::length_offset_in_bytes()); -+ __ add_d(AT, T2, AT); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound -+ -+ __ warn("bastore Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gssble(FSR, SSR, AT); -+ } else { -+ index_check(T2, SSR); ++ index_check(T2, SSR); + -+ // Need to check whether array is boolean or byte -+ // since both types share the bastore bytecode. -+ __ load_klass(T4, T2); -+ __ ld_w(T4, T4, in_bytes(Klass::layout_helper_offset())); ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. ++ __ load_klass(T4, T2); ++ __ ld_w(T4, T4, in_bytes(Klass::layout_helper_offset())); + -+ int diffbit = Klass::layout_helper_boolean_diffbit(); -+ __ li(AT, diffbit); ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ li(AT, diffbit); + -+ Label L_skip; -+ __ andr(AT, T4, AT); -+ __ beq(AT, R0, L_skip); -+ __ andi(FSR, FSR, 0x1); -+ __ bind(L_skip); ++ Label L_skip; ++ __ andr(AT, T4, AT); ++ __ beq(AT, R0, L_skip); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); + -+ __ add_d(SSR, T2, SSR); -+ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); -+ } ++ __ add_d(SSR, T2, SSR); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); +} + +void TemplateTable::castore() { + transition(itos, vtos); + __ pop_i(SSR); -+ if(UseBoundCheckInstruction) { -+ __ pop_ptr(T2); -+ __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); -+ __ addi_d(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); // base -+ -+ __ ld_w(AT, T2, arrayOopDesc::length_offset_in_bytes()); -+ __ alsl_d(AT, AT, T2, Address::times_2 - 1); -+ __ addi_d(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR)); //bound -+ -+ __ warn("castore Unimplemented yet"); -+ __ stop("LA not implemented yet"); -+ //__ gsshle(FSR, SSR, AT); -+ } else { -+ index_check(T2, SSR); -+ __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); -+ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); -+ } ++ index_check(T2, SSR); ++ __ alsl_d(SSR, SSR, T2, Address::times_2 - 1); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); +} + +void TemplateTable::sastore() { @@ -53756,8 +55144,8 @@ index 0000000000..c259cb69b3 + + // pop the interpreter frame + __ ld_d(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); -+ //FIXME, shall we keep the return address on the stack? 
-+ __ leave(); // remove frame anchor ++ // remove frame anchor ++ __ leave(); + __ move(LVP, RA); + __ move(SP, A7); + @@ -53765,7 +55153,7 @@ index 0000000000..c259cb69b3 + __ andr(SP , SP , AT); + + // push the (possibly adjusted) return address -+ //refer to osr_entry in c1_LIRAssembler_loongarch.cpp ++ // refer to osr_entry in c1_LIRAssembler_loongarch.cpp + __ ld_d(AT, Rnext, nmethod::osr_entry_point_offset()); + __ jr(AT); + } @@ -54317,9 +55705,6 @@ index 0000000000..c259cb69b3 + size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); + resolve_cache_and_index(byte_no, cache, index, index_size); + -+ //assert(wordSize == 8, "adjust code below"); -+ // note we shift 4 not 2, for we get is the true inde -+ // of ConstantPoolCacheEntry, not the shifted 2-bit index as x86 version + __ alsl_d(AT, index, cache, Address::times_ptr - 1); + __ ld_d(method, AT, method_offset); + @@ -54353,8 +55738,8 @@ index 0000000000..c259cb69b3 + + // cache entry pointer + __ addi_d(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); -+ __ shl(tmp3, LogBytesPerWord); -+ __ add_d(tmp2, tmp2, tmp3); ++ __ alsl_d(tmp2, tmp3, tmp2, LogBytesPerWord - 1); ++ + if (is_static) { + __ move(tmp1, R0); + } else { @@ -54363,10 +55748,9 @@ index 0000000000..c259cb69b3 + } + // tmp1: object pointer or NULL + // tmp2: cache entry pointer -+ // tmp3: jvalue object on the stack + __ call_VM(NOREG, CAST_FROM_FN_PTR(address, + InterpreterRuntime::post_field_access), -+ tmp1, tmp2, tmp3); ++ tmp1, tmp2); + __ get_cache_and_index_at_bcp(cache, index, 1); + __ bind(L1); + } @@ -54518,7 +55902,6 @@ index 0000000000..c259cb69b3 + __ li(AT, ltos); + __ bne(flags, AT, notLong); + -+ // FIXME : the load/store should be atomic, we have no simple method to do this in loongarch32 + // ltos + __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); + __ push(ltos); @@ -54636,8 +56019,7 @@ index 0000000000..c259cb69b3 + } + // cache entry pointer + __ addi_d(tmp2, tmp2, in_bytes(cp_base_offset)); -+ __ shl(tmp4, LogBytesPerWord); -+ __ add_d(tmp2, tmp2, tmp4); ++ __ alsl_d(tmp2, tmp4, tmp2, LogBytesPerWord - 1); + // object (tos) + __ move(tmp3, SP); + // tmp1: object pointer set up above (NULL if static) @@ -55096,7 +56478,6 @@ index 0000000000..c259cb69b3 + __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), noreg, noreg); + break; + case Bytecodes::_fast_agetfield: -+ //add for compressedoops + do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); + __ verify_oop(FSR); + break; @@ -55368,7 +56749,7 @@ index 0000000000..c259cb69b3 +void TemplateTable::fast_invokevfinal(int byte_no) { + transition(vtos, vtos); + assert(byte_no == f2_byte, "use this argument"); -+ __ stop("fast_invokevfinal not used on loongarch64"); ++ __ stop("fast_invokevfinal not used on LoongArch64"); +} + +// used registers : T0, T1, T2, T3, T1, A7 @@ -55552,7 +56933,6 @@ index 0000000000..c259cb69b3 + transition(vtos, vtos); + assert(byte_no == f1_byte, "use this argument"); + -+ //const Register Rmethod = T2; + const Register T2_callsite = T2; + + prepare_invoke(byte_no, Rmethod, T2_callsite); @@ -55708,7 +57088,7 @@ index 0000000000..c259cb69b3 + __ bne(T1, FSR, loop); // dont clear header + } + -+ //klass in T3, ++ // klass in T3, + // initialize object header only. 
+ __ bind(initialize_header); + if (UseBiasedLocking) { @@ -55748,7 +57128,7 @@ index 0000000000..c259cb69b3 +void TemplateTable::newarray() { + transition(itos, atos); + __ ld_bu(A1, at_bcp(1)); -+ //type, count ++ // type, count + call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); + __ membar(__ StoreStore); +} @@ -55815,7 +57195,6 @@ index 0000000000..c259cb69b3 + __ bind(resolved); + + // get subklass in T2 -+ //add for compressedoops + __ load_klass(T2, FSR); + // Superklass in T3. Subklass in T2. + __ gen_subtype_check(T3, T2, ok_is_subtype); @@ -55838,7 +57217,7 @@ index 0000000000..c259cb69b3 + __ bind(done); +} + -+// i use T3 as cpool, T1 as tags, T2 as index ++// T3 as cpool, T1 as tags, T2 as index +// object always in FSR, superklass in T3, subklass in T2 +void TemplateTable::instanceof() { + transition(atos, itos); @@ -55878,7 +57257,6 @@ index 0000000000..c259cb69b3 + + __ bind(resolved); + // get subklass in T2 -+ //add for compressedoops + __ load_klass(T2, FSR); + + // Superklass in T3. Subklass in T2. @@ -56101,13 +57479,13 @@ index 0000000000..c259cb69b3 +#endif // !CC_INTERP diff --git a/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp new file mode 100644 -index 0000000000..4a25cd760b +index 0000000000..5b9f7b7898 --- /dev/null +++ b/src/hotspot/cpu/loongarch/vmStructs_loongarch.hpp -@@ -0,0 +1,68 @@ +@@ -0,0 +1,61 @@ +/* + * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -56130,22 +57508,15 @@ index 0000000000..4a25cd760b + * + */ + -+#ifndef CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP ++#define CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP + +// These are the CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is +// referenced by vmStructs.cpp. 
+ +#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ -+ \ -+ /******************************/ \ -+ /* JavaCallWrapper */ \ -+ /******************************/ \ -+ /******************************/ \ -+ /* JavaFrameAnchor */ \ -+ /******************************/ \ -+ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ + \ + + /* NOTE that we do not use the last_entry() macro here; it is used */ @@ -56172,7 +57543,7 @@ index 0000000000..4a25cd760b + /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ + /* be present there) */ + -+#endif // CPU_LOONGARCH_VM_VMSTRUCTS_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_VMSTRUCTS_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.cpp new file mode 100644 index 0000000000..eb8f075c71 @@ -56266,13 +57637,13 @@ index 0000000000..eb8f075c71 +} diff --git a/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp new file mode 100644 -index 0000000000..9776f1b056 +index 0000000000..1a93123134 --- /dev/null +++ b/src/hotspot/cpu/loongarch/vm_version_ext_loongarch.hpp @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2018, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -56295,8 +57666,8 @@ index 0000000000..9776f1b056 + * + */ + -+#ifndef CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP + +#include "runtime/vm_version.hpp" +#include "utilities/macros.hpp" @@ -56323,13 +57694,13 @@ index 0000000000..9776f1b056 + static void initialize_cpu_information(void); +}; + -+#endif // CPU_LOONGARCH_VM_VM_VERSION_EXT_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_VM_VERSION_EXT_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp new file mode 100644 -index 0000000000..6817f9ce8b +index 0000000000..b954fa3e1a --- /dev/null +++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.cpp -@@ -0,0 +1,437 @@ +@@ -0,0 +1,414 @@ +/* + * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
@@ -56565,33 +57936,6 @@ index 0000000000..6817f9ce8b + } +#endif + -+ if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { -+ if (FLAG_IS_DEFAULT(UseSyncLevel)) { -+ FLAG_SET_DEFAULT(UseSyncLevel, 1000); -+ } -+ } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { -+ if (FLAG_IS_DEFAULT(UseSyncLevel)) { -+ FLAG_SET_DEFAULT(UseSyncLevel, 2000); -+ } -+ } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { -+ if (FLAG_IS_DEFAULT(UseSyncLevel)) { -+ FLAG_SET_DEFAULT(UseSyncLevel, 3000); -+ } -+ } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { -+ if (FLAG_IS_DEFAULT(UseSyncLevel)) { -+ FLAG_SET_DEFAULT(UseSyncLevel, 4000); -+ } -+ } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { -+ if (FLAG_IS_DEFAULT(UseSyncLevel)) { -+ FLAG_SET_DEFAULT(UseSyncLevel, 10000); -+ } -+ } else { -+ assert(false, "Should Not Reach Here, what is the cpu type?"); -+ if (FLAG_IS_DEFAULT(UseSyncLevel)) { -+ FLAG_SET_DEFAULT(UseSyncLevel, 10000); -+ } -+ } -+ + char buf[256]; + + // A note on the _features_string format: @@ -56603,8 +57947,7 @@ index 0000000000..6817f9ce8b + // Furthermore, use one, and only one, separator space between features. + // Multiple spaces are considered separate tokens, messing up everything. + jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, " -+ "0x%lx, fp_ver: %d, lvz_ver: %d, " -+ "usesynclevel:%d", ++ "0x%lx, fp_ver: %d, lvz_ver: %d, ", + (is_la64() ? "la64" : ""), + (is_la32() ? "la32" : ""), + (supports_lsx() ? ", lsx" : ""), @@ -56623,12 +57966,11 @@ index 0000000000..6817f9ce8b + (needs_ulsync() ? ", needs_ulsync": ""), + _cpuid_info.cpucfg_info_id0.bits.PRID, + _cpuid_info.cpucfg_info_id2.bits.FP_VER, -+ _cpuid_info.cpucfg_info_id2.bits.LVZ_VER, -+ UseSyncLevel); ++ _cpuid_info.cpucfg_info_id2.bits.LVZ_VER); + _features_str = strdup(buf); + + assert(!is_la32(), "Should Not Reach Here, what is the cpu type?"); -+ assert( is_la64(), "Should be loongarch64"); ++ assert( is_la64(), "Should be LoongArch64"); + + if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { + FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); @@ -56729,12 +58071,18 @@ index 0000000000..6817f9ce8b + } + } + ++#ifdef COMPILER2 ++ if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { ++ FLAG_SET_DEFAULT(UseMulAddIntrinsic, true); ++ } ++ + if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { + UseMontgomeryMultiplyIntrinsic = true; + } + if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { + UseMontgomerySquareIntrinsic = true; + } ++#endif + + // This machine allows unaligned memory accesses + if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { @@ -56769,10 +58117,10 @@ index 0000000000..6817f9ce8b +} diff --git a/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp new file mode 100644 -index 0000000000..2cb8ec7fd3 +index 0000000000..8b5bc4a4c8 --- /dev/null +++ b/src/hotspot/cpu/loongarch/vm_version_loongarch.hpp -@@ -0,0 +1,290 @@ +@@ -0,0 +1,292 @@ +/* + * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. 
@@ -56798,14 +58146,16 @@ index 0000000000..2cb8ec7fd3 + * + */ + -+#ifndef CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP ++#define CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP + +#include "runtime/abstract_vm_version.hpp" +#include "runtime/globals_extension.hpp" +#include "utilities/sizes.hpp" + +class VM_Version: public Abstract_VM_Version { ++ friend class JVMCIVMStructs; ++ +public: + + union LoongArch_Cpucfg_Id0 { @@ -57062,16 +58412,16 @@ index 0000000000..2cb8ec7fd3 + static const char* cpu_features() { return _features_str; } +}; + -+#endif // CPU_LOONGARCH_VM_VM_VERSION_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_VM_VERSION_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp new file mode 100644 -index 0000000000..b606d8550c +index 0000000000..43caba5187 --- /dev/null +++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.cpp -@@ -0,0 +1,51 @@ +@@ -0,0 +1,53 @@ +/* + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -57104,15 +58454,17 @@ index 0000000000..b606d8550c + Register reg = ::as_Register(0); + int i; + for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { -+ regName[i++] = reg->name(); -+ regName[i++] = reg->name(); ++ for (int j = 0 ; j < RegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = reg->name(); ++ } + reg = reg->successor(); + } + + FloatRegister freg = ::as_FloatRegister(0); + for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { -+ regName[i++] = freg->name(); -+ regName[i++] = freg->name(); ++ for (int j = 0 ; j < FloatRegisterImpl::max_slots_per_register ; j++) { ++ regName[i++] = freg->name(); ++ } + freg = freg->successor(); + } + @@ -57122,7 +58474,7 @@ index 0000000000..b606d8550c +} diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp new file mode 100644 -index 0000000000..2377eed4f6 +index 0000000000..819eaff0bb --- /dev/null +++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.hpp @@ -0,0 +1,58 @@ @@ -57151,8 +58503,8 @@ index 0000000000..2377eed4f6 + * + */ + -+#ifndef CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP -+#define CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP ++#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_HPP ++#define CPU_LOONGARCH_VMREG_LOONGARCH_HPP + +inline bool is_Register() { + return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; @@ -57160,7 +58512,7 @@ index 0000000000..2377eed4f6 + +inline Register as_Register() { + assert( is_Register(), "must be"); -+ return ::as_Register(value() >> 1); ++ return ::as_Register(value() / RegisterImpl::max_slots_per_register); +} + +inline bool is_FloatRegister() { @@ -57168,9 +58520,9 @@ index 0000000000..2377eed4f6 +} + +inline FloatRegister as_FloatRegister() { -+ assert( is_FloatRegister(), "must be" ); -+ assert( is_even(value()), "must be" ); -+ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); ++ assert( is_FloatRegister() && is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) / ++ FloatRegisterImpl::max_slots_per_register); +} + +inline bool is_concrete() { @@ -57183,13 +58535,13 @@ index 
0000000000..2377eed4f6 + } +} + -+#endif // CPU_LOONGARCH_VM_VMREG_LOONGARCH_HPP ++#endif // CPU_LOONGARCH_VMREG_LOONGARCH_HPP diff --git a/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp new file mode 100644 -index 0000000000..a752e2c80d +index 0000000000..edb78e36da --- /dev/null +++ b/src/hotspot/cpu/loongarch/vmreg_loongarch.inline.hpp -@@ -0,0 +1,38 @@ +@@ -0,0 +1,39 @@ +/* + * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. @@ -57215,25 +58567,26 @@ index 0000000000..a752e2c80d + * + */ + -+#ifndef CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP -+#define CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP ++#ifndef CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP ++#define CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP + +inline VMReg RegisterImpl::as_VMReg() { + if( this==noreg ) return VMRegImpl::Bad(); -+ return VMRegImpl::as_VMReg(encoding() << 1 ); ++ return VMRegImpl::as_VMReg(encoding() * RegisterImpl::max_slots_per_register); +} + +inline VMReg FloatRegisterImpl::as_VMReg() { -+ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); ++ return VMRegImpl::as_VMReg((encoding() * FloatRegisterImpl::max_slots_per_register) + ++ ConcreteRegisterImpl::max_gpr); +} + -+#endif // CPU_LOONGARCH_VM_VMREG_LOONGARCH_INLINE_HPP ++#endif // CPU_LOONGARCH_VMREG_LOONGARCH_INLINE_HPP diff --git a/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp new file mode 100644 -index 0000000000..e75f7c4f94 +index 0000000000..2c4b60653b --- /dev/null +++ b/src/hotspot/cpu/loongarch/vtableStubs_loongarch_64.cpp -@@ -0,0 +1,323 @@ +@@ -0,0 +1,322 @@ +/* + * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
@@ -57338,7 +58691,6 @@ index 0000000000..e75f7c4f94 + + // get receiver klass + address npe_addr = __ pc(); -+ //add for compressedoops + __ load_klass(t1, T0); + +#ifndef PRODUCT @@ -57557,432 +58909,45807 @@ index 0000000000..e75f7c4f94 + const unsigned int icache_line_size = wordSize; + return icache_line_size; +} -diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -index 847f7d61d2..f570946090 100644 ---- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -+++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp -@@ -488,6 +488,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { - } - } - -+void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { -+ ShouldNotReachHere(); -+} - - void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { - Bytecodes::Code code = op->bytecode(); -@@ -1608,6 +1611,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L - __ bind(skip); - } - -+void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { -+ ShouldNotReachHere(); -+} -+ - - void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, - CodeEmitInfo* info, bool pop_fpu_stack) { -diff --git a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp -index d34ea45c0b..f6b6dbdee3 100644 ---- a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp -+++ b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp -@@ -273,21 +273,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { - __ move(temp, addr); - } - -- --void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { -+template -+void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { - LIR_Opr tmp = FrameMap::R0_opr; - __ load(new LIR_Address(base, disp, T_INT), tmp, info); -- __ cmp(condition, tmp, c); -+ __ cmp_branch(condition, tmp, c, T_INT, tgt); - } - -+// Explicit instantiation for all supported types. -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); - --void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, -- int disp, BasicType type, CodeEmitInfo* info) { -+template -+void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { - LIR_Opr tmp = FrameMap::R0_opr; - __ load(new LIR_Address(base, disp, type), tmp, info); -- __ cmp(condition, reg, tmp); -+ __ cmp_branch(condition, reg, tmp, type, tgt); - } - -+// Explicit instantiation for all supported types. 
-+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); - - bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { - assert(left != result, "should be different registers"); -diff --git a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp -index ef9b0833d3..c6b25bf10e 100644 ---- a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp -+++ b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp -@@ -62,3 +62,24 @@ void LIR_Address::verify() const { - #endif - } - #endif // PRODUCT -+ -+template -+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { -+ cmp(condition, left, right, info); -+ branch(condition, type, tgt); -+} -+ -+// Explicit instantiation for all supported types. -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); -+ -+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { -+ cmp(condition, left, right); -+ branch(condition, type, block, unordered); -+} -+ -+void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { -+ cmp(condition, left, right); -+ cmove(condition, src1, src2, dst, type); -+} -diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -index 897be2209e..0c27cc20f3 100644 ---- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -+++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp -@@ -379,6 +379,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { - } - } - -+void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { -+ ShouldNotReachHere(); -+} - - void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { - LIR_Opr src = op->in_opr(); -@@ -1503,6 +1506,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L - } - } - -+void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { -+ ShouldNotReachHere(); -+} -+ - void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, - CodeEmitInfo* info, bool pop_fpu_stack) { - assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); -diff --git a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp -index ae297ac635..c786803e0f 100644 ---- a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp -+++ b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp -@@ -213,16 +213,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { - __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); - } - --void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { -+template -+void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, 
int disp, int c, T tgt, CodeEmitInfo* info) { - LIR_Opr scratch = FrameMap::Z_R1_opr; - __ load(new LIR_Address(base, disp, T_INT), scratch, info); -- __ cmp(condition, scratch, c); -+ __ cmp_branch(condition, scratch, c, T_INT, tgt); - } - --void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { -+// Explicit instantiation for all supported types. -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); -+ -+template -+void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { - __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); -+ __ branch(condition, type, tgt); - } - -+// Explicit instantiation for all supported types. -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); -+ - bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { - if (tmp->is_valid()) { - if (is_power_of_2(c + 1)) { -diff --git a/src/hotspot/cpu/s390/c1_LIR_s390.cpp b/src/hotspot/cpu/s390/c1_LIR_s390.cpp -index 9507ca0856..2116e9af2b 100644 ---- a/src/hotspot/cpu/s390/c1_LIR_s390.cpp -+++ b/src/hotspot/cpu/s390/c1_LIR_s390.cpp -@@ -56,3 +56,23 @@ void LIR_Address::verify() const { - } - #endif // PRODUCT - -+template -+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { -+ cmp(condition, left, right, info); -+ branch(condition, type, tgt); -+} -+ -+// Explicit instantiation for all supported types. 
-+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); -+ -+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { -+ cmp(condition, left, right); -+ branch(condition, type, block, unordered); -+} -+ -+void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { -+ cmp(condition, left, right); -+ cmove(condition, src1, src2, dst, type); -+} -diff --git a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp -index e503159eb7..2e5609fec8 100644 ---- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp -+++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp -@@ -599,6 +599,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { - // The peephole pass fills the delay slot - } - -+void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { -+ ShouldNotReachHere(); -+} - - void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { - Bytecodes::Code code = op->bytecode(); -@@ -1638,6 +1641,9 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L - __ bind(skip); - } - -+void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { -+ ShouldNotReachHere(); -+} - - void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { - assert(info == NULL, "unused on this code path"); -diff --git a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp -index a09a159722..a02ffafc77 100644 ---- a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp -+++ b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp -@@ -267,19 +267,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { - __ move(temp, addr); - } - --void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { -+template -+void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { - LIR_Opr o7opr = FrameMap::O7_opr; - __ load(new LIR_Address(base, disp, T_INT), o7opr, info); -- __ cmp(condition, o7opr, c); -+ __ cmp_branch(condition, o7opr, c, T_INT, tgt); - } - -+// Explicit instantiation for all supported types. 
-+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); - --void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { -+template -+void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { - LIR_Opr o7opr = FrameMap::O7_opr; - __ load(new LIR_Address(base, disp, type), o7opr, info); -- __ cmp(condition, reg, o7opr); -+ __ cmp_branch(condition, reg, o7opr, type, tgt); - } - -+// Explicit instantiation for all supported types. -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); - - bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { - assert(left != result, "should be different registers"); -diff --git a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp -index c21d2c1d9a..9cebb387e2 100644 ---- a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp -+++ b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp -@@ -54,3 +54,24 @@ void LIR_Address::verify() const { - "wrong type for addresses"); - } - #endif // PRODUCT -+ -+template -+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { -+ cmp(condition, left, right, info); -+ branch(condition, type, tgt); -+} -+ -+// Explicit instantiation for all supported types. 
-+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); -+ -+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { -+ cmp(condition, left, right); -+ branch(condition, type, block, unordered); -+} -+ -+void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { -+ cmp(condition, left, right); -+ cmove(condition, src1, src2, dst, type); -+} -diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -index cee3140f4f..7b76eb0b9e 100644 ---- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp -@@ -1442,6 +1442,10 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { - } - } - -+void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { -+ ShouldNotReachHere(); -+} -+ - void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { - LIR_Opr src = op->in_opr(); - LIR_Opr dest = op->result_opr(); -@@ -2030,6 +2034,9 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L - } - } - -+void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { -+ ShouldNotReachHere(); -+} - - void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { - assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); -diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp -index 905708a9fa..1c6774e1d6 100644 ---- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp -+++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp -@@ -255,15 +255,27 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { - __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); - } - --void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { -+template -+void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { - __ cmp_mem_int(condition, base, disp, c, info); -+ __ branch(condition, T_INT, tgt); - } - -+// Explicit instantiation for all supported types. -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); -+template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); - --void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { -+template -+void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { - __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); -+ __ branch(condition, type, tgt); - } - -+// Explicit instantiation for all supported types. 
-+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); -+template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); - - bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { - if (tmp->is_valid() && c > 0 && c < max_jint) { -diff --git a/src/hotspot/cpu/x86/c1_LIR_x86.cpp b/src/hotspot/cpu/x86/c1_LIR_x86.cpp -index 92277ee063..20e283e302 100644 ---- a/src/hotspot/cpu/x86/c1_LIR_x86.cpp -+++ b/src/hotspot/cpu/x86/c1_LIR_x86.cpp -@@ -72,3 +72,24 @@ void LIR_Address::verify() const { - #endif - } - #endif // PRODUCT -+ -+template -+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { -+ cmp(condition, left, right, info); -+ branch(condition, type, tgt); -+} -+ -+// Explicit instantiation for all supported types. -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); -+template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); -+ -+void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { -+ cmp(condition, left, right); -+ branch(condition, type, block, unordered); -+} -+ -+void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { -+ cmp(condition, left, right); -+ cmove(condition, src1, src2, dst, type); -+} -diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp -index d2290a6706..3e88d609b5 100644 ---- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp -+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp -@@ -261,7 +261,8 @@ void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, - #define __ ce->masm()-> - - void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, -- LIR_Opr ref) const { -+ LIR_Opr ref, -+ LIR_Opr res) const { - __ testptr(ref->as_register(), address_bad_mask_from_thread(r15_thread)); - } - -diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp -index 3687754e71..791e4ed43f 100644 ---- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp -+++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp -@@ -77,7 +77,8 @@ public: - - #ifdef COMPILER1 - void generate_c1_load_barrier_test(LIR_Assembler* ce, -- LIR_Opr ref) const; -+ LIR_Opr ref, -+ LIR_Opr res) const; - - void generate_c1_load_barrier_stub(LIR_Assembler* ce, - ZLoadBarrierStubC1* stub) const; -diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp -index 476b1c2175..006e29d0a2 100644 ---- a/src/hotspot/os/linux/os_linux.cpp -+++ b/src/hotspot/os/linux/os_linux.cpp -@@ -22,6 +22,12 @@ - * - */ - -+/* -+ * This file has been modified by Loongson Technology in 2021. These -+ * modifications are Copyright (c) 2021 Loongson Technology, and are made -+ * available on the same license terms set forth above. 
-+ */ -+ - // no precompiled headers - #include "jvm.h" - #include "classfile/classLoader.hpp" -@@ -3837,6 +3843,8 @@ size_t os::Linux::find_large_page_size() { - IA64_ONLY(256 * M) - PPC_ONLY(4 * M) - S390_ONLY(1 * M) -+ MIPS64_ONLY(4 * M) -+ LOONGARCH64_ONLY(4 * M); //In MIPS _large_page_size is seted 4*M. // TODO: LA - SPARC_ONLY(4 * M); - #endif // ZERO - -diff --git a/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp +diff --git a/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp new file mode 100644 -index 0000000000..30719a0340 +index 0000000000..73f021c9b7 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp -@@ -0,0 +1,24 @@ ++++ b/src/hotspot/cpu/mips/abstractInterpreter_mips.cpp +@@ -0,0 +1,132 @@ +/* -+ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "ci/ciMethod.hpp" ++#include "interpreter/interpreter.hpp" ++#include "runtime/frame.inline.hpp" ++ ++// asm based interpreter deoptimization helpers ++int AbstractInterpreter::size_activation(int max_stack, ++ int temps, ++ int extra_args, ++ int monitors, ++ int callee_params, ++ int callee_locals, ++ bool is_top_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ ++ // fixed size of an interpreter frame: ++ int overhead = frame::sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset; ++ // Our locals were accounted for by the caller (or last_frame_adjust ++ // on the transistion) Since the callee parameters already account ++ // for the callee's params we only need to account for the extra ++ // locals. ++ int size = overhead + ++ (callee_locals - callee_params)*Interpreter::stackElementWords + ++ monitors * frame::interpreter_frame_monitor_size() + ++ temps* Interpreter::stackElementWords + extra_args; ++ ++ return size; ++} ++ ++// How much stack a method activation needs in words. 
++int AbstractInterpreter::size_top_interpreter_activation(Method* method) { ++ ++ const int entry_size = frame::interpreter_frame_monitor_size(); ++ ++ // total overhead size: entry_size + (saved ebp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset) + entry_size; ++ ++ const int stub_code = 6; // see generate_call_stub ++ // return overhead_size + method->max_locals() + method->max_stack() + stub_code; ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return overhead_size + method_stack + stub_code; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in AbstractInterpreterGenerator::generate_method_entry. ++ // If interpreter_frame!=NULL, set up the method, locals, and monitors. ++ // The frame interpreter_frame, if not NULL, is guaranteed to be the ++ // right size, as determined by a previous call to this method. ++ // It is also guaranteed to be walkable even though it is in a skeletal state ++ ++ // fixed size of an interpreter frame: ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable(2)"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and interpreter_frame_sender_sp ++ // interpreter_frame_sender_sp is the original sp of the caller (the unextended_sp) ++ // and sender_sp is fp+8 ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(montop - moncount); ++ ++ //set last sp; ++ intptr_t* esp = (intptr_t*) monbot - tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(esp); ++ // All frames but the initial interpreter frame we fill in have a ++ // value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. 
++ // ++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == interpreter_frame->interpreter_frame_sender_sp() ) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = method->constants()->cache(); ++ *interpreter_frame->interpreter_frame_mirror_addr() = method->method_holder()->java_mirror(); ++} ++ +diff --git a/src/hotspot/cpu/mips/assembler_mips.cpp b/src/hotspot/cpu/mips/assembler_mips.cpp +new file mode 100644 +index 0000000000..5f02077d0e +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.cpp +@@ -0,0 +1,733 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Implementation of AddressLiteral ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ _rspec = rspec_from_rtype(rtype, target); ++} ++ ++// Implementation of Address ++ ++Address Address::make_array(ArrayAddress adr) { ++ AddressLiteral base = adr.base(); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? 
++ Address array(index._base, index._index, index._scale, (intptr_t) base.target()); ++ array._rspec = base._rspec; ++ return array; ++} ++ ++// exceedingly dangerous constructor ++Address::Address(address loc, RelocationHolder spec) { ++ _base = noreg; ++ _index = noreg; ++ _scale = no_scale; ++ _disp = (intptr_t) loc; ++ _rspec = spec; ++} ++ ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of Assembler ++const char *Assembler::ops_name[] = { ++ "special", "regimm", "j", "jal", "beq", "bne", "blez", "bgtz", ++ "addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui", ++ "cop0", "cop1", "cop2", "cop3", "beql", "bnel", "bleql", "bgtzl", ++ "daddi", "daddiu", "ldl", "ldr", "", "", "", "", ++ "lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr", "lwu", ++ "sb", "sh", "swl", "sw", "sdl", "sdr", "swr", "cache", ++ "ll", "lwc1", "", "", "lld", "ldc1", "", "ld", ++ "sc", "swc1", "", "", "scd", "sdc1", "", "sd" ++}; ++ ++const char* Assembler::special_name[] = { ++ "sll", "", "srl", "sra", "sllv", "", "srlv", "srav", ++ "jr", "jalr", "movz", "movn", "syscall", "break", "", "sync", ++ "mfhi", "mthi", "mflo", "mtlo", "dsll", "", "dsrl", "dsra", ++ "mult", "multu", "div", "divu", "dmult", "dmultu", "ddiv", "ddivu", ++ "add", "addu", "sub", "subu", "and", "or", "xor", "nor", ++ "", "", "slt", "sltu", "dadd", "daddu", "dsub", "dsubu", ++ "tge", "tgeu", "tlt", "tltu", "teq", "", "tne", "", ++ "dsll", "", "dsrl", "dsra", "dsll32", "", "dsrl32", "dsra32" ++}; ++ ++const char* Assembler::cop1_name[] = { ++ "add", "sub", "mul", "div", "sqrt", "abs", "mov", "neg", ++ "round.l", "trunc.l", "ceil.l", "floor.l", "round.w", "trunc.w", "ceil.w", "floor.w", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "c.f", "c.un", "c.eq", "c.ueq", "c.olt", "c.ult", "c.ole", "c.ule", ++ "c.sf", "c.ngle", "c.seq", "c.ngl", "c.lt", "c.nge", "c.le", "c.ngt" ++}; ++ ++const char* Assembler::cop1x_name[] = { ++ "lwxc1", "ldxc1", "", "", "", "luxc1", "", "", ++ "swxc1", "sdxc1", "", "", "", "suxc1", "", "prefx", ++ "", "", "", "", "", "", "alnv.ps", "", ++ "", "", "", "", "", "", "", "", ++ "madd.s", "madd.d", "", "", "", "", "madd.ps", "", ++ "msub.s", "msub.d", "", "", "", "", "msub.ps", "", ++ "nmadd.s", "nmadd.d", "", "", "", "", "nmadd.ps", "", ++ "nmsub.s", "nmsub.d", "", "", "", "", "nmsub.ps", "" ++}; ++ ++const char* Assembler::special2_name[] = { ++ "madd", "", "mul", "", "msub", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "gsdmult", "", "", "gsdiv", "gsddiv", "", "", ++ "", "", "", "", "gsmod", "gsdmod", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "" ++}; ++ ++const char* Assembler::special3_name[] = { ++ "ext", "", "", "", "ins", "dinsm", "dinsu", "dins", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "bshfl", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++}; ++ ++const char* Assembler::regimm_name[] = { ++ "bltz", "bgez", "bltzl", "bgezl", "", "", "", "", ++ "tgei", "tgeiu", "tlti", "tltiu", "teqi", "", "tnei", "", ++ "bltzal", "bgezal", "bltzall", "bgezall" ++}; ++ ++const char* Assembler::gs_ldc2_name[] = { ++ "gslbx", "gslhx", "gslwx", "gsldx", "", "", "gslwxc1", "gsldxc1" ++}; ++ ++ ++const 
char* Assembler::gs_lwc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gslble", "gslbgt", "gslhle", "gslhgt", "gslwle", "gslwgt", "gsldle", "gsldgt", ++ "", "", "", "gslwlec1", "gslwgtc1", "gsldlec1", "gsldgtc1", "",/*LWDIR, LWPTE, LDDIR and LDPTE have the same low 6 bits.*/ ++ "gslq", "" ++}; ++ ++const char* Assembler::gs_sdc2_name[] = { ++ "gssbx", "gsshx", "gsswx", "gssdx", "", "", "gsswxc1", "gssdxc1" ++}; ++ ++const char* Assembler::gs_swc2_name[] = { ++ "", "", "", "", "", "", "", "", ++ "", "", "", "", "", "", "", "", ++ "gssble", "gssbgt", "gsshle", "gsshgt", "gsswle", "gsswgt", "gssdle", "gssdgt", ++ "", "", "", "", "gsswlec1", "gsswgtc1", "gssdlec1", "gssdgtc1", ++ "gssq", "" ++}; ++ ++//misleading name, print only branch/jump instruction ++void Assembler::print_instruction(int inst) { ++ const char *s; ++ switch( opcode(inst) ) { ++ default: ++ s = ops_name[opcode(inst)]; ++ break; ++ case special_op: ++ s = special_name[special(inst)]; ++ break; ++ case regimm_op: ++ s = special_name[rt(inst)]; ++ break; ++ } ++ ++ ::tty->print("%s", s); ++} ++ ++int Assembler::is_int_mask(int x) { ++ int xx = x; ++ int count = 0; ++ ++ while (x != 0) { ++ x &= (x - 1); ++ count++; ++ } ++ ++ if ((1<>2; ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ case lui_op: ++ case ori_op: ++ case daddiu_op: ++ ShouldNotReachHere(); ++ break; ++ default: ++ assert(is_simm16(v), "must be simm16"); ++#ifndef PRODUCT ++ if (!is_simm16(v)) { ++ tty->print_cr("must be simm16"); ++ tty->print_cr("Inst: %x", inst); ++ } ++#endif ++ ++ v = low16(v); ++ inst &= 0xffff0000; ++ break; ++ } ++ ++ return inst | v; ++} ++ ++int Assembler::branch_destination(int inst, int pos) { ++ int off = 0; ++ ++ switch(opcode(inst)) { ++ case j_op: ++ case jal_op: ++ assert(false, "should not use j/jal here"); ++ break; ++ default: ++ off = expand(low16(inst), 15); ++ break; ++ } ++ ++ return off ? 
pos + 4 + (off<<2) : 0; ++} ++ ++int AbstractAssembler::code_fill_byte() { ++ return 0x00; // illegal instruction 0x00000000 ++} ++ ++// Now the Assembler instruction (identical for 32/64 bits) ++ ++void Assembler::lb(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lb(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lbu(Register rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lbu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ld(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsldx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gsldx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ ld(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsldx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ ld(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gsldx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ ld(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ ld(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsldx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ ld(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::ldl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lh(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lh(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lhu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lhu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ll(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ll(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lld(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lld(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lw(Register rt, Address dst){ ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (Assembler::is_simm16(disp)) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gslwx(src, base, index, disp); ++ } else { ++ dsll(AT, index, scale); ++ gslwx(src, base, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ lw(src, AT, disp); ++ } ++ } else { ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, 
AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gslwx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ lw(src, AT, 0); ++ } ++ } else { ++ assert_different_registers(src, AT); ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(src, split_low(disp >> 16)); ++ if (split_low(disp)) ori(src, src, split_low(disp)); ++ if (UseLEXT1) { ++ gslwx(src, AT, src, 0); ++ } else { ++ daddu(AT, AT, src); ++ lw(src, AT, 0); ++ } ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ lw(src, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gslwx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ lw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::lea(Register rt, Address src) { ++ Register dst = rt; ++ Register base = src.base(); ++ Register index = src.index(); ++ ++ int scale = src.scale(); ++ int disp = src.disp(); ++ ++ if (index == noreg) { ++ if (is_simm16(disp)) { ++ daddiu(dst, base, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(dst, base, AT); ++ } ++ } else { ++ if (scale == 0) { ++ if (is_simm16(disp)) { ++ daddu(AT, base, index); ++ daddiu(dst, AT, disp); ++ } else { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, base, AT); ++ daddu(dst, AT, index); ++ } ++ } else { ++ if (is_simm16(disp)) { ++ dsll(AT, index, scale); ++ daddu(AT, AT, base); ++ daddiu(dst, AT, disp); ++ } else { ++ assert_different_registers(dst, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ dsll(dst, index, scale); ++ daddu(dst, dst, AT); ++ } ++ } ++ } ++} ++ ++void Assembler::lwl(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwl(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwr(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwr(rt, src.base(), src.disp()); ++} ++ ++void Assembler::lwu(Register rt, Address src){ ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwu(rt, src.base(), src.disp()); ++} ++ ++void Assembler::sb(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sb(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sc(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sc(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::scd(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ scd(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sd(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if (is_simm16(disp)) { ++ if ( UseLEXT1 && is_simm(disp, 8)) { ++ if (scale == 0) { ++ gssdx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gssdx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sd(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ 
gssdx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sd(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sd(src, AT, 0); ++ } ++ } ++ } else { ++ if (is_simm16(disp)) { ++ sd(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gssdx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sd(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::sdl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sdr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sh(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sh(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::sw(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ Register index = dst.index(); ++ ++ int scale = dst.scale(); ++ int disp = dst.disp(); ++ ++ if (index != noreg) { ++ if ( Assembler::is_simm16(disp) ) { ++ if ( UseLEXT1 && Assembler::is_simm(disp, 8) ) { ++ if (scale == 0) { ++ gsswx(src, base, index, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ dsll(AT, index, scale); ++ gsswx(src, base, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ daddu(AT, base, index); ++ } else { ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ } ++ sw(src, AT, disp); ++ } ++ } else { ++ assert_different_registers(rt, AT); ++ if (scale == 0) { ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ daddu(AT, AT, base); ++ if (UseLEXT1) { ++ gsswx(src, AT, index, 0); ++ } else { ++ daddu(AT, AT, index); ++ sw(src, AT, 0); ++ } ++ } else { ++ daddiu(SP, SP, -wordSize); ++ sd(T9, SP, 0); ++ ++ dsll(AT, index, scale); ++ daddu(AT, base, AT); ++ lui(T9, split_low(disp >> 16)); ++ if (split_low(disp)) ori(T9, T9, split_low(disp)); ++ daddu(AT, AT, T9); ++ ld(T9, SP, 0); ++ daddiu(SP, SP, wordSize); ++ sw(src, AT, 0); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ sw(src, base, disp); ++ } else { ++ assert_different_registers(rt, AT); ++ lui(AT, split_low(disp >> 16)); ++ if (split_low(disp)) ori(AT, AT, split_low(disp)); ++ ++ if (UseLEXT1) { ++ gsswx(src, base, AT, 0); ++ } else { ++ daddu(AT, base, AT); ++ sw(src, AT, 0); ++ } ++ } ++ } ++} ++ ++void Assembler::swl(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swl(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::swr(Register rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swr(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::lwc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ lwc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::ldc1(FloatRegister rt, Address src) { ++ assert(src.index() == NOREG, "index is unimplemented"); ++ ldc1(rt, src.base(), src.disp()); ++} ++ ++void Assembler::swc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ swc1(rt, dst.base(), dst.disp()); 
++} ++ ++void Assembler::sdc1(FloatRegister rt, Address dst) { ++ assert(dst.index() == NOREG, "index is unimplemented"); ++ sdc1(rt, dst.base(), dst.disp()); ++} ++ ++void Assembler::j(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((j_op<<26) | dest); ++ has_delay_slot(); ++} ++ ++void Assembler::jal(address entry) { ++ int dest = ((intptr_t)entry & (intptr_t)0xfffffff)>>2; ++ emit_long((jal_op<<26) | dest); ++ has_delay_slot(); ++} +diff --git a/src/hotspot/cpu/mips/assembler_mips.hpp b/src/hotspot/cpu/mips/assembler_mips.hpp +new file mode 100644 +index 0000000000..7ef33cf592 +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.hpp +@@ -0,0 +1,1792 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "runtime/vm_version.hpp" ++ ++class BiasedLockingCounters; ++ ++ ++// Note: A register location is represented via a Register, not ++// via an address for efficiency & simplicity reasons. ++ ++class ArrayAddress; ++ ++class Address { ++ public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ private: ++ Register _base; ++ Register _index; ++ ScaleFactor _scale; ++ int _disp; ++ RelocationHolder _rspec; ++ ++ // Easily misused constructors make them private ++ Address(address loc, RelocationHolder spec); ++ Address(int disp, address loc, relocInfo::relocType rtype); ++ Address(int disp, address loc, RelocationHolder spec); ++ ++ public: ++ ++ // creation ++ Address() ++ : _base(noreg), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(0) { ++ } ++ ++ // No default displacement otherwise Register can be implicitly ++ // converted to 0(Register) which is quite a different animal. 
++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++ ++ // The following two overloads are used in connection with the ++ // ByteSize type (see sizes.hpp). They simplify the use of ++ // ByteSize'd arguments in assembly code. Note that their equivalent ++ // for the optimized build are the member functions with int disp ++ // argument since ByteSize is mapped to an int type in that case. ++ // ++ // Note: DO NOT introduce similar overloaded functions for WordSize ++ // arguments as in the optimized mode, both ByteSize and WordSize ++ // are mapped to the same type and thus the compiler cannot make a ++ // distinction anymore (=> compiler errors). ++ ++#ifdef ASSERT ++ Address(Register base, ByteSize disp) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(in_bytes(disp)) { ++ assert_different_registers(_base, AT); ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : _base(base), ++ _index(index), ++ _scale(scale), ++ _disp(in_bytes(disp)) { ++ assert(!index->is_valid() == (scale == Address::no_scale), "inconsistent address"); ++ assert_different_registers(_base, _index, AT); ++ } ++#endif // ASSERT ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ int disp() const { return _disp; } ++ ++ static Address make_array(ArrayAddress); ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++// Calling convention ++class Argument { ++ private: ++ int _number; ++ public: ++ enum { ++ n_register_parameters = 8, // 8 integer registers used to pass parameters ++ n_float_register_parameters = 8 // 8 float registers used to pass parameters ++ }; ++ ++ Argument(int number):_number(number){ } ++ Argument successor() {return Argument(number() + 1);} ++ ++ int number()const {return _number;} ++ bool is_Register()const {return _number < n_register_parameters;} ++ bool is_FloatRegister()const {return _number < n_float_register_parameters;} ++ ++ Register as_Register()const { ++ assert(is_Register(), "must be a register argument"); ++ return ::as_Register(A0->encoding() + _number); ++ } ++ FloatRegister as_FloatRegister()const { ++ assert(is_FloatRegister(), "must be a float register argument"); ++ return ::as_FloatRegister(F12->encoding() + _number); ++ } ++ ++ Address as_caller_address()const {return Address(SP, (number() - n_register_parameters) * wordSize);} ++}; ++ ++// ++// AddressLiteral has been split out from Address because operands of this type ++// need to be treated specially on 32bit vs. 64bit platforms. By splitting it out ++// the few instructions that need to deal with address literals are unique and the ++// MacroAssembler does not have to implement every instruction in the Assembler ++// in order to search for address literals that may need special handling depending ++// on the instruction and the platform. As small step on the way to merging i486/amd64 ++// directories. 
++// ++class AddressLiteral { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. ++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. ++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class OopAddress: public AddressLiteral { ++ ++ public: ++ ++ OopAddress(address target) : AddressLiteral(target, relocInfo::oop_type){} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, relocInfo::external_word_type){} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++ ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. 
We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++const int FPUStateSizeInWords = 512 / wordSize; ++ ++// The MIPS LOONGSON Assembler: Pure assembler doing NO optimizations on the instruction ++// level ; i.e., what you write is what you get. The Assembler is generating code into ++// a CodeBuffer. ++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ friend class LIR_Assembler; // as_Address() ++ friend class StubGenerator; ++ ++ public: ++ enum Condition { ++ zero , ++ notZero , ++ equal , ++ notEqual , ++ less , ++ lessEqual , ++ greater , ++ greaterEqual , ++ below , ++ belowEqual , ++ above , ++ aboveEqual ++ }; ++ ++ static const int LogInstructionSize = 2; ++ static const int InstructionSize = 1 << LogInstructionSize; ++ ++ // opcode, highest 6 bits: bits[31...26] ++ enum ops { ++ special_op = 0x00, // special_ops ++ regimm_op = 0x01, // regimm_ops ++ j_op = 0x02, ++ jal_op = 0x03, ++ beq_op = 0x04, ++ bne_op = 0x05, ++ blez_op = 0x06, ++ bgtz_op = 0x07, ++ addiu_op = 0x09, ++ slti_op = 0x0a, ++ sltiu_op = 0x0b, ++ andi_op = 0x0c, ++ ori_op = 0x0d, ++ xori_op = 0x0e, ++ lui_op = 0x0f, ++ cop0_op = 0x10, // cop0_ops ++ cop1_op = 0x11, // cop1_ops ++ gs_cop2_op = 0x12, // gs_cop2_ops ++ cop1x_op = 0x13, // cop1x_ops ++ beql_op = 0x14, ++ bnel_op = 0x15, ++ blezl_op = 0x16, ++ bgtzl_op = 0x17, ++ daddiu_op = 0x19, ++ ldl_op = 0x1a, ++ ldr_op = 0x1b, ++ special2_op = 0x1c, // special2_ops ++ msa_op = 0x1e, // msa_ops ++ special3_op = 0x1f, // special3_ops ++ lb_op = 0x20, ++ lh_op = 0x21, ++ lwl_op = 0x22, ++ lw_op = 0x23, ++ lbu_op = 0x24, ++ lhu_op = 0x25, ++ lwr_op = 0x26, ++ lwu_op = 0x27, ++ sb_op = 0x28, ++ sh_op = 0x29, ++ swl_op = 0x2a, ++ sw_op = 0x2b, ++ sdl_op = 0x2c, ++ sdr_op = 0x2d, ++ swr_op = 0x2e, ++ cache_op = 0x2f, ++ ll_op = 0x30, ++ lwc1_op = 0x31, ++ gs_lwc2_op = 0x32, //gs_lwc2_ops ++ pref_op = 0x33, ++ lld_op = 0x34, ++ ldc1_op = 0x35, ++ gs_ldc2_op = 0x36, //gs_ldc2_ops ++ ld_op = 0x37, ++ sc_op = 0x38, ++ swc1_op = 0x39, ++ gs_swc2_op = 0x3a, //gs_swc2_ops ++ scd_op = 0x3c, ++ sdc1_op = 0x3d, ++ gs_sdc2_op = 0x3e, //gs_sdc2_ops ++ sd_op = 0x3f ++ }; ++ ++ static const char *ops_name[]; ++ ++ //special family, the opcode is in low 6 bits. 
++ enum special_ops { ++ sll_op = 0x00, ++ movci_op = 0x01, ++ srl_op = 0x02, ++ sra_op = 0x03, ++ sllv_op = 0x04, ++ srlv_op = 0x06, ++ srav_op = 0x07, ++ jr_op = 0x08, ++ jalr_op = 0x09, ++ movz_op = 0x0a, ++ movn_op = 0x0b, ++ syscall_op = 0x0c, ++ break_op = 0x0d, ++ sync_op = 0x0f, ++ mfhi_op = 0x10, ++ mthi_op = 0x11, ++ mflo_op = 0x12, ++ mtlo_op = 0x13, ++ dsllv_op = 0x14, ++ dsrlv_op = 0x16, ++ dsrav_op = 0x17, ++ mult_op = 0x18, ++ multu_op = 0x19, ++ div_op = 0x1a, ++ divu_op = 0x1b, ++ dmult_op = 0x1c, ++ dmultu_op = 0x1d, ++ ddiv_op = 0x1e, ++ ddivu_op = 0x1f, ++ addu_op = 0x21, ++ subu_op = 0x23, ++ and_op = 0x24, ++ or_op = 0x25, ++ xor_op = 0x26, ++ nor_op = 0x27, ++ slt_op = 0x2a, ++ sltu_op = 0x2b, ++ daddu_op = 0x2d, ++ dsubu_op = 0x2f, ++ tge_op = 0x30, ++ tgeu_op = 0x31, ++ tlt_op = 0x32, ++ tltu_op = 0x33, ++ teq_op = 0x34, ++ tne_op = 0x36, ++ dsll_op = 0x38, ++ dsrl_op = 0x3a, ++ dsra_op = 0x3b, ++ dsll32_op = 0x3c, ++ dsrl32_op = 0x3e, ++ dsra32_op = 0x3f ++ }; ++ ++ static const char* special_name[]; ++ ++ //regimm family, the opcode is in rt[16...20], 5 bits ++ enum regimm_ops { ++ bltz_op = 0x00, ++ bgez_op = 0x01, ++ bltzl_op = 0x02, ++ bgezl_op = 0x03, ++ tgei_op = 0x08, ++ tgeiu_op = 0x09, ++ tlti_op = 0x0a, ++ tltiu_op = 0x0b, ++ teqi_op = 0x0c, ++ tnei_op = 0x0e, ++ bltzal_op = 0x10, ++ bgezal_op = 0x11, ++ bltzall_op = 0x12, ++ bgezall_op = 0x13, ++ bposge32_op = 0x1c, ++ bposge64_op = 0x1d, ++ synci_op = 0x1f, ++ }; ++ ++ static const char* regimm_name[]; ++ ++ //cop0 family, the ops is in bits[25...21], 5 bits ++ enum cop0_ops { ++ mfc0_op = 0x00, ++ dmfc0_op = 0x01, ++ // ++ mxgc0_op = 0x03, //MFGC0, DMFGC0, MTGC0 ++ mtc0_op = 0x04, ++ dmtc0_op = 0x05, ++ rdpgpr_op = 0x0a, ++ inter_op = 0x0b, ++ wrpgpr_op = 0x0c ++ }; ++ ++ //cop1 family, the ops is in bits[25...21], 5 bits ++ enum cop1_ops { ++ mfc1_op = 0x00, ++ dmfc1_op = 0x01, ++ cfc1_op = 0x02, ++ mfhc1_op = 0x03, ++ mtc1_op = 0x04, ++ dmtc1_op = 0x05, ++ ctc1_op = 0x06, ++ mthc1_op = 0x07, ++ bc1f_op = 0x08, ++ single_fmt = 0x10, ++ double_fmt = 0x11, ++ word_fmt = 0x14, ++ long_fmt = 0x15, ++ ps_fmt = 0x16 ++ }; ++ ++ ++ //2 bist (bits[17...16]) of bc1x instructions (cop1) ++ enum bc_ops { ++ bcf_op = 0x0, ++ bct_op = 0x1, ++ bcfl_op = 0x2, ++ bctl_op = 0x3, ++ }; ++ ++ // low 6 bits of c_x_fmt instructions (cop1) ++ enum c_conds { ++ f_cond = 0x30, ++ un_cond = 0x31, ++ eq_cond = 0x32, ++ ueq_cond = 0x33, ++ olt_cond = 0x34, ++ ult_cond = 0x35, ++ ole_cond = 0x36, ++ ule_cond = 0x37, ++ sf_cond = 0x38, ++ ngle_cond = 0x39, ++ seq_cond = 0x3a, ++ ngl_cond = 0x3b, ++ lt_cond = 0x3c, ++ nge_cond = 0x3d, ++ le_cond = 0x3e, ++ ngt_cond = 0x3f ++ }; ++ ++ // low 6 bits of cop1 instructions ++ enum float_ops { ++ fadd_op = 0x00, ++ fsub_op = 0x01, ++ fmul_op = 0x02, ++ fdiv_op = 0x03, ++ fsqrt_op = 0x04, ++ fabs_op = 0x05, ++ fmov_op = 0x06, ++ fneg_op = 0x07, ++ froundl_op = 0x08, ++ ftruncl_op = 0x09, ++ fceill_op = 0x0a, ++ ffloorl_op = 0x0b, ++ froundw_op = 0x0c, ++ ftruncw_op = 0x0d, ++ fceilw_op = 0x0e, ++ ffloorw_op = 0x0f, ++ movf_f_op = 0x11, ++ movt_f_op = 0x11, ++ movz_f_op = 0x12, ++ movn_f_op = 0x13, ++ frecip_op = 0x15, ++ frsqrt_op = 0x16, ++ fcvts_op = 0x20, ++ fcvtd_op = 0x21, ++ fcvtw_op = 0x24, ++ fcvtl_op = 0x25, ++ fcvtps_op = 0x26, ++ fcvtspl_op = 0x28, ++ fpll_op = 0x2c, ++ fplu_op = 0x2d, ++ fpul_op = 0x2e, ++ fpuu_op = 0x2f ++ }; ++ ++ static const char* cop1_name[]; ++ ++ //cop1x family, the opcode is in low 6 bits. 
++ enum cop1x_ops { ++ lwxc1_op = 0x00, ++ ldxc1_op = 0x01, ++ luxc1_op = 0x05, ++ swxc1_op = 0x08, ++ sdxc1_op = 0x09, ++ suxc1_op = 0x0d, ++ prefx_op = 0x0f, ++ ++ alnv_ps_op = 0x1e, ++ madd_s_op = 0x20, ++ madd_d_op = 0x21, ++ madd_ps_op = 0x26, ++ msub_s_op = 0x28, ++ msub_d_op = 0x29, ++ msub_ps_op = 0x2e, ++ nmadd_s_op = 0x30, ++ nmadd_d_op = 0x31, ++ nmadd_ps_op = 0x36, ++ nmsub_s_op = 0x38, ++ nmsub_d_op = 0x39, ++ nmsub_ps_op = 0x3e ++ }; ++ ++ static const char* cop1x_name[]; ++ ++ //special2 family, the opcode is in low 6 bits. ++ enum special2_ops { ++ madd_op = 0x00, ++ maddu_op = 0x01, ++ mul_op = 0x02, ++ gs0x03_op = 0x03, ++ msub_op = 0x04, ++ msubu_op = 0x05, ++ gs0x06_op = 0x06, ++ gsemul2_op = 0x07, ++ gsemul3_op = 0x08, ++ gsemul4_op = 0x09, ++ gsemul5_op = 0x0a, ++ gsemul6_op = 0x0b, ++ gsemul7_op = 0x0c, ++ gsemul8_op = 0x0d, ++ gsemul9_op = 0x0e, ++ gsemul10_op = 0x0f, ++ gsmult_op = 0x10, ++ gsdmult_op = 0x11, ++ gsmultu_op = 0x12, ++ gsdmultu_op = 0x13, ++ gsdiv_op = 0x14, ++ gsddiv_op = 0x15, ++ gsdivu_op = 0x16, ++ gsddivu_op = 0x17, ++ gsmod_op = 0x1c, ++ gsdmod_op = 0x1d, ++ gsmodu_op = 0x1e, ++ gsdmodu_op = 0x1f, ++ clz_op = 0x20, ++ clo_op = 0x21, ++ xctx_op = 0x22, //ctz, cto, dctz, dcto, gsX ++ gsrxr_x_op = 0x23, //gsX ++ dclz_op = 0x24, ++ dclo_op = 0x25, ++ gsle_op = 0x26, ++ gsgt_op = 0x27, ++ gs86j_op = 0x28, ++ gsloop_op = 0x29, ++ gsaj_op = 0x2a, ++ gsldpc_op = 0x2b, ++ gs86set_op = 0x30, ++ gstm_op = 0x31, ++ gscvt_ld_op = 0x32, ++ gscvt_ud_op = 0x33, ++ gseflag_op = 0x34, ++ gscam_op = 0x35, ++ gstop_op = 0x36, ++ gssettag_op = 0x37, ++ gssdbbp_op = 0x38 ++ }; ++ ++ static const char* special2_name[]; ++ ++ // special3 family, the opcode is in low 6 bits. ++ enum special3_ops { ++ ext_op = 0x00, ++ dextm_op = 0x01, ++ dextu_op = 0x02, ++ dext_op = 0x03, ++ ins_op = 0x04, ++ dinsm_op = 0x05, ++ dinsu_op = 0x06, ++ dins_op = 0x07, ++ lxx_op = 0x0a, //lwx, lhx, lbux, ldx ++ insv_op = 0x0c, ++ dinsv_op = 0x0d, ++ ar1_op = 0x10, //MIPS DSP ++ cmp1_op = 0x11, //MIPS DSP ++ re1_op = 0x12, //MIPS DSP, re1_ops ++ sh1_op = 0x13, //MIPS DSP ++ ar2_op = 0x14, //MIPS DSP ++ cmp2_op = 0x15, //MIPS DSP ++ re2_op = 0x16, //MIPS DSP, re2_ops ++ sh2_op = 0x17, //MIPS DSP ++ ar3_op = 0x18, //MIPS DSP ++ bshfl_op = 0x20 //seb, seh ++ }; ++ ++ // re1_ops ++ enum re1_ops { ++ absq_s_qb_op = 0x01, ++ repl_qb_op = 0x02, ++ replv_qb_op = 0x03, ++ absq_s_ph_op = 0x09, ++ repl_ph_op = 0x0a, ++ replv_ph_op = 0x0b, ++ absq_s_w_op = 0x11, ++ bitrev_op = 0x1b ++ }; ++ ++ // re2_ops ++ enum re2_ops { ++ repl_ob_op = 0x02, ++ replv_ob_op = 0x03, ++ absq_s_qh_op = 0x09, ++ repl_qh_op = 0x0a, ++ replv_qh_op = 0x0b, ++ absq_s_pw_op = 0x11, ++ repl_pw_op = 0x12, ++ replv_pw_op = 0x13 ++ }; ++ ++ static const char* special3_name[]; ++ ++ // lwc2/gs_lwc2 family, the opcode is in low 6 bits. ++ enum gs_lwc2_ops { ++ gslble_op = 0x10, ++ gslbgt_op = 0x11, ++ gslhle_op = 0x12, ++ gslhgt_op = 0x13, ++ gslwle_op = 0x14, ++ gslwgt_op = 0x15, ++ gsldle_op = 0x16, ++ gsldgt_op = 0x17, ++ gslwlec1_op = 0x1c, ++ gslwgtc1_op = 0x1d, ++ gsldlec1_op = 0x1e, ++ gsldgtc1_op = 0x1f, ++ gslq_op = 0x20 ++ }; ++ ++ static const char* gs_lwc2_name[]; ++ ++ // ldc2/gs_ldc2 family, the opcode is in low 3 bits. ++ enum gs_ldc2_ops { ++ gslbx_op = 0x0, ++ gslhx_op = 0x1, ++ gslwx_op = 0x2, ++ gsldx_op = 0x3, ++ gslwxc1_op = 0x6, ++ gsldxc1_op = 0x7 ++ }; ++ ++ static const char* gs_ldc2_name[]; ++ ++ // swc2/gs_swc2 family, the opcode is in low 6 bits. 
++  enum gs_swc2_ops {
++    gssble_op        = 0x10,
++    gssbgt_op        = 0x11,
++    gsshle_op        = 0x12,
++    gsshgt_op        = 0x13,
++    gsswle_op        = 0x14,
++    gsswgt_op        = 0x15,
++    gssdle_op        = 0x16,
++    gssdgt_op        = 0x17,
++    gsswlec1_op      = 0x1c,
++    gsswgtc1_op      = 0x1d,
++    gssdlec1_op      = 0x1e,
++    gssdgtc1_op      = 0x1f,
++    gssq_op          = 0x20
++  };
++
++  static const char* gs_swc2_name[];
++
++  // sdc2/gs_sdc2 family, the opcode is in low 3 bits.
++  enum gs_sdc2_ops {
++    gssbx_op         = 0x0,
++    gsshx_op         = 0x1,
++    gsswx_op         = 0x2,
++    gssdx_op         = 0x3,
++    gsswxc1_op       = 0x6,
++    gssdxc1_op       = 0x7
++  };
++
++  static const char* gs_sdc2_name[];
++
++  enum WhichOperand {
++    // input to locate_operand, and format code for relocations
++    imm_operand         = 0,   // embedded 32-bit|64-bit immediate operand
++    disp32_operand      = 1,   // embedded 32-bit displacement or address
++    call32_operand      = 2,   // embedded 32-bit self-relative displacement
++    narrow_oop_operand  = 3,   // embedded 32-bit immediate narrow oop
++    _WhichOperand_limit = 4
++  };
++
++  static int opcode(int insn)  { return (insn>>26)&0x3f; }
++  static int rs(int insn)      { return (insn>>21)&0x1f; }
++  static int rt(int insn)      { return (insn>>16)&0x1f; }
++  static int rd(int insn)      { return (insn>>11)&0x1f; }
++  static int sa(int insn)      { return (insn>>6)&0x1f; }
++  static int special(int insn) { return insn&0x3f; }
++  static int imm_off(int insn) { return (short)low16(insn); }
++
++  static int low  (int x, int l) { return bitfield(x, 0, l); }
++  static int low16(int x)        { return low(x, 16); }
++  static int low26(int x)        { return low(x, 26); }
++
++ protected:
++  // helper methods for instruction emission
++
++  // I-Type (Immediate)
++  // 31       26 25      21 20      16 15                            0
++  // |  opcode  |    rs    |    rt    |          immediate            |
++  // |          |          |          |                               |
++  //      6          5          5                    16
++  static int insn_ORRI(int op, int rs, int rt, int imm) { assert(is_simm16(imm), "not a signed 16-bit int"); return (op<<26) | (rs<<21) | (rt<<16) | low16(imm); }
++
++  // R-Type (Register)
++  // 31          26 25      21 20      16 15      11 10         6 5        0
++  // |   special   |    rs    |    rt    |    rd    |     0      |  opcode  |
++  // | 0 0 0 0 0 0 |          |          |          | 0 0 0 0 0  |          |
++  //        6           5          5          5           5           6
++  static int insn_RRRO(int rs, int rt, int rd,   int op) { return (rs<<21) | (rt<<16) | (rd<<11)  | op; }
++  static int insn_RRSO(int rt, int rd, int sa,   int op) { return (rt<<16) | (rd<<11) | (sa<<6)   | op; }
++  static int insn_RRCO(int rs, int rt, int code, int op) { return (rs<<21) | (rt<<16) | (code<<6) | op; }
++
++  static int insn_COP0(int op, int rt, int rd) { return (cop0_op<<26) | (op<<21) | (rt<<16) | (rd<<11); }
++  static int insn_COP1(int op, int rt, int fs) { return (cop1_op<<26) | (op<<21) | (rt<<16) | (fs<<11); }
++
++  static int insn_F3RO(int fmt, int ft, int fs, int fd, int func) {
++    return (cop1_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func;
++  }
++  static int insn_F3ROX(int fmt, int ft, int fs, int fd, int func) {
++    return (cop1x_op<<26) | (fmt<<21) | (ft<<16) | (fs<<11) | (fd<<6) | func;
++  }
++
++  static int high  (int x, int l) { return bitfield(x, 32-l, l); }
++  static int high16(int x)        { return high(x, 16); }
++  static int high6 (int x)        { return high(x, 6); }
++
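++  // A rough worked example of how these helpers pack an instruction word
++  // (the register encodings 29 and 4 below are arbitrary, picked only for
++  // illustration): insn_ORRI(addiu_op, 29, 4, 8) evaluates to
++  //   (0x09 << 26) | (29 << 21) | (4 << 16) | low16(8)
++  //   = 0x24000000 | 0x03A00000 | 0x00040000 | 0x0008
++  //   = 0x27A40008
++  // i.e. the I-type word 001001 11101 00100 0000000000001000, which a debug
++  // check could verify as: assert(insn_ORRI(addiu_op, 29, 4, 8) == 0x27A40008, "packing");
++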
++  //get the offset field of a jump/branch instruction
++  int offset(address entry) {
++    assert(is_simm16((entry - pc() - 4) / 4), "change this code");
++    if (!is_simm16((entry - pc() - 4) / 4)) {
++      tty->print_cr("!!! is_simm16: %lx", (entry - pc() - 4) / 4);
++    }
++    return (entry - pc() - 4) / 4;
++  }
++
++
++public:
++  using AbstractAssembler::offset;
++
++  // sign-extend x, where h is the index of the sign bit
++  static int expand(int x, int h) { return -(x & (1<<h)) | x; }
++
++  // reinterpret the low 16 bits of x as a signed value
++  static int simm16(int x) {
++    return (x << 16) >> 16;
++  }
++
++  static int split_high(int x) {
++    return ( (x >> 16) + ((x & 0x8000) != 0) ) & 0xffff;
++  }
++
++  static int merge(int low, int high) {
++    return expand(low, 15) + (high<<16);
++  }
++
++  static intptr_t merge(intptr_t x0, intptr_t x16, intptr_t x32, intptr_t x48) {
++    return (x48 << 48) | (x32 << 32) | (x16 << 16) | x0;
++  }
++
++  // Test if x is within signed immediate range for nbits.
++  static bool is_simm(int x, int nbits) {
++    assert(0 < nbits && nbits < 32, "out of bounds");
++    const int min      = -( ((int)1) << nbits-1 );
++    const int maxplus1 =  ( ((int)1) << nbits-1 );
++    return min <= x && x < maxplus1;
++  }
++
++  static bool is_simm(jlong x, unsigned int nbits) {
++    assert(0 < nbits && nbits < 64, "out of bounds");
++    const jlong min      = -( ((jlong)1) << nbits-1 );
++    const jlong maxplus1 =  ( ((jlong)1) << nbits-1 );
++    return min <= x && x < maxplus1;
++  }
++
++  // Test if x is within unsigned immediate range for nbits
++  static bool is_uimm(int x, unsigned int nbits) {
++    assert(0 < nbits && nbits < 32, "out of bounds");
++    const int maxplus1 = ( ((int)1) << nbits );
++    return 0 <= x && x < maxplus1;
++  }
++
++  static bool is_uimm(jlong x, unsigned int nbits) {
++    assert(0 < nbits && nbits < 64, "out of bounds");
++    const jlong maxplus1 = ( ((jlong)1) << nbits );
++    return 0 <= x && x < maxplus1;
++  }
++
++  static bool is_simm16(int x)  { return is_simm(x, 16); }
++  static bool is_simm16(long x) { return is_simm((jlong)x, (unsigned int)16); }
++
++  static bool fit_in_jal(address target, address pc) {
++    intptr_t mask = 0xfffffffff0000000;
++    return ((intptr_t)(pc + 4) & mask) == ((intptr_t)target & mask);
++  }
++
++  bool fit_int_branch(address entry) {
++    return is_simm16(offset(entry));
++  }
++
++protected:
++#ifdef ASSERT
++  #define CHECK_DELAY
++#endif
++#ifdef CHECK_DELAY
++  enum Delay_state { no_delay, at_delay_slot, filling_delay_slot } delay_state;
++#endif
++
++public:
++  void assert_not_delayed() {
++#ifdef CHECK_DELAY
++    assert(delay_state == no_delay, "next instruction should not be a delay slot");
++#endif
++  }
++
++protected:
++  // Delay slot helpers
++  // cti is called when emitting a control-transfer instruction,
++  // BEFORE doing the emitting.
++  // Only effective when assertion-checking is enabled.
++
++  // called when emitting a cti with a delay slot, AFTER emitting
++  void has_delay_slot() {
++#ifdef CHECK_DELAY
++    assert(delay_state == no_delay, "just checking");
++    delay_state = at_delay_slot;
++#endif
++  }
++
++public:
++  Assembler* delayed() {
++#ifdef CHECK_DELAY
++    guarantee( delay_state == at_delay_slot, "delayed instruction is not in delay slot");
++    delay_state = filling_delay_slot;
++#endif
++    return this;
++  }
++
++  void flush() {
++#ifdef CHECK_DELAY
++    guarantee( delay_state == no_delay, "ending code with a delay slot");
++#endif
++    AbstractAssembler::flush();
++  }
++
++  inline void emit_long(int);  // shadows AbstractAssembler::emit_long
++  inline void emit_data(int x) { emit_long(x); }
++  inline void emit_data(int, RelocationHolder const&);
++  inline void emit_data(int, relocInfo::relocType rtype);
++  inline void check_delay();
++
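++  // A minimal sketch of how the delay-slot bookkeeping above is intended to be
++  // used (illustrative only; T0 and T1 stand for any two general registers):
++  //
++  //   __ beq(T0, T1, target);   // emits the branch, has_delay_slot() -> at_delay_slot
++  //   __ delayed()->nop();      // fills the delay slot; state returns to no_delay
++  //
++  // Emitting another control-transfer instruction before the slot is filled, or
++  // calling flush() with an open slot, trips the checks when CHECK_DELAY is defined.
++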
++
++  // Generic instructions
++  // Does 32bit or 64bit as needed for the platform. In some sense these
++  // belong in macro assembler but there is no need for both varieties to exist
++
++  void addu32(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), addu_op)); }
++  void addiu32(Register rt, Register rs, int imm)     { emit_long(insn_ORRI(addiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); }
++  void addiu(Register rt, Register rs, int imm)       { daddiu (rt, rs, imm);}
++  void addu(Register rd, Register rs, Register rt)    { daddu (rd, rs, rt); }
++
++  void andr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), and_op)); }
++  void andi(Register rt, Register rs, int imm)     { emit_long(insn_ORRI(andi_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); }
++
++  void beq    (Register rs, Register rt, int off) { emit_long(insn_ORRI(beq_op,  (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); }
++  void beql   (Register rs, Register rt, int off) { emit_long(insn_ORRI(beql_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); }
++  void bgez   (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgez_op,    off)); has_delay_slot(); }
++  void bgezal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezal_op,  off)); has_delay_slot(); }
++  void bgezall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezall_op, off)); has_delay_slot(); }
++  void bgezl  (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bgezl_op,   off)); has_delay_slot(); }
++  void bgtz   (Register rs, int off) { emit_long(insn_ORRI(bgtz_op,  (int)rs->encoding(), 0, off)); has_delay_slot(); }
++  void bgtzl  (Register rs, int off) { emit_long(insn_ORRI(bgtzl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); }
++  void blez   (Register rs, int off) { emit_long(insn_ORRI(blez_op,  (int)rs->encoding(), 0, off)); has_delay_slot(); }
++  void blezl  (Register rs, int off) { emit_long(insn_ORRI(blezl_op, (int)rs->encoding(), 0, off)); has_delay_slot(); }
++  void bltz   (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltz_op,    off)); has_delay_slot(); }
++  void bltzal (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzal_op,  off)); has_delay_slot(); }
++  void bltzall(Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzall_op, off)); has_delay_slot(); }
++  void bltzl  (Register rs, int off) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), bltzl_op,   off)); has_delay_slot(); }
++  void bne    (Register rs, Register rt, int off) { emit_long(insn_ORRI(bne_op,  (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); }
++  void bnel   (Register rs, Register rt, int off) { emit_long(insn_ORRI(bnel_op, (int)rs->encoding(), (int)rt->encoding(), off)); has_delay_slot(); }
++  // two versions of brk:
++  // the brk(code) version follows the MIPS64 Architecture For Programmers Volume II: The MIPS64 Instruction Set
++  // the brk(code1, code2) version follows the disassembler of hsdis (binutils-2.27)
++  // both versions work
++  void brk (int code)             { assert(is_uimm(code, 20), "code is 20 bits"); emit_long( (low(code, 20)<<6) | break_op ); }
++  void brk (int code1, int code2) { assert(is_uimm(code1, 10) && is_uimm(code2, 10), "code is 20 bits"); emit_long( (low(code1, 10)<<16) | (low(code2, 10)<<6) | break_op ); }
++
++  void beq    (Register rs, Register rt, address entry) { beq(rs, rt,
offset(entry)); } ++ void beql (Register rs, Register rt, address entry) { beql(rs, rt, offset(entry));} ++ void bgez (Register rs, address entry) { bgez (rs, offset(entry)); } ++ void bgezal (Register rs, address entry) { bgezal (rs, offset(entry)); } ++ void bgezall(Register rs, address entry) { bgezall(rs, offset(entry)); } ++ void bgezl (Register rs, address entry) { bgezl (rs, offset(entry)); } ++ void bgtz (Register rs, address entry) { bgtz (rs, offset(entry)); } ++ void bgtzl (Register rs, address entry) { bgtzl (rs, offset(entry)); } ++ void blez (Register rs, address entry) { blez (rs, offset(entry)); } ++ void blezl (Register rs, address entry) { blezl (rs, offset(entry)); } ++ void bltz (Register rs, address entry) { bltz (rs, offset(entry)); } ++ void bltzal (Register rs, address entry) { bltzal (rs, offset(entry)); } ++ void bltzall(Register rs, address entry) { bltzall(rs, offset(entry)); } ++ void bltzl (Register rs, address entry) { bltzl (rs, offset(entry)); } ++ void bne (Register rs, Register rt, address entry) { bne(rs, rt, offset(entry)); } ++ void bnel (Register rs, Register rt, address entry) { bnel(rs, rt, offset(entry)); } ++ ++ void beq (Register rs, Register rt, Label& L) { beq(rs, rt, target(L)); } ++ void beql (Register rs, Register rt, Label& L) { beql(rs, rt, target(L)); } ++ void bgez (Register rs, Label& L){ bgez (rs, target(L)); } ++ void bgezal (Register rs, Label& L){ bgezal (rs, target(L)); } ++ void bgezall(Register rs, Label& L){ bgezall(rs, target(L)); } ++ void bgezl (Register rs, Label& L){ bgezl (rs, target(L)); } ++ void bgtz (Register rs, Label& L){ bgtz (rs, target(L)); } ++ void bgtzl (Register rs, Label& L){ bgtzl (rs, target(L)); } ++ void blez (Register rs, Label& L){ blez (rs, target(L)); } ++ void blezl (Register rs, Label& L){ blezl (rs, target(L)); } ++ void bltz (Register rs, Label& L){ bltz (rs, target(L)); } ++ void bltzal (Register rs, Label& L){ bltzal (rs, target(L)); } ++ void bltzall(Register rs, Label& L){ bltzall(rs, target(L)); } ++ void bltzl (Register rs, Label& L){ bltzl (rs, target(L)); } ++ void bne (Register rs, Register rt, Label& L){ bne(rs, rt, target(L)); } ++ void bnel (Register rs, Register rt, Label& L){ bnel(rs, rt, target(L)); } ++ ++ void daddiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(daddiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void daddu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), daddu_op)); } ++ void ddiv (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddiv_op)); } ++ void ddivu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, ddivu_op)); } ++ ++ void movz (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movz_op)); } ++ void movn (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), movn_op)); } ++ ++ void movt (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | (1 << 16) | ((int)rd->encoding() << 11) | movci_op); } ++ void movf (Register rd, Register rs) { emit_long(((int)rs->encoding() << 21) | ((int)rd->encoding() << 11) | movci_op); } ++ ++ enum bshfl_ops { ++ seb_op = 0x10, ++ seh_op = 0x18 ++ }; ++ void seb (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seb_op 
<< 6) | bshfl_op); } ++ void seh (Register rd, Register rt) { emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (seh_op << 6) | bshfl_op); } ++ ++ void ext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | ext_op); ++ } ++ ++ void dext (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 63), "pos + size must be in (0, 63]"); ++ ++ int lsb = pos; ++ int msbd = size - 1; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dext_op); ++ } ++ ++ void dextm (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((32 < size) && (size <= 64), "size must be in (32, 64]"); ++ guarantee((32 < pos + size) && (pos + size <= 64), "pos + size must be in (32, 64]"); ++ ++ int lsb = pos; ++ int msbd = size - 1 - 32; ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (msbd << 11) | (lsb << 6) | dextm_op); ++ } ++ ++ void rotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | srl_op); ++ } ++ ++ void drotr (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl_op); ++ } ++ ++ void drotr32 (Register rd, Register rt, int sa) { ++ emit_long((special_op << 26) | (1 << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (low(sa, 5) << 6) | dsrl32_op); ++ } ++ ++ void rotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | srlv_op); ++ } ++ ++ void drotrv (Register rd, Register rt, Register rs) { ++ emit_long((special_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | (1 << 6) | dsrlv_op); ++ } ++ ++ void div (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, div_op)); } ++ void divu (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, divu_op)); } ++ void dmult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmult_op)); } ++ void dmultu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, dmultu_op)); } ++ void dsll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll_op)); } ++ void dsllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsllv_op)); } ++ void dsll32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsll32_op)); } ++ void dsra 
(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra_op)); } ++ void dsrav (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrav_op)); } ++ void dsra32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsra32_op)); } ++ void dsrl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl_op)); } ++ void dsrlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsrlv_op)); } ++ void dsrl32(Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), dsrl32_op)); } ++ void dsubu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), dsubu_op)); } ++ ++ void b(int off) { beq(R0, R0, off); } ++ void b(address entry) { b(offset(entry)); } ++ void b(Label& L) { b(target(L)); } ++ ++ void j(address entry); ++ void jal(address entry); ++ ++ void jalr(Register rd, Register rs) { emit_long( ((int)rs->encoding()<<21) | ((int)rd->encoding()<<11) | jalr_op); has_delay_slot(); } ++ void jalr(Register rs) { jalr(RA, rs); } ++ void jalr() { jalr(RT9); } ++ ++ void jr(Register rs) { emit_long(((int)rs->encoding()<<21) | jr_op); has_delay_slot(); } ++ void jr_hb(Register rs) { emit_long(((int)rs->encoding()<<21) | (1 << 10) | jr_op); has_delay_slot(); } ++ ++ void lb (Register rt, Register base, int off) { emit_long(insn_ORRI(lb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lbu(Register rt, Register base, int off) { emit_long(insn_ORRI(lbu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ld (Register rt, Register base, int off) { emit_long(insn_ORRI(ld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldl(Register rt, Register base, int off) { emit_long(insn_ORRI(ldl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ldr(Register rt, Register base, int off) { emit_long(insn_ORRI(ldr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lh (Register rt, Register base, int off) { emit_long(insn_ORRI(lh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lhu(Register rt, Register base, int off) { emit_long(insn_ORRI(lhu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void ll (Register rt, Register base, int off) { emit_long(insn_ORRI(ll_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lld(Register rt, Register base, int off) { emit_long(insn_ORRI(lld_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lui(Register rt, int imm) { emit_long(insn_ORRI(lui_op, 0, (int)rt->encoding(), simm16(imm))); } ++ void lw (Register rt, Register base, int off) { emit_long(insn_ORRI(lw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwl(Register rt, Register base, int off) { emit_long(insn_ORRI(lwl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwr(Register rt, Register base, int off) { emit_long(insn_ORRI(lwr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void lwu(Register rt, Register base, int off) { emit_long(insn_ORRI(lwu_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ ++ void lb (Register rt, Address src); ++ void lbu(Register rt, Address src); ++ void ld (Register rt, Address src); ++ void 
ldl(Register rt, Address src); ++ void ldr(Register rt, Address src); ++ void lh (Register rt, Address src); ++ void lhu(Register rt, Address src); ++ void ll (Register rt, Address src); ++ void lld(Register rt, Address src); ++ void lw (Register rt, Address src); ++ void lwl(Register rt, Address src); ++ void lwr(Register rt, Address src); ++ void lwu(Register rt, Address src); ++ void lea(Register rt, Address src); ++ void pref(int hint, Register base, int off) { emit_long(insn_ORRI(pref_op, (int)base->encoding(), low(hint, 5), low(off, 16))); } ++ ++ void mfhi (Register rd) { emit_long( ((int)rd->encoding()<<11) | mfhi_op ); } ++ void mflo (Register rd) { emit_long( ((int)rd->encoding()<<11) | mflo_op ); } ++ void mthi (Register rs) { emit_long( ((int)rs->encoding()<<21) | mthi_op ); } ++ void mtlo (Register rs) { emit_long( ((int)rs->encoding()<<21) | mtlo_op ); } ++ ++ void mult (Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, mult_op)); } ++ void multu(Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), 0, multu_op)); } ++ ++ void nor(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), nor_op)); } ++ ++ void orr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), or_op)); } ++ void ori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(ori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void sb (Register rt, Register base, int off) { emit_long(insn_ORRI(sb_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sc (Register rt, Register base, int off) { emit_long(insn_ORRI(sc_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void scd (Register rt, Register base, int off) { emit_long(insn_ORRI(scd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sd (Register rt, Register base, int off) { emit_long(insn_ORRI(sd_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdl (Register rt, Register base, int off) { emit_long(insn_ORRI(sdl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sdr (Register rt, Register base, int off) { emit_long(insn_ORRI(sdr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sh (Register rt, Register base, int off) { emit_long(insn_ORRI(sh_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void sll (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sll_op)); } ++ void sllv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sllv_op)); } ++ void slt (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), slt_op)); } ++ void slti (Register rt, Register rs, int imm) { emit_long(insn_ORRI(slti_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltiu(Register rt, Register rs, int imm) { emit_long(insn_ORRI(sltiu_op, (int)rs->encoding(), (int)rt->encoding(), imm)); } ++ void sltu (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), sltu_op)); } ++ void sra (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), sra_op)); } ++ void srav (Register rd, Register rt, Register rs) { 
emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srav_op)); } ++ void srl (Register rd, Register rt , int sa) { emit_long(insn_RRSO((int)rt->encoding(), (int)rd->encoding(), low(sa, 5), srl_op)); } ++ void srlv (Register rd, Register rt, Register rs) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), srlv_op)); } ++ ++ void subu (Register rd, Register rs, Register rt) { dsubu (rd, rs, rt); } ++ void subu32 (Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), subu_op)); } ++ void sw (Register rt, Register base, int off) { emit_long(insn_ORRI(sw_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swl (Register rt, Register base, int off) { emit_long(insn_ORRI(swl_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void swr (Register rt, Register base, int off) { emit_long(insn_ORRI(swr_op, (int)base->encoding(), (int)rt->encoding(), off)); } ++ void synci(Register base, int off) { emit_long(insn_ORRI(regimm_op, (int)base->encoding(), synci_op, off)); } ++ void sync () { ++ if (os::is_ActiveCoresMP()) ++ emit_long(0); ++ else ++ emit_long(sync_op); ++ } ++ void syscall(int code) { emit_long( (code<<6) | syscall_op ); } ++ ++ void sb(Register rt, Address dst); ++ void sc(Register rt, Address dst); ++ void scd(Register rt, Address dst); ++ void sd(Register rt, Address dst); ++ void sdl(Register rt, Address dst); ++ void sdr(Register rt, Address dst); ++ void sh(Register rt, Address dst); ++ void sw(Register rt, Address dst); ++ void swl(Register rt, Address dst); ++ void swr(Register rt, Address dst); ++ ++ void teq (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, teq_op)); } ++ void teqi (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), teqi_op, imm)); } ++ void tge (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tge_op)); } ++ void tgei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgei_op, imm)); } ++ void tgeiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tgeiu_op, imm)); } ++ void tgeu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tgeu_op)); } ++ void tlt (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tlt_op)); } ++ void tlti (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tlti_op, imm)); } ++ void tltiu(Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tltiu_op, imm)); } ++ void tltu (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tltu_op)); } ++ void tne (Register rs, Register rt, int code) { emit_long(insn_RRCO((int)rs->encoding(), (int)rt->encoding(), code, tne_op)); } ++ void tnei (Register rs, int imm) { emit_long(insn_ORRI(regimm_op, (int)rs->encoding(), tnei_op, imm)); } ++ ++ void xorr(Register rd, Register rs, Register rt) { emit_long(insn_RRRO((int)rs->encoding(), (int)rt->encoding(), (int)rd->encoding(), xor_op)); } ++ void xori(Register rt, Register rs, int imm) { emit_long(insn_ORRI(xori_op, (int)rs->encoding(), (int)rt->encoding(), simm16(imm))); } ++ ++ void nop() { emit_long(0); } ++ ++ ++ ++ void ldc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(ldc1_op, 
(int)base->encoding(), (int)ft->encoding(), off)); } ++ void lwc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(lwc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void ldc1(FloatRegister ft, Address src); ++ void lwc1(FloatRegister ft, Address src); ++ ++ //COP0 ++ void mfc0 (Register rt, Register rd) { emit_long(insn_COP0( mfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmfc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmfc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ // MFGC0, DMFGC0, MTGC0, DMTGC0 not implemented yet ++ void mtc0 (Register rt, Register rd) { emit_long(insn_COP0( mtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ void dmtc0 (Register rt, FloatRegister rd) { emit_long(insn_COP0(dmtc0_op, (int)rt->encoding(), (int)rd->encoding())); } ++ //COP0 end ++ ++ ++ //COP1 ++ void mfc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1 (mfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmfc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmfc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void cfc1 (Register rt, int fs) { emit_long(insn_COP1( cfc1_op, (int)rt->encoding(), fs)); } ++ void mfhc1(Register rt, int fs) { emit_long(insn_COP1(mfhc1_op, (int)rt->encoding(), fs)); } ++ void mtc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( mtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void dmtc1(Register rt, FloatRegister fs) { emit_long(insn_COP1(dmtc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, FloatRegister fs) { emit_long(insn_COP1( ctc1_op, (int)rt->encoding(), (int)fs->encoding())); } ++ void ctc1 (Register rt, int fs) { emit_long(insn_COP1(ctc1_op, (int)rt->encoding(), fs)); } ++ void mthc1(Register rt, int fs) { emit_long(insn_COP1(mthc1_op, (int)rt->encoding(), fs)); } ++ ++ void bc1f (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcf_op, off)); has_delay_slot(); } ++ void bc1fl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bcfl_op, off)); has_delay_slot(); } ++ void bc1t (int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bct_op, off)); has_delay_slot(); } ++ void bc1tl(int off) { emit_long(insn_ORRI(cop1_op, bc1f_op, bctl_op, off)); has_delay_slot(); } ++ ++ void bc1f (address entry) { bc1f(offset(entry)); } ++ void bc1fl(address entry) { bc1fl(offset(entry)); } ++ void bc1t (address entry) { bc1t(offset(entry)); } ++ void bc1tl(address entry) { bc1tl(offset(entry)); } ++ ++ void bc1f (Label& L) { bc1f(target(L)); } ++ void bc1fl(Label& L) { bc1fl(target(L)); } ++ void bc1t (Label& L) { bc1t(target(L)); } ++ void bc1tl(Label& L) { bc1tl(target(L)); } ++ ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
++#define INSN_SINGLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(single_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void add_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fadd_op)} ++ void sub_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fsub_op)} ++ void mul_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fmul_op)} ++ void div_s (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fdiv_op)} ++ void sqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fsqrt_op)} ++ void abs_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fabs_op)} ++ void mov_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fmov_op)} ++ void neg_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fneg_op)} ++ void round_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundl_op)} ++ void trunc_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceill_op)} ++ void floor_l_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorl_op)} ++ void round_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, froundw_op)} ++ void trunc_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fceilw_op)} ++ void floor_w_s(FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_s(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (single_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movz_f_op)} ++ void movn_s (FloatRegister fd, FloatRegister fs, Register rt) {INSN_SINGLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frecip_op)} ++ void rsqrt_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_d_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtd_op)} ++ //null ++ void cvt_w_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtw_op)} ++ void cvt_l_s (FloatRegister fd, FloatRegister fs) {INSN_SINGLE(R0, fs, fd, fcvtl_op)} ++ void cvt_ps_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, fd, fcvtps_op)} ++ //null ++ void c_f_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, f_cond)} ++ void c_un_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, un_cond)} ++ void c_eq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, eq_cond)} ++ void c_ueq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ueq_cond)} ++ void c_olt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, olt_cond)} ++ void c_ult_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ult_cond)} ++ void c_ole_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ole_cond)} 
++ void c_ule_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ule_cond)} ++ void c_sf_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, sf_cond)} ++ void c_ngle_s(FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngle_cond)} ++ void c_seq_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, seq_cond)} ++ void c_ngl_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngl_cond)} ++ void c_lt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, lt_cond)} ++ void c_nge_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, nge_cond)} ++ void c_le_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, le_cond)} ++ void c_ngt_s (FloatRegister fs, FloatRegister ft) {INSN_SINGLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_SINGLE ++ ++ ++//R0->encoding() is 0; INSN_DOUBLE is enclosed by {} for ctags. ++#define INSN_DOUBLE(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(double_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fadd_op)} ++ void sub_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fsub_op)} ++ void mul_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fmul_op)} ++ void div_d (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, fd, fdiv_op)} ++ void sqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fsqrt_op)} ++ void abs_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fabs_op)} ++ void mov_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fmov_op)} ++ void neg_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fneg_op)} ++ void round_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundl_op)} ++ void trunc_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncl_op)} ++ void ceil_l_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceill_op)} ++ void floor_l_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorl_op)} ++ void round_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, froundw_op)} ++ void trunc_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ftruncw_op)} ++ void ceil_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fceilw_op)} ++ void floor_w_d(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, ffloorw_op)} ++ //null ++ void movf_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movt_d(FloatRegister fs, FloatRegister fd, int cc = 0) { ++ assert(cc >= 0 && cc <= 7, "cc is 3 bits"); ++ emit_long((cop1_op<<26) | (double_fmt<<21) | (cc<<18) | 1<<16 | ((int)fs->encoding()<<11) | ((int)fd->encoding()<<6) | movf_f_op );} ++ void movz_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movz_f_op)} ++ void movn_d (FloatRegister fd, FloatRegister fs, Register rt) {INSN_DOUBLE(rt, fs, fd, movn_f_op)} ++ //null ++ void recip_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frecip_op)} ++ void rsqrt_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, frsqrt_op)} ++ //null ++ void cvt_s_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvts_op)} ++ void cvt_l_d 
(FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtl_op)} ++ //null ++ void cvt_w_d (FloatRegister fd, FloatRegister fs) {INSN_DOUBLE(R0, fs, fd, fcvtw_op)} ++ //null ++ void c_f_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, f_cond)} ++ void c_un_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, un_cond)} ++ void c_eq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, eq_cond)} ++ void c_ueq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ueq_cond)} ++ void c_olt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, olt_cond)} ++ void c_ult_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ult_cond)} ++ void c_ole_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ole_cond)} ++ void c_ule_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ule_cond)} ++ void c_sf_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, sf_cond)} ++ void c_ngle_d(FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngle_cond)} ++ void c_seq_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, seq_cond)} ++ void c_ngl_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngl_cond)} ++ void c_lt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, lt_cond)} ++ void c_nge_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, nge_cond)} ++ void c_le_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, le_cond)} ++ void c_ngt_d (FloatRegister fs, FloatRegister ft) {INSN_DOUBLE(ft, fs, R0, ngt_cond)} ++ ++#undef INSN_DOUBLE ++ ++ ++ //null ++ void cvt_s_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_w(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(word_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ void cvt_s_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvts_op)); } ++ void cvt_d_l(FloatRegister fd, FloatRegister fs) { emit_long(insn_F3RO(long_fmt, 0, (int)fs->encoding(), (int)fd->encoding(), fcvtd_op)); } ++ //null ++ ++ ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_PS(r1, r2, r3, op) \ ++ { emit_long(insn_F3RO(ps_fmt, (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ ++ void add_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fadd_op)} ++ void sub_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fsub_op)} ++ void mul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fmul_op)} ++ //null ++ void abs_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fabs_op)} ++ void mov_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fmov_op)} ++ void neg_ps (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fneg_op)} ++ //null ++ //void movf_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movf_ps")} ++ //void movt_ps(FloatRegister rd, FloatRegister rs, FPConditionCode cc) { unimplemented(" movt_ps") } ++ void movz_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movz_f_op)} ++ void movn_ps (FloatRegister fd, FloatRegister fs, Register rt) {INSN_PS(rt, fs, fd, movn_f_op)} ++ //null ++ void cvt_s_pu (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvts_op)} ++ //null ++ void cvt_s_pl (FloatRegister fd, FloatRegister fs) {INSN_PS(R0, fs, fd, fcvtspl_op)} ++ //null ++ void pll_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpll_op)} ++ void plu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fplu_op)} ++ void pul_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpul_op)} ++ void puu_ps (FloatRegister fd, FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, fd, fpuu_op)} ++ void c_f_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, f_cond)} ++ void c_un_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, un_cond)} ++ void c_eq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, eq_cond)} ++ void c_ueq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ueq_cond)} ++ void c_olt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, olt_cond)} ++ void c_ult_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ult_cond)} ++ void c_ole_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ole_cond)} ++ void c_ule_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ule_cond)} ++ void c_sf_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, sf_cond)} ++ void c_ngle_ps(FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngle_cond)} ++ void c_seq_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, seq_cond)} ++ void c_ngl_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngl_cond)} ++ void c_lt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, lt_cond)} ++ void c_nge_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, nge_cond)} ++ void c_le_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, le_cond)} ++ void c_ngt_ps (FloatRegister fs, FloatRegister ft) {INSN_PS(ft, fs, R0, ngt_cond)} ++ //null ++#undef INSN_PS ++ //COP1 end ++ ++ ++ //COP1X ++//R0->encoding() is 0; INSN_SINGLE is enclosed by {} for ctags. 
++#define INSN_COP1X(r0, r1, r2, r3, op) \ ++ { emit_long(insn_F3ROX((int)r0->encoding(), (int)r1->encoding(), (int)r2->encoding(), (int)r3->encoding(), op));} ++ void madd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_s_op) } ++ void madd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, madd_d_op) } ++ void madd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, madd_ps_op) } ++ void msub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_s_op) } ++ void msub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, msub_d_op) } ++ void msub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, msub_ps_op) } ++ void nmadd_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_s_op) } ++ void nmadd_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmadd_d_op) } ++ void nmadd_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmadd_ps_op) } ++ void nmsub_s(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_s_op) } ++ void nmsub_d(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft) {INSN_COP1X(fr, ft, fs, fd, nmsub_d_op) } ++ void nmsub_ps(FloatRegister fd, FloatRegister fr, FloatRegister fs, FloatRegister ft){INSN_COP1X(fr, ft, fs, fd, nmsub_ps_op) } ++#undef INSN_COP1X ++ //COP1X end ++ ++ //SPECIAL2 ++//R0->encoding() is 0; INSN_PS is enclosed by {} for ctags. 
++#define INSN_S2(op) \ ++ { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | op);} ++ ++ void madd (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | madd_op); } ++ void maddu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | maddu_op); } ++ void mul (Register rd, Register rs, Register rt) { INSN_S2(mul_op) } ++ void gsandn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x03_op) } ++ void msub (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msub_op); } ++ void msubu (Register rs, Register rt) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | msubu_op); } ++ void gsorn (Register rd, Register rs, Register rt) { INSN_S2((0x12 << 6) | gs0x06_op) } ++ ++ void gsmult (Register rd, Register rs, Register rt) { INSN_S2(gsmult_op) } ++ void gsdmult (Register rd, Register rs, Register rt) { INSN_S2(gsdmult_op) } ++ void gsmultu (Register rd, Register rs, Register rt) { INSN_S2(gsmultu_op) } ++ void gsdmultu(Register rd, Register rs, Register rt) { INSN_S2(gsdmultu_op)} ++ void gsdiv (Register rd, Register rs, Register rt) { INSN_S2(gsdiv_op) } ++ void gsddiv (Register rd, Register rs, Register rt) { INSN_S2(gsddiv_op) } ++ void gsdivu (Register rd, Register rs, Register rt) { INSN_S2(gsdivu_op) } ++ void gsddivu (Register rd, Register rs, Register rt) { INSN_S2(gsddivu_op) } ++ void gsmod (Register rd, Register rs, Register rt) { INSN_S2(gsmod_op) } ++ void gsdmod (Register rd, Register rs, Register rt) { INSN_S2(gsdmod_op) } ++ void gsmodu (Register rd, Register rs, Register rt) { INSN_S2(gsmodu_op) } ++ void gsdmodu (Register rd, Register rs, Register rt) { INSN_S2(gsdmodu_op) } ++ void clz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clz_op); } ++ void clo (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | clo_op); } ++ void ctz (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 0 << 6| xctx_op); } ++ void cto (Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 1 << 6| xctx_op); } ++ void dctz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 2 << 6| xctx_op); } ++ void dcto(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | 3 << 6| xctx_op); } ++ void dclz(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclz_op); } ++ void dclo(Register rd, Register rs) { emit_long((special2_op << 26) | ((int)rs->encoding() << 21) | ((int)rd->encoding() << 16) | ((int)rd->encoding() << 11) | dclo_op); } ++ ++#undef INSN_S2 ++ ++ //SPECIAL3 ++/* ++// FIXME ++#define is_0_to_32(a, b) \ ++ assert (a >= 0, " just a check"); \ ++ assert (a <= 0, " just a check"); \ ++ assert (b >= 
0, " just a check"); \ ++ assert (b <= 0, " just a check"); \ ++ assert (a+b >= 0, " just a check"); \ ++ assert (a+b <= 0, " just a check"); ++ */ ++#define is_0_to_32(a, b) ++ ++ void ins (Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | ins_op); } ++ void dinsm(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos, 5) << 6) | dinsm_op); } ++ void dinsu(Register rt, Register rs, int pos, int size) { is_0_to_32(pos, size); emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-33, 5) << 11) | (low(pos-32, 5) << 6) | dinsu_op); } ++ void dins (Register rt, Register rs, int pos, int size) { ++ guarantee((0 <= pos) && (pos < 32), "pos must be in [0, 32)"); ++ guarantee((0 < size) && (size <= 32), "size must be in (0, 32]"); ++ guarantee((0 < pos + size) && (pos + size <= 32), "pos + size must be in (0, 32]"); ++ ++ emit_long((special3_op << 26) | ((int)rs->encoding() << 21) | ((int)rt->encoding() << 16) | (low(pos+size-1, 5) << 11) | (low(pos, 5) << 6) | dins_op); ++ } ++ ++ void repl_qb (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_qb_op << 6 | re1_op); } ++ void replv_qb(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qb_op << 6 | re1_op ); } ++ void repl_ph (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_ph_op << 6 | re1_op); } ++ void replv_ph(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ph_op << 6 | re1_op ); } ++ ++ void repl_ob (Register rd, int const8) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const8, 8) << 16) | ((int)rd->encoding() << 11) | repl_ob_op << 6 | re2_op); } ++ void replv_ob(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_ob_op << 6 | re2_op ); } ++ void repl_qh (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_qh_op << 6 | re2_op); } ++ void replv_qh(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_qh_op << 6 | re2_op ); } ++ void repl_pw (Register rd, int const10) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | (low(const10, 10) << 16) | ((int)rd->encoding() << 11) | repl_pw_op << 6 | re2_op); } ++ void replv_pw(Register rd, Register rt) { assert(VM_Version::supports_dsp(), ""); emit_long((special3_op << 26) | ((int)rt->encoding() << 16) | ((int)rd->encoding() << 11) | replv_pw_op << 6 | re2_op ); } ++ ++ void sdc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(sdc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void 
sdc1(FloatRegister ft, Address dst); ++ void swc1(FloatRegister ft, Register base, int off) { emit_long(insn_ORRI(swc1_op, (int)base->encoding(), (int)ft->encoding(), off)); } ++ void swc1(FloatRegister ft, Address dst); ++ ++ ++ static void print_instruction(int); ++ int patched_branch(int dest_pos, int inst, int inst_pos); ++ int branch_destination(int inst, int pos); ++ ++ // Loongson extension ++ ++ // gssq/gslq/gssqc1/gslqc1: vAddr = sign_extend(offset << 4 ) + GPR[base]. Therefore, the off should be ">> 4". ++ void gslble(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslble_op); ++ } ++ ++ void gslbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslbgt_op); ++ } ++ ++ void gslhle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhle_op); ++ } ++ ++ void gslhgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslhgt_op); ++ } ++ ++ void gslwle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwle_op); ++ } ++ ++ void gslwgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgt_op); ++ } ++ ++ void gsldle(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldle_op); ++ } ++ ++ void gsldgt(Register rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgt_op); ++ } ++ ++ void gslwlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwlec1_op); ++ } ++ ++ void gslwgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gslwgtc1_op); ++ } ++ ++ void gsldlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldlec1_op); ++ } ++ ++ void gsldgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsldgtc1_op); ++ } ++ ++ void gslq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gslq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslq: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } 
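[Editor's note — illustrative sketch, not part of the patch.] The comment above gslq/gssq states that the hardware computes vAddr = sign_extend(offset << 4) + GPR[base], so the assembler divides the byte offset by 16 and then requires the scaled value to fit in a signed 9-bit immediate (is_simm(off, 9)). In other words, a valid byte offset must be 16-byte aligned and lie in [-4096, 4080]. The small standalone C++ sketch below mirrors that check; fits_simm and gslq_offset_encodable are hypothetical helper names introduced only for illustration, not HotSpot APIs.

// Illustrative sketch of the gslq/gssq offset encoding constraint.
#include <cstdint>
#include <cstdio>

// True if 'off' fits in a signed immediate of 'nbits' bits,
// mirroring the is_simm(off, 9) check used by gslq/gssq above.
static bool fits_simm(int64_t off, int nbits) {
  int64_t min = -(int64_t(1) << (nbits - 1));
  int64_t max =  (int64_t(1) << (nbits - 1)) - 1;
  return off >= min && off <= max;
}

// A gslq/gssq byte offset is encodable when its low 4 bits are zero
// (16-byte aligned) and the scaled value (off >> 4) is a simm9,
// i.e. the byte offset lies in [-4096, 4080].
static bool gslq_offset_encodable(int64_t byte_off) {
  if (byte_off & 0xF) return false;     // low 4 bits must be zero
  return fits_simm(byte_off >> 4, 9);   // scaled offset must fit in 9 signed bits
}

int main() {
  const int64_t samples[] = {0, 32, 4080, 4096, -4096, -4112, 24};
  for (int64_t off : samples) {
    std::printf("offset %6lld -> %s\n", (long long)off,
                gslq_offset_encodable(off) ? "encodable" : "not encodable");
  }
  return 0;
}

For example, 4080 and -4096 are encodable, while 4096 (scaled value 256 exceeds simm9) and 24 (not 16-byte aligned) are rejected, which is exactly why the gslq/gssq asserts fire for such offsets.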
++ ++ void gslqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gslqc1: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gslqc1: off exceeds 9 bits"); ++ emit_long((gs_lwc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gslq_op | (int)rq->encoding() ); ++ } ++ ++ void gssble(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssble_op); ++ } ++ ++ void gssbgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssbgt_op); ++ } ++ ++ void gsshle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshle_op); ++ } ++ ++ void gsshgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsshgt_op); ++ } ++ ++ void gsswle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswle_op); ++ } ++ ++ void gsswgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgt_op); ++ } ++ ++ void gssdle(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdle_op); ++ } ++ ++ void gssdgt(Register rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgt_op); ++ } ++ ++ void gsswlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswlec1_op); ++ } ++ ++ void gsswgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gsswgtc1_op); ++ } ++ ++ void gssdlec1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdlec1_op); ++ } ++ ++ void gssdgtc1(FloatRegister rt, Register base, Register bound) { ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ((int)bound->encoding() << 11) | 0 << 6 | gssdgtc1_op); ++ } ++ ++ void gssq(Register rq, Register rt, Register base, int off) { ++ assert(!(off & 0xF), "gssq: the low 4 bits of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssq: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 0 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ void gssqc1(FloatRegister rq, FloatRegister rt, Register base, int off) { ++ assert(!(off & 0xF), "gssqc1: the low 4 bits 
of off must be 0"); ++ off = off >> 4; ++ assert(is_simm(off, 9),"gssqc1: off exceeds 9 bits"); ++ emit_long((gs_swc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | 1 << 15 | (low(off, 9) << 6) | gssq_op | (int)rq->encoding() ); ++ } ++ ++ //LDC2 & SDC2 ++#define INSN(OPS, OP) \ ++ assert(is_simm(off, 8), "NAME: off exceeds 8 bits"); \ ++ assert(UseLEXT1, "check UseLEXT1"); \ ++ emit_long( (OPS << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | \ ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | OP); ++ ++#define INSN_LDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_LDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_ldc2_op, op) \ ++ } ++ ++#define INSN_SDC2(NAME, op) \ ++ void NAME(Register rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++#define INSN_SDC2_F(NAME, op) \ ++ void NAME(FloatRegister rt, Register base, Register index, int off) { \ ++ INSN(gs_sdc2_op, op) \ ++ } ++ ++/* ++ void gslbx(Register rt, Register base, Register index, int off) { ++ assert(is_simm(off, 8), "gslbx: off exceeds 8 bits"); ++ assert(UseLEXT1, "check UseLEXT1"); ++ emit_long( (gs_ldc2_op << 26) | ((int)base->encoding() << 21) | ((int)rt->encoding() << 16) | ++ ((int)index->encoding() << 11) | (low(off, 8) << 3) | gslbx_op); ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op);} ++ ++ INSN_LDC2(gslbx, gslbx_op) ++ INSN_LDC2(gslhx, gslhx_op) ++ INSN_LDC2(gslwx, gslwx_op) ++ INSN_LDC2(gsldx, gsldx_op) ++ INSN_LDC2_F(gslwxc1, gslwxc1_op) ++ INSN_LDC2_F(gsldxc1, gsldxc1_op) ++ ++ INSN_SDC2(gssbx, gssbx_op) ++ INSN_SDC2(gsshx, gsshx_op) ++ INSN_SDC2(gsswx, gsswx_op) ++ INSN_SDC2(gssdx, gssdx_op) ++ INSN_SDC2_F(gsswxc1, gsswxc1_op) ++ INSN_SDC2_F(gssdxc1, gssdxc1_op) ++*/ ++ void gslbx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslbx_op) } ++ void gslhx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslhx_op) } ++ void gslwx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwx_op) } ++ void gsldx(Register rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldx_op) } ++ void gslwxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gslwxc1_op) } ++ void gsldxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_ldc2_op, gsldxc1_op) } ++ ++ void gssbx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssbx_op) } ++ void gsshx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsshx_op) } ++ void gsswx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswx_op) } ++ void gssdx(Register rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdx_op) } ++ void gsswxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gsswxc1_op) } ++ void gssdxc1(FloatRegister rt, Register base, Register index, int off) {INSN(gs_sdc2_op, gssdxc1_op) } ++ ++#undef INSN ++#undef INSN_LDC2 ++#undef INSN_LDC2_F ++#undef INSN_SDC2 ++#undef INSN_SDC2_F ++ ++ // cpucfg on Loongson CPUs above 3A4000 ++ void cpucfg(Register rd, Register rs) { emit_long((gs_lwc2_op << 26) | ((int)rs->encoding() << 21) | (0b01000 << 16) | ((int)rd->encoding() << 11) | ( 0b00100 << 6) | 0b011000);} ++ ++ enum Membar_mask_bits { ++ StoreStore = 1 << 3, ++ LoadStore = 1 << 2, 
++ StoreLoad = 1 << 1, ++ LoadLoad = 1 << 0 ++ }; ++ ++ // Serializes memory and blows flags ++ void membar(Membar_mask_bits order_constraint) { ++ sync(); ++ } ++ ++public: ++ // Creation ++ Assembler(CodeBuffer* code) : AbstractAssembler(code) { ++#ifdef CHECK_DELAY ++ delay_state = no_delay; ++#endif ++ } ++ ++ // Decoding ++ static address locate_operand(address inst, WhichOperand which); ++ static address locate_next_instruction(address inst); ++}; ++ ++ ++#include "assembler_mips.inline.hpp" ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/assembler_mips.inline.hpp b/src/hotspot/cpu/mips/assembler_mips.inline.hpp +new file mode 100644 +index 0000000000..21c8a76156 +--- /dev/null ++++ b/src/hotspot/cpu/mips/assembler_mips.inline.hpp +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++#ifndef PRODUCT ++#include "compiler/disassembler.hpp" ++#endif ++ ++ ++ ++inline void Assembler::check_delay() { ++# ifdef CHECK_DELAY ++ guarantee(delay_state != at_delay_slot, "must say delayed() when filling delay slot"); ++ delay_state = no_delay; ++# endif ++} ++ ++inline void Assembler::emit_long(int x) { ++ check_delay(); ++ AbstractAssembler::emit_int32(x); ++} ++ ++inline void Assembler::emit_data(int x, relocInfo::relocType rtype) { ++ relocate(rtype); ++ emit_long(x); ++} ++ ++inline void Assembler::emit_data(int x, RelocationHolder const& rspec) { ++ relocate(rspec); ++ emit_long(x); ++} ++ ++#endif // CPU_MIPS_VM_ASSEMBLER_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/bytes_mips.hpp b/src/hotspot/cpu/mips/bytes_mips.hpp +new file mode 100644 +index 0000000000..4172db219b +--- /dev/null ++++ b/src/hotspot/cpu/mips/bytes_mips.hpp +@@ -0,0 +1,181 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_BYTES_MIPS_HPP ++#define CPU_MIPS_VM_BYTES_MIPS_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++ public: ++ // Returns true if the byte ordering used by Java is different from the native byte ordering ++ // of the underlying machine. For example, this is true for Intel x86, but false for Solaris ++ // on Sparc. ++ // we use mipsel, so return true ++ static inline bool is_Java_byte_ordering_different(){ return true; } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since x86 CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { ++ if ((intptr_t)p & 0x1) { ++ return ((u2)p[1] << 8) | (u2)p[0]; ++ } else { ++ return *(u2*)p; ++ } ++ } ++ ++ static inline u4 get_native_u4(address p) { ++ if ((intptr_t)p & 3) { ++ u4 res; ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ ++ " lwr %[res], 0(%[addr]) \n" ++ " lwl %[res], 3(%[addr]) \n" ++ ++ " .set pop" ++ : [res] "=&r" (res) ++ : [addr] "r" (p) ++ : "memory" ++ ); ++ return res; ++ } else { ++ return *(u4*)p; ++ } ++ } ++ ++ static inline u8 get_native_u8(address p) { ++ u8 res; ++ u8 temp = 0; ++ // u4 tp;//tmp register ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips64\n" ++ " .set noreorder\n" ++ " .set noat\n" ++ " andi $1,%[addr],0x7 \n" ++ " beqz $1,1f \n" ++ " nop \n" ++ " ldr %[temp], 0(%[addr]) \n" ++ " ldl %[temp], 7(%[addr]) \n" ++ " b 2f \n" ++ " nop \n" ++ " 1:\t ld %[temp],0(%[addr]) \n" ++ " 2:\t sd %[temp], %[res] \n" ++ ++ " .set at\n" ++ " .set pop\n" ++ : [addr]"=r"(p), [temp]"=r" (temp) ++ : "[addr]"(p), "[temp]" (temp), [res]"m" (*(volatile jint*)&res) ++ : "memory" ++ ); ++ ++ return res; ++ } ++ ++ //use mips unaligned load instructions ++ static inline void put_native_u2(address p, u2 x) { ++ if((intptr_t)p & 0x1) { ++ p[0] = (u_char)(x); ++ p[1] = (u_char)(x>>8); ++ } else { ++ *(u2*)p = x; ++ } ++ } ++ ++ static inline void put_native_u4(address p, u4 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 3 ) { ++ case 0: *(u4*)p = x; ++ break; ++ ++ case 2: ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ break; ++ } ++ } ++ ++ static inline void put_native_u8(address p, u8 x) { ++ // refer to sparc implementation. 
++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 7 ) { ++ case 0: *(u8*)p = x; ++ break; ++ ++ case 4: ((u4*)p)[1] = x >> 32; ++ ((u4*)p)[0] = x; ++ break; ++ ++ case 2: ((u2*)p)[3] = x >> 48; ++ ((u2*)p)[2] = x >> 32; ++ ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[7] = x >> 56; ++ ((u1*)p)[6] = x >> 48; ++ ((u1*)p)[5] = x >> 40; ++ ((u1*)p)[4] = x >> 32; ++ ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ } ++ } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since MIPS64EL CPUs use little-endian format. ++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#include OS_CPU_HEADER_INLINE(bytes) ++ ++#endif // CPU_MIPS_VM_BYTES_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/c2_globals_mips.hpp b/src/hotspot/cpu/mips/c2_globals_mips.hpp +new file mode 100644 +index 0000000000..ef11827abf +--- /dev/null ++++ b/src/hotspot/cpu/mips/c2_globals_mips.hpp +@@ -0,0 +1,95 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. 
++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++#ifdef CC_INTERP ++define_pd_global(bool, ProfileInterpreter, false); ++#else ++define_pd_global(bool, ProfileInterpreter, true); ++#endif // CC_INTERP ++// Disable C1 in server JIT ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 10000); ++define_pd_global(intx, BackEdgeThreshold, 100000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 6); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 13); ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. ++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 120*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 57*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 58*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 4); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++#endif // CPU_MIPS_VM_C2_GLOBALS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/c2_init_mips.cpp b/src/hotspot/cpu/mips/c2_init_mips.cpp +new file mode 100644 +index 0000000000..e6d5815f42 +--- /dev/null ++++ b/src/hotspot/cpu/mips/c2_init_mips.cpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for mips ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++} +diff --git a/src/hotspot/cpu/mips/codeBuffer_mips.hpp b/src/hotspot/cpu/mips/codeBuffer_mips.hpp +new file mode 100644 +index 0000000000..3cc191006d +--- /dev/null ++++ b/src/hotspot/cpu/mips/codeBuffer_mips.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++#define CPU_MIPS_VM_CODEBUFFER_MIPS_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_MIPS_VM_CODEBUFFER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/compiledIC_mips.cpp b/src/hotspot/cpu/mips/compiledIC_mips.cpp +new file mode 100644 +index 0000000000..068ca4799d +--- /dev/null ++++ b/src/hotspot/cpu/mips/compiledIC_mips.cpp +@@ -0,0 +1,151 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // get mark within main instrs section ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); ++ ++ address base = __ start_a_stub(CompiledStaticCall::to_interp_stub_size()); ++ if (base == NULL) return NULL; // CodeBuffer::expand failed ++ // static stub relocation stores the instruction address of the call ++ ++ __ relocate(static_stub_Relocation::spec(mark), 0); ++ ++ // Code stream for loading method may be changed. ++ __ synci(R0, 0); ++ ++ // Rmethod contains methodOop, it should be relocated for GC ++ // static stub relocation also tags the methodOop in the code-stream. ++ __ mov_metadata(Rmethod, NULL); ++ // This is recognized as unresolved by relocs/nativeInst/ic code ++ ++ __ relocate(relocInfo::runtime_call_type); ++ ++ cbuf.set_insts_mark(); ++ address call_pc = (address)-1; ++ __ patchable_jump(call_pc); ++ __ align(16); ++ // Update current stubs pointer and restore code_end. ++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() { ++ int size = NativeInstruction::nop_instruction_size + NativeMovConstReg::instruction_size + NativeCall::instruction_size; ++ return round_to(size, 16); ++} ++ ++int CompiledStaticCall::to_trampoline_stub_size() { ++ return NativeInstruction::nop_instruction_size + NativeCallTrampolineStub::instruction_size; ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() { ++ return 16; ++} ++ ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(false /* is_aot */); ++ guarantee(stub != NULL, "stub not found"); ++ ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ assert(method_holder->data() == 0 || method_holder->data() == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(jump->jump_destination() == (address)-1 || jump->jump_destination() == entry, ++ "b) MT-unsafe modification of inline cache"); ++ ++ // Update stub. 
++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ method_holder->set_data(0); ++ jump->set_jump_destination((address)-1); ++} ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ if (os::is_MP()) { ++ _call->verify_alignment(); ++ } ++ ++ // Verify stub. ++ address stub = find_stub(false /* is_aot */); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub + NativeInstruction::nop_instruction_size); ++ NativeGeneralJump* jump = nativeGeneralJump_at(method_holder->next_instruction_address()); ++ ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++ ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/mips/copy_mips.hpp b/src/hotspot/cpu/mips/copy_mips.hpp +new file mode 100644 +index 0000000000..dcc77adfec +--- /dev/null ++++ b/src/hotspot/cpu/mips/copy_mips.hpp +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_COPY_MIPS_HPP ++#define CPU_MIPS_VM_COPY_MIPS_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) ++ ++// Template for atomic, element-wise copy. 
++template ++static void copy_conjoint_atomic(const T* from, T* to, size_t count) { ++ if (from > to) { ++ while (count-- > 0) { ++ // Copy forwards ++ *to++ = *from++; ++ } ++ } else { ++ from += count - 1; ++ to += count - 1; ++ while (count-- > 0) { ++ // Copy backwards ++ *to-- = *from--; ++ } ++ } ++} ++ ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif //CPU_MIPS_VM_COPY_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/depChecker_mips.cpp b/src/hotspot/cpu/mips/depChecker_mips.cpp +new file mode 100644 +index 0000000000..756ccb68f9 +--- /dev/null ++++ b/src/hotspot/cpu/mips/depChecker_mips.cpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/disassembler.hpp" ++#include "depChecker_mips.hpp" ++ ++// Nothing to do on mips +diff --git a/src/hotspot/cpu/mips/depChecker_mips.hpp b/src/hotspot/cpu/mips/depChecker_mips.hpp +new file mode 100644 +index 0000000000..11e52b4e8f +--- /dev/null ++++ b/src/hotspot/cpu/mips/depChecker_mips.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++#define CPU_MIPS_VM_DEPCHECKER_MIPS_HPP ++ ++// Nothing to do on MIPS ++ ++#endif // CPU_MIPS_VM_DEPCHECKER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/disassembler_mips.hpp b/src/hotspot/cpu/mips/disassembler_mips.hpp +new file mode 100644 +index 0000000000..c5f3a8888d +--- /dev/null ++++ b/src/hotspot/cpu/mips/disassembler_mips.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "gpr-names=64"; ++ } ++ ++#endif // CPU_MIPS_VM_DISASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/frame_mips.cpp b/src/hotspot/cpu/mips/frame_mips.cpp +new file mode 100644 +index 0000000000..d49bd6290d +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.cpp +@@ -0,0 +1,690 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_mips.inline.hpp" ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++ ++// Profiling/safepoint support ++// for Profiling - acting on another frame. walks sender frames ++// if valid. ++// frame profile_find_Java_sender_frame(JavaThread *thread); ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? ++ JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size() : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && ++ (unextended_sp >= sp); ++ ++ if (!unextended_sp_safe) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = (fp < thread->stack_base() && (fp > sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. 
++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++ } ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? ++ if ((address)sender_sp >= thread->stack_base()) { ++ return false; ++ } ++ sender_unextended_sp = sender_sp; ++ // On MIPS the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // FP is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved FP ++ // is really a frame pointer. 
++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && ( jcw > (address)sender.fp()); ++ ++ return jcw_safe; ++ } ++ ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_compiled()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... ++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... 
++ ++ return true; ++ ++} ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++#ifdef CC_INTERP ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ // QQQ why does this specialize method exist if frame::sender_sp() does same thing? ++ // seems odd and if we always know interpreted vs. non then sender_sp() is really ++ // doing too much work. ++ return get_interpreterState()->sender_sp(); ++} ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return get_interpreterState()->monitor_base(); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ return (BasicObjectLock*) get_interpreterState()->stack_base(); ++} ++ ++#else // CC_INTERP ++ ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ int_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert((intptr_t) fp() > (intptr_t) result, "result must < than frame pointer"); ++ assert((intptr_t) sp() <= (intptr_t) result, "result must >= than stack pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++#endif // CC_INTERP ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, 
"map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ if (jfa->last_Java_pc() != NULL ) { ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp()); ++ return fr; ++} ++ ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // sp is the raw sp from the sender after adapter or interpreter extension ++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++ // The interpreter and compiler(s) always save FP in a known ++ // location on entry. We must record where that location is ++ // so this if FP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves FP if we record where it is then ++ // we don't have to always save FP on entry and exit to c2 compiled ++ // code, on entry will be enough. ++#ifdef COMPILER2 ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif /* COMPILER2 */ ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. The unextended SP might also be the saved SP ++// for MethodHandle call sites. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++void frame::adjust_unextended_sp() { ++ // On MIPS, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ DEBUG_ONLY(verify_deopt_original_pc(sender_cm, _unextended_sp)); ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save fp in a known ++ // location on entry. 
We must record where that location is ++ // so that if fp was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves fp if we record where it is then ++ // we don't have to always save fp on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ map->set_location(FP->as_VMReg(), (address) link_addr); ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. ++ // XXXX make this go away ++ if (true) { ++ map->set_location(FP->as_VMReg()->next(), (address) link_addr); ++ } ++} ++ ++//------------------------------sender_for_compiled_frame----------------------- ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ ++ // frame owned by optimizing compiler ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ ++ intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = sender_sp; ++ ++ // On Loongson the return_address is always the word on the stack ++ // the fp in compiler points to sender fp, but in interpreter, fp points to return address, ++ // so getting sender for compiled frame is not same as interpreter frame. ++ // we hard code here temporarily ++ // spark ++ address sender_pc = (address) *(sender_sp-1); ++ ++ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); ++ ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } ++ ++ // Since the prolog does the save and restore of epb there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } ++ assert(sender_sp != sp(), "must have changed"); ++ return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} ++ ++frame frame::sender(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map->set_include_argument_oops(false); ++ ++ if (is_entry_frame()) return sender_for_entry_frame(map); ++ if (is_interpreted_frame()) return sender_for_interpreter_frame(map); ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++// QQQ ++#ifdef CC_INTERP ++#else ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. 
++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ ++ // first the method ++ ++ Method* m = *interpreter_frame_method_addr(); ++ ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ ++ //if (fp() - sp() > 1024 + m->max_stack()*Interpreter::stackElementSize()) { ++ if (fp() - sp() > 4096) { // stack frames shouldn't be large. ++ return false; ++ } ++ ++ // validate bci/bcp ++ ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ ++ if (MetaspaceObj::is_valid(cp) == false) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ ++#endif // CC_INTERP ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++#ifdef CC_INTERP ++ // Needed for JVMTI. The result should always be in the interpreterState object ++ assert(false, "NYI"); ++ interpreterState istate = get_interpreterState(); ++#endif // CC_INTERP ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved. See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ tos_addr += 2; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++#ifdef CC_INTERP ++ obj = istate->_oop_temp; ++#else ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++#endif // CC_INTERP ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } ++} ++#endif ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! is_compiled_frame(), "unknown compiled frame size"); ++ return fp(); ++} ++ ++#ifndef PRODUCT ++// This is a generic constructor which is only used by pns() in debug.cpp. ++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++ ++void frame::pd_ps() {} ++#endif +diff --git a/src/hotspot/cpu/mips/frame_mips.hpp b/src/hotspot/cpu/mips/frame_mips.hpp +new file mode 100644 +index 0000000000..bdbfa8aaa2 +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.hpp +@@ -0,0 +1,215 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_HPP ++ ++#include "runtime/synchronizer.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... | monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code index/pointr] = bcx() bcx_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [methodOop ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] <- fp = link() ++// [return pc ] ++// [oop temp ] (only for native calls) ++// [locals and parameters ] ++// <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++ ++// ------------------------------ C++ interpreter ---------------------------------------- ++// ++// Layout of C++ interpreter frame: (While executing in BytecodeInterpreter::run) ++// ++// <- SP (current sp) ++// [local variables ] BytecodeInterpreter::run local variables ++// ... BytecodeInterpreter::run local variables ++// [local variables ] BytecodeInterpreter::run local variables ++// [old frame pointer ] fp [ BytecodeInterpreter::run's fp ] ++// [return pc ] (return to frame manager) ++// [interpreter_state* ] (arg to BytecodeInterpreter::run) -------------- ++// [expression stack ] <- last_Java_sp | ++// [... ] * <- interpreter_state.stack | ++// [expression stack ] * <- interpreter_state.stack_base | ++// [monitors ] \ | ++// ... | monitor block size | ++// [monitors ] / <- interpreter_state.monitor_base | ++// [struct interpretState ] <-----------------------------------------| ++// [return pc ] (return to callee of frame manager [1] ++// [locals and parameters ] ++// <- sender sp ++ ++// [1] When the c++ interpreter calls a new method it returns to the frame ++// manager which allocates a new frame on the stack. In that case there ++// is no real callee of this newly allocated frame. The frame manager is ++// aware of the additional frame(s) and will pop them as nested calls ++// complete. Howevers tTo make it look good in the debugger the frame ++// manager actually installs a dummy pc pointing to RecursiveInterpreterActivation ++// with a fake interpreter_state* parameter to make it easy to debug ++// nested calls. ++ ++// Note that contrary to the layout for the assembly interpreter the ++// expression stack allocated for the C++ interpreter is full sized. ++// However this is not as bad as it seems as the interpreter frame_manager ++// will truncate the unused space on succesive method calls. ++// ++// ------------------------------ C++ interpreter ---------------------------------------- ++ ++// Layout of interpreter frame: ++// ++// [ monitor entry ] <--- sp ++// ... 
++// [ monitor entry ] ++// -9 [ monitor block top ] ( the top monitor entry ) ++// -8 [ byte code pointer ] (if native, bcp = 0) ++// -7 [ constant pool cache ] ++// -6 [ methodData ] mdx_offset(not core only) ++// -5 [ mirror ] ++// -4 [ methodOop ] ++// -3 [ locals offset ] ++// -2 [ last_sp ] ++// -1 [ sender's sp ] ++// 0 [ sender's fp ] <--- fp ++// 1 [ return address ] ++// 2 [ oop temp offset ] (only for native calls) ++// 3 [ result handler offset ] (only for native calls) ++// 4 [ result type info ] (only for native calls) ++// [ local var m-1 ] ++// ... ++// [ local var 0 ] ++// [ argumnet word n-1 ] <--- ( sender's sp ) ++// ... ++// [ argument word 0 ] <--- S7 ++ ++ public: ++ enum { ++ pc_return_offset = 0, ++ // All frames ++ link_offset = 0, ++ return_addr_offset = 1, ++ // non-interpreter frames ++ sender_sp_offset = 2, ++ ++ // Interpreter frames ++ interpreter_frame_return_addr_offset = 1, ++ interpreter_frame_result_handler_offset = 3, // for native calls only ++ interpreter_frame_oop_temp_offset = 2, // for native calls only ++ ++ interpreter_frame_sender_fp_offset = 0, ++ interpreter_frame_sender_sp_offset = -1, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ ++ // Entry frames ++ entry_frame_call_wrapper_offset = -9, ++ ++ // Native frames ++ ++ native_frame_initial_param_offset = 2 ++ ++ }; ++ ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } ++ ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } ++ ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. 
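To make the distinction above concrete: when an adapter or the interpreter extends the caller's frame, the raw sp moves down while the caller's original sp is what the oopMaps were generated against, which is why both values are kept. A stand-alone sketch of that bookkeeping follows (names and the downward-growing-stack assumption are illustrative only, not part of the patch):

#include <cassert>
#include <cstdint>

struct FrameView {
  intptr_t* sp;             // raw sp after any extension
  intptr_t* unextended_sp;  // caller's sp before extension; used as the frame id
};

// Extending a frame by `words` slots moves the raw sp down but leaves the
// unextended sp (and therefore the frame id / oopMap anchor) unchanged.
static FrameView extend(FrameView f, int words) {
  FrameView out = f;
  out.sp = f.sp - words;    // stack grows toward lower addresses
  return out;
}

int main() {
  intptr_t stack[32];
  FrameView f = { &stack[16], &stack[16] };
  FrameView g = extend(f, 4);
  assert(g.sp == &stack[12]);
  assert(g.unextended_sp == f.unextended_sp);
  return 0;
}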
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp(); ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved FP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++ ++ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ ++#endif // CPU_MIPS_VM_FRAME_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/frame_mips.inline.hpp b/src/hotspot/cpu/mips/frame_mips.inline.hpp +new file mode 100644 +index 0000000000..c408f01d69 +--- /dev/null ++++ b/src/hotspot/cpu/mips/frame_mips.inline.hpp +@@ -0,0 +1,238 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" ++ ++// Inline functions for Loongson frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline void frame::init(intptr_t* sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
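Since the id is just the unextended sp and the stack grows toward lower addresses, the younger/older tests below reduce to pointer comparisons. A minimal stand-alone illustration of that ordering (not part of the patch):

#include <cassert>
#include <cstdint>

// A frame id is its unextended sp; comparing ids compares stack depth.
static bool is_younger(intptr_t* id, intptr_t* than) { return id < than; }
static bool is_older  (intptr_t* id, intptr_t* than) { return id > than; }

int main() {
  intptr_t stack[8];
  intptr_t* callee_sp = &stack[2];   // pushed later, lower address
  intptr_t* caller_sp = &stack[6];   // pushed earlier, higher address
  assert(is_younger(callee_sp, caller_sp));
  assert(is_older(caller_sp, callee_sp));
  return 0;
}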
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++inline intptr_t* frame::link() const { ++ return (intptr_t*) *(intptr_t **)addr_at(link_offset); ++} ++ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } ++inline address frame::sender_pc() const { return *sender_pc_addr(); } ++ ++inline intptr_t* frame::sender_sp() const { return addr_at( sender_sp_offset); } ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} ++ ++ ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} ++ ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL ) { ++ return sp(); ++ } else { ++ // sp() may have been extended by an adapter ++ assert(last_sp <= (intptr_t*)interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++// Compiled frames ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ 
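For reference, the interpreter_frame_tos_address() accessor above boils down to a two-way choice: use the saved last_sp when a call is in progress, otherwise fall back to the raw sp. A stand-alone sketch of that selection (illustrative only, not part of the patch):

#include <cstddef>
#include <cstdint>

// Pick the top-of-stack address the same way interpreter_frame_tos_address() does:
// prefer the recorded last_sp (set around calls), otherwise use the raw sp.
static intptr_t* tos_address(intptr_t* sp, intptr_t* last_sp) {
  return (last_sp == NULL) ? sp : last_sp;
}

int main() {
  intptr_t stack[4];
  // No nested call: tos is the raw sp.
  bool a = tos_address(&stack[0], NULL) == &stack[0];
  // Nested call in progress: tos is the recorded last_sp.
  bool b = tos_address(&stack[0], &stack[2]) == &stack[2];
  return (a && b) ? 0 : 1;
}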
++#endif // CPU_MIPS_VM_FRAME_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp +new file mode 100644 +index 0000000000..179f7703c8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.cpp +@@ -0,0 +1,364 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "utilities/macros.hpp" ++ ++#define __ masm-> ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) { ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++ ++ if (!dest_uninitialized) { ++#ifndef OPT_THREAD ++ Register thread = T9; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ Label filtered; ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ // Is marking active? 
++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lb(AT, in_progress); ++ } ++ ++ __ beq(AT, R0, filtered); ++ __ delayed()->nop(); ++ ++ __ pushad(); // push registers ++ if (count == A0) { ++ if (addr == A1) { ++ __ move(AT, A0); ++ __ move(A0, A1); ++ __ move(A1, AT); ++ } else { ++ __ move(A1, count); ++ __ move(A0, addr); ++ } ++ } else { ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ __ popad(); ++ ++ __ bind(filtered); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ __ pushad(); // push registers (overkill) ++ if (count == A0) { ++ assert_different_registers(A1, addr); ++ __ move(A1, count); ++ __ move(A0, addr); ++ } else { ++ assert_different_registers(A0, count); ++ __ move(A0, addr); ++ __ move(A1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ __ popad(); ++} ++ ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ thread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ } ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == TREG, "must be"); ++ ++ Label done; ++ Label runtime; ++ ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != V0, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ lw(AT, in_progress); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ lb(AT, in_progress); ++ } ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // Do we need to load the previous value? 
++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0)); ++ } ++ ++ // Is the previous value null? ++ __ beq(pre_val, R0, done); ++ __ delayed()->nop(); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ld(tmp, index); ++ __ beq(tmp, R0, runtime); ++ __ delayed()->nop(); ++ ++ __ daddiu(tmp, tmp, -1 * wordSize); ++ __ sd(tmp, index); ++ __ ld(AT, buffer); ++ __ daddu(tmp, tmp, AT); ++ ++ // Record the previous value ++ __ sd(pre_val, tmp, 0); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ ++ __ bind(runtime); ++ // save the live input values ++ if (tosca_live) __ push(V0); ++ ++ if (obj != noreg && obj != V0) __ push(obj); ++ ++ if (pre_val != V0) __ push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. ++ ++ if (expand_call) { ++ assert(pre_val != A1, "smashed arg"); ++ if (thread != A1) __ move(A1, thread); ++ if (pre_val != A0) __ move(A0, pre_val); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != V0) ++ __ pop(pre_val); ++ ++ if (obj != noreg && obj != V0) ++ __ pop(obj); ++ ++ if (tosca_live) __ pop(V0); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ assert_different_registers(tmp, tmp2, AT); ++ assert(thread == TREG, "must be"); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ CardTableBarrierSet* ct = barrier_set_cast(BarrierSet::barrier_set()); ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ __ xorr(AT, store_addr, new_val); ++ __ dsrl(AT, AT, HeapRegion::LogOfHRGrainBytes); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // crosses regions, storing NULL? ++ __ beq(new_val, R0, done); ++ __ delayed()->nop(); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ __ move(card_addr, store_addr); ++ __ dsrl(card_addr, card_addr, CardTable::card_shift); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. 
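Taken together, this post-barrier filters stores before touching the dirty-card queue: same-region stores, NULL stores, and cards that are already young or dirty are skipped, and only the remaining stores dirty the card and go to the slow path. A plain C++ sketch of that filtering order, with assumed example constants (illustrative only, not part of the patch):

#include <cstdint>
#include <cstring>

// Example constants only; the real values come from HeapRegion and G1CardTable.
static const int     kLogOfHRGrainBytes = 21;   // pretend 2 MB heap regions
static const int     kCardShift         = 9;    // pretend 512-byte cards
static const uint8_t kCleanCard         = 0xff;
static const uint8_t kYoungCard         = 1;
static const uint8_t kDirtyCard         = 0;

// Mirrors the filtering order used above: same-region stores, NULL stores and
// already-young/dirty cards are filtered out; everything else dirties the card
// and must be enqueued for refinement.
static bool post_barrier_needs_enqueue(uintptr_t store_addr, uintptr_t new_val,
                                       uint8_t* byte_map_base) {
  if (((store_addr ^ new_val) >> kLogOfHRGrainBytes) == 0) return false;
  if (new_val == 0) return false;
  uint8_t* card = byte_map_base + (store_addr >> kCardShift);
  if (*card == kYoungCard) return false;
  if (*card == kDirtyCard) return false;
  *card = kDirtyCard;
  return true;
}

int main() {
  static uint8_t cards[1 << 14];
  memset(cards, kCleanCard, sizeof(cards));
  // A cross-region, non-NULL store into a clean card needs the slow path.
  return post_barrier_needs_enqueue(0x300000, 0x500000, cards) ? 0 : 1;
}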
++ __ set64(cardtable, (intptr_t)ct->card_table()->byte_map_base()); ++ __ daddu(card_addr, card_addr, cardtable); ++ ++ __ lb(AT, card_addr, 0); ++ __ daddiu(AT, AT, -1 * (int)G1CardTable::g1_young_card_val()); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ __ sync(); ++ __ lb(AT, card_addr, 0); ++ __ daddiu(AT, AT, -1 * (int)G1CardTable::dirty_card_val()); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ __ move(AT, (int)G1CardTable::dirty_card_val()); ++ __ sb(AT, card_addr, 0); ++ ++ __ lw(AT, queue_index); ++ __ beq(AT, R0, runtime); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, -1 * wordSize); ++ __ sw(AT, queue_index); ++ __ ld(tmp2, buffer); ++ __ ld(AT, queue_index); ++ __ daddu(tmp2, tmp2, AT); ++ __ sd(card_addr, tmp2, 0); ++ __ beq(R0, R0, done); ++ __ delayed()->nop(); ++ ++ __ bind(runtime); ++ // save the live input values ++ __ push(store_addr); ++ __ push(new_val); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, TREG); ++ __ pop(new_val); ++ __ pop(store_addr); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool as_normal = (decorators & AS_NORMAL) != 0; ++ assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); ++ ++ bool needs_pre_barrier = as_normal; ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ Register tmp3 = RT3; ++ Register rthread = TREG; ++ // flatten object address if needed ++ // We do it regardless of precise because we need the registers ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != tmp3) { ++ __ move(tmp3, dst.base()); ++ } ++ } else { ++ __ lea(tmp3, dst); ++ } ++ ++ if (needs_pre_barrier) { ++ g1_write_barrier_pre(masm /*masm*/, ++ tmp3 /* obj */, ++ tmp2 /* pre_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ } ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ } else { ++ Register new_val = val; ++ if (needs_post_barrier) { ++ // G1 barrier needs uncompressed oop for region cross check. ++ if (UseCompressedOops) { ++ new_val = tmp2; ++ __ move(new_val, val); ++ } ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp3, 0), val, noreg, noreg); ++ if (needs_post_barrier) { ++ g1_write_barrier_post(masm /*masm*/, ++ tmp3 /* store_adr */, ++ new_val /* new_val */, ++ rthread /* thread */, ++ tmp1 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp +new file mode 100644 +index 0000000000..ec5c243c3f +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/g1/g1BarrierSetAssembler_mips.hpp +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class LIR_Assembler; ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; ++ ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++ protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count); ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ public: ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_MIPS_GC_G1_G1BARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp +new file mode 100644 +index 0000000000..071debdc3a +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.cpp +@@ -0,0 +1,194 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/thread.hpp" ++ ++#define __ masm-> ++ ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ lwu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); ++ } ++ } else ++ { ++ __ ld_ptr(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ld_ptr(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ lbu (dst, src); break; ++ case T_BYTE: __ lb (dst, src); break; ++ case T_CHAR: __ lhu (dst, src); break; ++ case T_SHORT: __ lh (dst, src); break; ++ case T_INT: __ lw (dst, src); break; ++ case T_LONG: __ ld (dst, src); break; ++ case T_ADDRESS: __ ld_ptr(dst, src); break; ++ case T_FLOAT: ++ assert(dst == noreg, "only to ftos"); ++ __ lwc1(FSF, src); ++ break; ++ case T_DOUBLE: ++ assert(dst == noreg, "only to dtos"); ++ __ ldc1(FSF, src); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (val == noreg) { ++ assert(!is_not_null, "inconsistent access"); ++ if (UseCompressedOops) { ++ __ sw(R0, dst); ++ } else { ++ __ sd(R0, dst); ++ } ++ } else { ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (is_not_null) { ++ __ encode_heap_oop_not_null(val); ++ } else { ++ __ encode_heap_oop(val); ++ } ++ __ sw(val, dst); ++ } else ++ { ++ __ st_ptr(val, dst); ++ } ++ } ++ } else { ++ assert(in_native, "why else?"); ++ assert(val != noreg, "not supported"); ++ __ st_ptr(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andi(val, val, 0x1); // boolean is true if LSB is 1 ++ __ sb(val, dst); ++ break; ++ case T_BYTE: ++ __ sb(val, dst); ++ break; ++ case T_SHORT: ++ __ sh(val, dst); ++ break; ++ case T_CHAR: ++ __ sh(val, dst); ++ break; ++ case T_INT: ++ __ sw(val, dst); ++ break; ++ case T_LONG: ++ __ sd(val, dst); ++ break; ++ case T_FLOAT: ++ assert(val == noreg, "only tos"); ++ __ swc1(FSF, dst); ++ break; ++ case T_DOUBLE: ++ assert(val == noreg, "only tos"); ++ __ sdc1(FSF, dst); ++ break; ++ case T_ADDRESS: ++ __ st_ptr(val, dst); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2) { ++ Unimplemented(); ++} ++ ++void 
BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ __ clear_jweak_tag(obj); ++ __ ld_ptr(obj, Address(obj, 0)); ++} ++ ++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ Unimplemented(); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ Unimplemented(); ++} ++ ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ Unimplemented(); ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp +new file mode 100644 +index 0000000000..b97ecbcca5 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/barrierSetAssembler_mips.hpp +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "memory/allocation.hpp" ++#include "oops/access.hpp" ++ ++class InterpreterMacroAssembler; ++ ++class BarrierSetAssembler: public CHeapObj { ++private: ++ void incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1); ++ ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG) {} ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG) {} ++ ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2); ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2); ++ ++ virtual void resolve(MacroAssembler* masm, DecoratorSet decorators, Register obj) { ++ // Default implementation does not need to do anything. ++ } ++ ++ // Support for jniFastGetField to try resolving a jobject/jweak in native ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, Register t2, ++ Label& slow_case); ++ virtual void eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case); ++ ++ virtual void barrier_stubs_init() {} ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_BARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp +new file mode 100644 +index 0000000000..cb1d53db0a +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.cpp +@@ -0,0 +1,149 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) ++ ++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ BarrierSet *bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ intptr_t disp = (intptr_t) ct->byte_map_base(); ++ ++ Label L_loop, L_done; ++ const Register end = count; ++ assert_different_registers(addr, end); ++ ++ __ beq(count, R0, L_done); // zero count - nothing to do ++ __ delayed()->nop(); ++ ++ if (UseConcMarkSweepGC) __ sync(); ++ ++ __ set64(tmp, disp); ++ ++ __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size ++ __ daddiu(end, end, -BytesPerHeapOop); // end - 1 to make inclusive ++ __ shr(addr, CardTable::card_shift); ++ __ shr(end, CardTable::card_shift); ++ __ dsubu(end, end, addr); // end --> cards count ++ ++ __ daddu(addr, addr, tmp); ++ ++ __ BIND(L_loop); ++ if (UseLEXT1) { ++ __ gssbx(R0, addr, count, 0); ++ } else { ++ __ daddu(AT, addr, count); ++ __ sb(R0, AT, 0); ++ } ++ __ daddiu(count, count, -1); ++ __ bgez(count, L_loop); ++ __ delayed()->nop(); ++ ++ __ BIND(L_done); ++} ++ ++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) { ++ // Does a store check for the oop in register obj. The content of ++ // register obj is destroyed afterwards. 
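In plain terms, the store check that follows scales the written-to address down to a card index and marks the matching card-table entry dirty (the dirty value is asserted to be zero below, which is why storing R0 suffices). A stand-alone sketch with assumed constants (illustrative only, not part of the patch):

#include <cstdint>
#include <cstring>

static const int     kCardShift = 9;   // assumed 512-byte cards
static const uint8_t kDirty     = 0;   // matches the dirty_card_val() == 0 assert below

// byte_map_base is biased so that adding the shifted address lands on the card entry.
static void store_check(uintptr_t store_addr, uint8_t* byte_map_base) {
  byte_map_base[store_addr >> kCardShift] = kDirty;
}

int main() {
  static uint8_t cards[1 << 12];
  memset(cards, 0xff, sizeof(cards));   // pretend all cards start clean
  store_check(0x12345, cards);          // mark the card covering this address
  return cards[0x12345 >> kCardShift] == kDirty ? 0 : 1;
}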
++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ __ shr(obj, CardTable::card_shift); ++ ++ Address card_addr; ++ ++ intptr_t byte_map_base = (intptr_t)ct->byte_map_base(); ++ Register tmp = T9; ++ assert_different_registers(tmp, obj); ++ __ li(tmp, byte_map_base); ++ __ addu(tmp, tmp, obj); ++ ++ assert(CardTable::dirty_card_val() == 0, "must be"); ++ ++ jbyte dirty = CardTable::dirty_card_val(); ++ if (UseCondCardMark) { ++ Untested("Untested"); ++ __ warn("store_check Untested"); ++ Label L_already_dirty; ++ __ membar(Assembler::StoreLoad); ++ __ lb(AT, tmp, 0); ++ __ addiu(AT, AT, -1 * dirty); ++ __ beq(AT, R0, L_already_dirty); ++ __ delayed()->nop(); ++ __ sb(R0, tmp, 0); ++ __ bind(L_already_dirty); ++ } else { ++ if (ct->scanned_concurrently()) { ++ __ membar(Assembler::StoreLoad); ++ } ++ __ sb(R0, tmp, 0); ++ } ++} ++ ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; ++ ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || (dst.index() == noreg && dst.disp() == 0)) { ++ store_check(masm, dst.base(), dst); ++ } else { ++ __ lea(tmp1, dst); ++ store_check(masm, tmp1, dst); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp +new file mode 100644 +index 0000000000..49c2a0ea80 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/cardTableBarrierSetAssembler_mips.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Address dst); ++ ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, Register addr, Register count, Register tmp); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp +new file mode 100644 +index 0000000000..765259e626 +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.cpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch) { ++ if (is_oop) { ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count); ++ } ++} ++ ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch) { ++ if (is_oop) { ++ gen_write_ref_array_post_barrier(masm, decorators, dst, count, scratch); ++ } ++} ++ ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff --git a/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp +new file mode 100644 +index 0000000000..5320a4c0ad +--- /dev/null ++++ b/src/hotspot/cpu/mips/gc/shared/modRefBarrierSetAssembler_mips.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP ++#define CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++ ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. 
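The split described above is a small template-method pattern: the ModRef layer owns the type dispatch, and concrete barriers (card table, G1) only override the oop path. A stripped-down sketch of that shape (class and function names simplified for illustration; the real methods take MacroAssembler/DecoratorSet arguments and this is not part of the patch):

#include <cstdio>

enum BasicType { T_INT, T_OBJECT, T_ARRAY };

struct BarrierSetAssembler {
  virtual ~BarrierSetAssembler() {}
  virtual void store_at(BasicType type) { printf("plain store\n"); }
};

// ModRef layer: filters non-oop stores, defers oop stores to the concrete barrier.
struct ModRefBarrierSetAssembler : BarrierSetAssembler {
  virtual void oop_store_at(BasicType type) = 0;
  virtual void store_at(BasicType type) {
    if (type == T_OBJECT || type == T_ARRAY) oop_store_at(type);
    else BarrierSetAssembler::store_at(type);
  }
};

// A concrete barrier (e.g. the card-table one) only has to supply the oop path.
struct CardTableLikeAssembler : ModRefBarrierSetAssembler {
  virtual void oop_store_at(BasicType) { printf("store + card mark\n"); }
};

int main() {
  CardTableLikeAssembler bs;
  bs.store_at(T_INT);      // goes to the plain store
  bs.store_at(T_OBJECT);   // goes through the oop/store-check path
  return 0;
}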
++ ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) {} ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, bool is_oop, ++ Register dst, Register count, Register scratch = NOREG); ++ ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_MIPS_GC_SHARED_MODREFBARRIERSETASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/globalDefinitions_mips.hpp b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp +new file mode 100644 +index 0000000000..abf8141e8b +--- /dev/null ++++ b/src/hotspot/cpu/mips/globalDefinitions_mips.hpp +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP ++// Size of MIPS Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are properly extended to 64 bits. ++// If set, SharedRuntime::c_calling_convention() must adapt ++// signatures accordingly. ++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#define THREAD_LOCAL_POLL ++ ++#endif // CPU_MIPS_VM_GLOBALDEFINITIONS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/globals_mips.hpp b/src/hotspot/cpu/mips/globals_mips.hpp +new file mode 100644 +index 0000000000..3bcad005d1 +--- /dev/null ++++ b/src/hotspot/cpu/mips/globals_mips.hpp +@@ -0,0 +1,137 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_GLOBALS_MIPS_HPP ++#define CPU_MIPS_VM_GLOBALS_MIPS_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); // Not needed on x86. ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs passed to check cast ++ ++define_pd_global(uintx, CodeCacheSegmentSize, 64); ++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++// MIPS generates 3x instructions than X86 ++define_pd_global(intx, InlineSmallCode, 4000); ++ ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+4)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) ++define_pd_global(uintx, TLABSize, 0); ++define_pd_global(uintx, NewSize, 1024 * K); ++define_pd_global(intx, PreInflateSpin, 10); ++ ++define_pd_global(intx, PrefetchCopyIntervalInBytes, -1); ++define_pd_global(intx, PrefetchScanIntervalInBytes, -1); ++define_pd_global(intx, PrefetchFieldsAhead, -1); ++ ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ ++define_pd_global(intx, StackYellowPages, 2); ++define_pd_global(intx, StackRedPages, 1); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++define_pd_global(bool, UseMembar, true); ++// GC Ergo Flags ++define_pd_global(intx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, CompactStrings, true); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++ ++define_pd_global(intx, InitArrayShortSize, 8*BytesPerLong); ++ ++define_pd_global(bool, ThreadLocalHandshakes, true); ++// Only c2 cares about this at 
the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++ ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ diagnostic, \ ++ experimental, \ ++ notproduct, \ ++ range, \ ++ constraint, \ ++ writeable) \ ++ \ ++ product(bool, UseLEXT1, false, \ ++ "Use LoongISA general EXTensions 1") \ ++ \ ++ product(bool, UseLEXT2, false, \ ++ "Use LoongISA general EXTensions 2") \ ++ \ ++ product(bool, UseLEXT3, false, \ ++ "Use LoongISA general EXTensions 3") \ ++ \ ++ product(bool, UseCodeCacheAllocOpt, true, \ ++ "Allocate code cache within 32-bit memory address space") \ ++ \ ++ product(intx, UseSyncLevel, 10000, \ ++ "The sync level on Loongson CPUs" \ ++ "UseSyncLevel == 10000, 111, for all Loongson CPUs, " \ ++ "UseSyncLevel == 4000, 101, maybe for GS464V" \ ++ "UseSyncLevel == 3000, 001, maybe for GS464V" \ ++ "UseSyncLevel == 2000, 011, maybe for GS464E/GS264" \ ++ "UseSyncLevel == 1000, 110, maybe for GS464") \ ++ \ ++ develop(bool, UseBoundCheckInstruction, false, \ ++ "Use bound check instruction") \ ++ \ ++ product(intx, SetFSFOFN, 999, \ ++ "Set the FS/FO/FN bits in FCSR" \ ++ "999 means FS/FO/FN will not be changed" \ ++ "=XYZ, with X:FS, Y:FO, Z:FN, X, Y and Z in 0=off, 1=on") \ ++ \ ++ /* assembler */ \ ++ product(bool, UseCountLeadingZerosInstructionMIPS64, true, \ ++ "Use count leading zeros instruction") \ ++ \ ++ product(bool, UseCountTrailingZerosInstructionMIPS64, false, \ ++ "Use count trailing zeros instruction") \ ++ \ ++ product(bool, UseActiveCoresMP, false, \ ++ "Eliminate barriers for single active cpu") ++ ++#endif // CPU_MIPS_VM_GLOBALS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/icBuffer_mips.cpp b/src/hotspot/cpu/mips/icBuffer_mips.cpp +new file mode 100644 +index 0000000000..6586c63965 +--- /dev/null ++++ b/src/hotspot/cpu/mips/icBuffer_mips.cpp +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "code/icBuffer.hpp"
++#include "gc/shared/collectedHeap.inline.hpp"
++#include "interpreter/bytecodes.hpp"
++#include "memory/resourceArea.hpp"
++#include "nativeInst_mips.hpp"
++#include "oops/oop.inline.hpp"
++
++#define T0 RT0
++#define T1 RT1
++#define T2 RT2
++#define T3 RT3
++#define T8 RT8
++#define T9 RT9
++
++int InlineCacheBuffer::ic_stub_code_size() {
++  return NativeMovConstReg::instruction_size +
++         NativeGeneralJump::instruction_size +
++         1;
++  // so that code_end can be set in CodeBuffer
++  // 64bit 15 = 6 + 8 bytes + 1 byte
++  // 32bit 7 = 2 + 4 bytes + 1 byte
++}
++
++
++// We use T1 as the cached oop (klass) now; it is the target of the virtual call.
++// When we reach here, the receiver is in T0.
++// Refer to sharedRuntime_mips.cpp, gen_i2c2i_adapters.
++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) {
++  ResourceMark rm;
++  CodeBuffer      code(code_begin, ic_stub_code_size());
++  MacroAssembler* masm = new MacroAssembler(&code);
++  // note: even though the code contains an embedded oop, we do not need reloc info
++  // because
++  // (1) the oop is old (i.e., doesn't matter for scavenges)
++  // (2) these ICStubs are removed *before* a GC happens, so the roots disappear
++//  assert(cached_oop == NULL || cached_oop->is_perm(), "must be perm oop");
++#define __ masm->
++  __ patchable_set48(T1, (long)cached_value);
++
++  __ patchable_jump(entry_point);
++  __ flush();
++#undef __
++}
++
++
++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) {
++  NativeMovConstReg* move = nativeMovConstReg_at(code_begin);  // creation also verifies the object
++  NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address());
++  return jump->jump_destination();
++}
++
++
++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) {
++  // creation also verifies the object
++  NativeMovConstReg* move = nativeMovConstReg_at(code_begin);
++  // Verifies the jump
++  NativeGeneralJump* jump = nativeGeneralJump_at(move->next_instruction_address());
++  void* o = (void*)move->data();
++  return o;
++}
+diff --git a/src/hotspot/cpu/mips/icache_mips.cpp b/src/hotspot/cpu/mips/icache_mips.cpp
+new file mode 100644
+index 0000000000..e84e37358b
+--- /dev/null
++++ b/src/hotspot/cpu/mips/icache_mips.cpp
+@@ -0,0 +1,41 @@
++/*
++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) ++{ ++#define __ _masm-> ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ address start = __ pc(); ++ ++ __ jr_hb(RA); ++ __ delayed()->ori(V0, A2, 0); ++ ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++#undef __ ++} +diff --git a/src/hotspot/cpu/mips/icache_mips.hpp b/src/hotspot/cpu/mips/icache_mips.hpp +new file mode 100644 +index 0000000000..f90dee6eef +--- /dev/null ++++ b/src/hotspot/cpu/mips/icache_mips.hpp +@@ -0,0 +1,41 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_ICACHE_MIPS_HPP ++#define CPU_MIPS_VM_ICACHE_MIPS_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM modifies ++// code, part of the processor instruction cache potentially has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 2 * BytesPerInstWord, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++}; ++ ++#endif // CPU_MIPS_VM_ICACHE_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/interp_masm_mips.hpp b/src/hotspot/cpu/mips/interp_masm_mips.hpp +new file mode 100644 +index 0000000000..e526e39d53 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interp_masm_mips.hpp +@@ -0,0 +1,276 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2020, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++#define CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++#ifndef CC_INTERP ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ protected: ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); ++#endif // CC_INTERP ++ ++ public: ++ void jump_to_entry(address entry); ++ // narrow int return value ++ void narrow(Register result); ++ ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), _locals_register(LVP), _bcp_register(BCP) {} ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void load_earlyret_value(TosState state); ++ ++#ifdef CC_INTERP ++ void save_bcp() { /* not needed in c++ interpreter and harmless */ } ++ void restore_bcp() { /* not needed in c++ interpreter and harmless */ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg); ++ ++#else ++ ++ // Interpreter-specific registers ++ void save_bcp() { ++ sd(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_bcp() { ++ ld(BCP, FP, frame::interpreter_frame_bcp_offset * wordSize); ++ } ++ ++ void restore_locals() { ++ ld(LVP, FP, frame::interpreter_frame_locals_offset * wordSize); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ld(reg, FP, frame::interpreter_frame_method_offset * wordSize); ++ } ++ ++ void get_const(Register reg){ ++ get_method(reg); ++ ld(reg, reg, in_bytes(Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ld(reg, reg, in_bytes(ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ld(reg, reg, ConstantPool::cache_offset_in_bytes()); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ld(tags, cpool, ConstantPool::tags_offset_in_bytes()); ++ } ++ ++ void 
get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, Register index, Register bytecode, int byte_no, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, Register tmp, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, int bcp_offset, size_t index_size = sizeof(u2)); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp); ++ ++ // load cpool->resolved_klass_at(index) ++ void load_resolved_klass_at_index(Register cpool, // the constant pool (corrupted on return) ++ Register index, // the constant pool index (corrupted on return) ++ Register klass); // contains the Klass on return ++ ++ void pop_ptr( Register r = FSR); ++ void pop_i( Register r = FSR); ++ void pop_l( Register r = FSR); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ ++ void push_ptr( Register r = FSR); ++ void push_i( Register r = FSR); ++ void push_l( Register r = FSR); ++ void push_f(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop(Register r ) { ((MacroAssembler*)this)->pop(r); } ++ ++ void push(Register r ) { ((MacroAssembler*)this)->push(r); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++ void empty_expression_stack() { ++ ld(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // NULL last_sp until next java call ++ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ } ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ //void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ void gen_subtype_check( Register Rsup_klass, Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ void dispatch_only(TosState state, bool generate_poll = false); ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. 
++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++#endif // CC_INTERP ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++#ifndef CC_INTERP ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ void profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++#endif // !CC_INTERP ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++}; ++ ++#endif // CPU_MIPS_VM_INTERP_MASM_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/interp_masm_mips_64.cpp b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp +new file mode 100644 +index 0000000000..eb35bb0633 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interp_masm_mips_64.cpp +@@ 
-0,0 +1,2126 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interp_masm_mips.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of InterpreterMacroAssembler ++ ++#ifdef CC_INTERP ++void InterpreterMacroAssembler::get_method(Register reg) { ++} ++#endif // CC_INTERP ++ ++void InterpreterMacroAssembler::get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ // The runtime address of BCP may be unaligned. ++ // Refer to the SPARC implementation. ++ lbu(reg, BCP, offset+1); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++} ++ ++void InterpreterMacroAssembler::get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset) { ++ assert(reg != tmp, "need separate temp register"); ++ if (offset & 3) { // Offset unaligned? ++ lbu(reg, BCP, offset+3); ++ lbu(tmp, BCP, offset+2); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset+1); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ lbu(tmp, BCP, offset); ++ dsll(reg, reg, 8); ++ daddu(reg, tmp, reg); ++ } else { ++ lwu(reg, BCP, offset); ++ } ++} ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry, "Entry must have been generated by now"); ++ jmp(entry); ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) { ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals (r13 & r14) pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. 
If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ save_bcp(); ++ { ++ Label L; ++ ld(AT,FP,frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT,R0,L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. ++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. ++ assert(java_thread == noreg , "not expecting a precomputed java thread"); ++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ ld(AT, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ stop("InterpreterMacroAssembler::call_VM_base: last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, java_thread, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. 
++ // Not clear if any other register is available, so load AT twice ++ assert(AT != java_thread, "check"); ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_pending_bit); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ lw(AT, java_thread, in_bytes(JavaThread::popframe_condition_offset())); ++ andi(AT, AT, JavaThread::popframe_processing_bit); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ld_ptr(T8, thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr (T8, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ const Address oop_addr (T8, in_bytes(JvmtiThreadState::earlyret_oop_offset())); ++ const Address val_addr (T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ //V0, oop_addr,V1,val_addr ++ switch (state) { ++ case atos: ++ ld_ptr(V0, oop_addr); ++ st_ptr(R0, oop_addr); ++ verify_oop(V0, state); ++ break; ++ case ltos: ++ ld_ptr(V0, val_addr); // fall through ++ break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ++ lw(V0, val_addr); ++ break; ++ case ftos: ++ lwc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case dtos: ++ ldc1(F0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++ break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ move(AT, (int)ilgl); ++ sw(AT, tos_addr); ++ sw(R0, T8, in_bytes(JvmtiThreadState::earlyret_value_offset())); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = T9; ++ ++ assert(java_thread != AT, "check"); ++ assert(java_thread != tmp, "check"); ++ ld_ptr(AT, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ lw(AT, AT, in_bytes(JvmtiThreadState::earlyret_state_offset())); ++ move(tmp, JvmtiThreadState::earlyret_pending); ++ bne(tmp, AT, L); ++ delayed()->nop(); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. 
++ ld_ptr(tmp, java_thread, in_bytes(JavaThread::jvmti_thread_state_offset())); ++ lw(AT, tmp, in_bytes(JvmtiThreadState::earlyret_tos_offset())); ++ move(A0, AT); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), A0); ++ jr(V0); ++ delayed()->nop(); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ lbu(AT, BCP, bcp_offset); ++ lbu(reg, BCP, bcp_offset + 1); ++ ins(reg, AT, 8, 8); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ get_2_byte_integer_at_bcp(index, AT, bcp_offset); ++ } else if (index_size == sizeof(u4)) { ++ get_4_byte_integer_at_bcp(index, AT, bcp_offset); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ nor(index, index, R0); ++ sll(index, index, 0); ++ } else if (index_size == sizeof(u1)) { ++ lbu(index, BCP, bcp_offset); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) { ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ shl(index, 2); ++} ++ ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) { ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
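As a worked example of the extraction that follows, this standalone snippet shows how the resolved bytecode comes out of the cache entry's indices word with a shift of (1 + byte_no) * 8 bits and a one-byte mask. The exact packing shown here (constant pool index in the low 16 bits, bytecode_1 at bit 16, bytecode_2 at bit 24) is an assumption consistent with the shift counts asserted in the code:

#include <cstdint>
#include <cstdio>

// byte_no is 1 (f1_byte) or 2 (f2_byte); BitsPerByte == 8.
static unsigned resolved_bytecode(uint32_t indices, int byte_no) {
  const int shift = (1 + byte_no) * 8;   // bytecode_1_shift / bytecode_2_shift
  return (indices >> shift) & 0xff;      // bytecode_1_mask == bytecode_2_mask
}

int main() {
  // Hypothetical packed word: cp index 0x0042, bytecode_1 0xb6, bytecode_2 0xb9.
  uint32_t indices = 0x0042u | (0xb6u << 16) | (0xb9u << 24);
  std::printf("bytecode_1 = 0x%x\n", resolved_bytecode(indices, 1));  // 0xb6
  std::printf("bytecode_2 = 0x%x\n", resolved_bytecode(indices, 2));  // 0xb9
  return 0;
}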
++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, cache, AT); ++ lw(bytecode, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); ++ if(os::is_MP()) { ++ sync(); // load acquire ++ } ++ ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || ++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), ++ "correct shift count"); ++ dsrl(bytecode, bytecode, shift_count); ++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); ++ move(AT, ConstantPoolCacheEntry::bytecode_1_mask); ++ andr(bytecode, bytecode, AT); ++} ++ ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ shl(tmp, 2 + LogBytesPerWord); ++ ld(cache, FP, frame::interpreter_frame_cache_offset * wordSize); ++ // skip past the header ++ daddiu(cache, cache, in_bytes(ConstantPoolCache::base_offset())); ++ daddu(cache, cache, tmp); ++} ++ ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ bne(mcs, R0, has_counters); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ld(mcs, method, in_bytes(Method::method_counters_offset())); ++ beq(mcs, R0, skip); // No MethodCounters allocated, OutOfMemory ++ delayed()->nop(); ++ bind(has_counters); ++} ++ ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); ++ // convert from field index to resolved_references() index and from ++ // word index to byte offset. 
Since this is a java object, it can be compressed ++ shl(index, LogBytesPerHeapOop); ++ ++ get_constant_pool(result); ++ // load pointer for resolved_references[] objArray ++ ld(result, result, ConstantPool::cache_offset_in_bytes()); ++ ld(result, result, ConstantPoolCache::resolved_references_offset_in_bytes()); ++ resolve_oop_handle(result, tmp); ++ // Add in the index ++ daddu(result, result, index); ++ load_heap_oop(result, Address(result, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp); ++} ++ ++// load cpool->resolved_klass_at(index) ++void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool, ++ Register index, Register klass) { ++ dsll(AT, index, Address::times_ptr); ++ if (UseLEXT1 && Assembler::is_simm(sizeof(ConstantPool), 8)) { ++ gslhx(index, cpool, AT, sizeof(ConstantPool)); ++ } else { ++ daddu(AT, cpool, AT); ++ lh(index, AT, sizeof(ConstantPool)); ++ } ++ Register resolved_klasses = cpool; ++ ld_ptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); ++ dsll(AT, index, Address::times_ptr); ++ daddu(AT, resolved_klasses, AT); ++ ld(klass, AT, Array::base_offset_in_bytes()); ++} ++ ++// Resets LVP to locals. Register sub_klass cannot be any of the above. ++void InterpreterMacroAssembler::gen_subtype_check( Register Rsup_klass, Register Rsub_klass, Label &ok_is_subtype ) { ++ assert( Rsub_klass != Rsup_klass, "Rsup_klass holds superklass" ); ++ assert( Rsub_klass != T1, "T1 holds 2ndary super array length" ); ++ assert( Rsub_klass != T0, "T0 holds 2ndary super array scan ptr" ); ++ // Profile the not-null value's klass. ++ // Here T9 and T1 are used as temporary registers. ++ profile_typecheck(T9, Rsub_klass, T1); // blows T9, reloads T1 ++ ++ // Do the check. ++ check_klass_subtype(Rsub_klass, Rsup_klass, T1, ok_is_subtype); // blows T1 ++ ++ // Profile the failure of the check. ++ profile_typecheck_failed(T9); // blows T9 ++} ++ ++ ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ lw(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ ld(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ lwc1(r, SP, 0); ++ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ ldc1(r, SP, 0); ++ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ // For compatibility reason, don't change to sw. 
++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ sd(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sd(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ daddiu(SP, SP, - Interpreter::stackElementSize); ++ swc1(r, SP, 0); ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ daddiu(SP, SP, -2 * Interpreter::stackElementSize); ++ sdc1(r, SP, 0); ++ sd(R0, SP, Interpreter::stackElementSize); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=V1 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++ ++ ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ld(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++void InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ sd(val, SP, Interpreter::expr_offset_in_bytes(n)); ++} ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) { ++ // record last_sp ++ move(Rsender, SP); ++ sd(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++#ifndef OPT_THREAD ++ Register thread = temp; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ lw(AT, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(AT, R0, run_compiled_code); ++ delayed()->nop(); ++ ld(AT, method, in_bytes(Method::interpreter_entry_offset())); ++ jr(AT); ++ delayed()->nop(); ++ bind(run_compiled_code); ++ } ++ ++ ld(AT, method, in_bytes(Method::from_interpreted_offset())); ++ jr(AT); ++ delayed()->nop(); ++} ++ ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. mips64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing mips64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++// assume the next bytecode in T8. 
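dispatch_base() below is, at bottom, an indirect jump through a per-state table indexed by the next bytecode: the emitted code forms table + (bytecode << LogBytesPerWord), loads the target and jumps to it. A simplified standalone sketch of that lookup (a toy table with invented handler names, not the real 256-entry interpreter tables):

#include <cstdio>

typedef void (*entry_t)();

static void do_iconst_0() { std::printf("iconst_0\n"); }
static void do_iadd()     { std::printf("iadd\n"); }

int main() {
  entry_t table[256] = {};      // one code address per bytecode, per TosState
  table[0x03] = do_iconst_0;
  table[0x60] = do_iadd;

  unsigned char bytecodes[] = { 0x03, 0x60 };
  for (int i = 0; i < 2; i++) {
    entry_t target = table[bytecodes[i]];  // load table[bc << LogBytesPerWord]
    target();                              // jr T3
  }
  return 0;
}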
++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ ++ if (VerifyActivationFrameSize) { ++ Label L; ++ ++ dsubu(T2, FP, SP); ++ int min_frame_size = (frame::link_offset - ++ frame::interpreter_frame_initial_sp_offset) * wordSize; ++ daddiu(T2, T2, -min_frame_size); ++ bgez(T2, L); ++ delayed()->nop(); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ // FIXME: I do not know which register should pass to verify_oop ++ if (verifyoop) verify_oop(FSR, state); ++ dsll(T2, Rnext, LogBytesPerWord); ++ ++ Label safepoint; ++ address* const safepoint_table = Interpreter::safept_table(state); ++ bool needs_thread_local_poll = generate_poll && ++ SafepointMechanism::uses_thread_local_poll() && table != safepoint_table; ++ ++ if (needs_thread_local_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ ld(T3, thread, in_bytes(Thread::polling_page_offset())); ++ andi(T3, T3, SafepointMechanism::poll_bit()); ++ bne(T3, R0, safepoint); ++ delayed()->nop(); ++ } ++ ++ if((long)table >= (long)Interpreter::dispatch_table(btos) && ++ (long)table <= (long)Interpreter::dispatch_table(vtos) ++ ) { ++ int table_size = (long)Interpreter::dispatch_table(itos) - (long)Interpreter::dispatch_table(stos); ++ int table_offset = ((int)state - (int)itos) * table_size; ++ ++ // GP points to the starting address of Interpreter::dispatch_table(itos). ++ // See StubGenerator::generate_call_stub(address& return_address) for the initialization of GP. ++ if(table_offset != 0) { ++ daddiu(T3, GP, table_offset); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } else { ++ if (UseLEXT1) { ++ gsldx(T3, T2, GP, 0); ++ } else { ++ daddu(T3, T2, GP); ++ ld(T3, T3, 0); ++ } ++ } ++ } else { ++ li(T3, (long)table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ } ++ jr(T3); ++ delayed()->nop(); ++ ++ if (needs_thread_local_poll) { ++ bind(safepoint); ++ li(T3, (long)safepoint_table); ++ if (UseLEXT1) { ++ gsldx(T3, T2, T3, 0); ++ } else { ++ daddu(T3, T2, T3); ++ ld(T3, T3, 0); ++ } ++ jr(T3); ++ delayed()->nop(); ++ } ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) { ++ // load next bytecode (load before advancing r13 to prevent AGI) ++ lbu(Rnext, BCP, step); ++ increment(BCP, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ lbu(Rnext, BCP, 0); ++ dispatch_base(state, table); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. 
++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++// used registers : T1, T2, T3, T8 ++// T1 : thread, method access flags ++// T2 : monitor entry pointer ++// T3 : method, monitor top ++// T8 : unlock flag ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) { ++ // Note: Registers V0, V1 and F0, F1 may be in use for the result ++ // check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ // get the value of _do_not_unlock_if_synchronized into T8 ++#ifndef OPT_THREAD ++ Register thread = T1; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ lb(T8, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // reset the flag ++ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ // get method access flags ++ ld(T3, FP, frame::interpreter_frame_method_offset * wordSize); ++ lw(T1, T3, in_bytes(Method::access_flags_offset())); ++ andi(T1, T1, JVM_ACC_SYNCHRONIZED); ++ beq(T1, R0, unlocked); ++ delayed()->nop(); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag is set. ++ bne(T8, R0, no_unlock); ++ delayed()->nop(); ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize ++ - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, unlock); ++ delayed()->nop(); ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ // I think mips do not need empty_FPU_stack ++ // remove possible return value from FPU-stack, otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. 
++ if (install_monitor_exception) { ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ ++ } ++ ++ b(unlocked); ++ delayed()->nop(); ++ } ++ ++ bind(unlock); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // V0, V1: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top(FP, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ ++ bind(restart); ++ // points to current entry, starting with top-most entry ++ ld(c_rarg0, monitor_block_top); ++ // points to word before bottom of monitor block ++ daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ b(entry); ++ delayed()->nop(); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ // remove possible return value from FPU-stack, ++ // otherwise stack could overflow ++ empty_FPU_stack(); ++ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here(); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception ++ // Unlock does not block, so don't have to worry about the frame ++ // We don't have to preserve c_rarg0, since we are going to ++ // throw an exception ++ ++ push(state); ++ unlock_object(c_rarg0); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ empty_FPU_stack(); ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ ++ b(restart); ++ delayed()->nop(); ++ } ++ ++ bind(loop); ++ ld(T1, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ bne(T1, R0, exception);// check if current entry is used ++ delayed()->nop(); ++ ++ daddiu(c_rarg0, c_rarg0, entry_size);// otherwise advance to next entry ++ bind(entry); ++ bne(c_rarg0, T3, loop); // check if bottom reached ++ delayed()->nop(); // if not at bottom then check this entry ++ } ++ ++ bind(no_unlock); ++ ++ // jvmpi support (jvmdi does not generate MethodExit on exception / popFrame) ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ ld(TSR, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ dsubu(AT, TSR, AT); ++ blez(AT, no_reserved_zone_enabling); ++ delayed()->nop(); ++ ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++ } ++ ld(ret_addr, FP, frame::interpreter_frame_return_addr_offset * wordSize); ++ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); ++ move(SP, TSR); // set sp to sender sp ++} ++ ++#endif // CC_INTERP ++ ++// Lock object ++// 
++// Args: ++// c_rarg0: BasicObjectLock to be used for locking ++// ++// Kills: ++// T1 ++// T2 ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ } else { ++ Label done, slow_case; ++ const Register tmp_reg = T2; ++ const Register scr_reg = T1; ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Load object pointer into scr_reg ++ ld(scr_reg, lock_reg, obj_offset); ++ ++ if (UseBiasedLocking) { ++ // Note: we use noreg for the temporary register since it's hard ++ // to come up with a free register on all incoming code paths ++ biased_locking_enter(lock_reg, scr_reg, tmp_reg, noreg, false, done, &slow_case); ++ } ++ ++ // Load (object->mark() | 1) into tmp_reg ++ ld(AT, scr_reg, 0); ++ ori(tmp_reg, AT, 1); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ sd(tmp_reg, lock_reg, mark_offset); ++ ++ assert(lock_offset == 0, "displached header must be first word in BasicObjectLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label succ, fail; ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, succ, &fail); ++ bind(succ); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ b(done); ++ delayed()->nop(); ++ bind(fail); ++ } else { ++ cmpxchg(Address(scr_reg, 0), tmp_reg, lock_reg, AT, true, false, done); ++ } ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) SP <= mark < SP + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in tmp_reg as the result of cmpxchg ++ ++ dsubu(tmp_reg, tmp_reg, SP); ++ move(AT, 7 - os::vm_page_size()); ++ andr(tmp_reg, tmp_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ sd(tmp_reg, lock_reg, mark_offset); ++ if (PrintBiasedLockingStatistics) { ++ bne(tmp_reg, R0, slow_case); ++ delayed()->nop(); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scr_reg); ++ } ++ beq(tmp_reg, R0, done); ++ delayed()->nop(); ++ ++ bind(slow_case); ++ // Call the runtime routine for slow case ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), lock_reg); ++ ++ bind(done); ++ } ++} ++ ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg0: BasicObjectLock for lock ++// ++// Kills: ++// T1 ++// T2 ++// T3 ++// Throw an IllegalMonitorException if object is not locked by current thread ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg0, "The argument is only for looks. 
It must be c_rarg0"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), lock_reg); ++ } else { ++ Label done; ++ ++ const Register tmp_reg = T1; ++ const Register scr_reg = T2; ++ const Register hdr_reg = T3; ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into %T2 ++ daddiu(tmp_reg, lock_reg, BasicObjectLock::lock_offset_in_bytes()); ++ ++ // Load oop into scr_reg(%T1) ++ ld(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ // free entry ++ sd(R0, lock_reg, BasicObjectLock::obj_offset_in_bytes()); ++ if (UseBiasedLocking) { ++ biased_locking_exit(scr_reg, hdr_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ld(hdr_reg, tmp_reg, BasicLock::displaced_header_offset_in_bytes()); ++ // zero for recursive case ++ beq(hdr_reg, R0, done); ++ delayed()->nop(); ++ ++ // Atomic swap back the old header ++ cmpxchg(Address(scr_reg, 0), tmp_reg, hdr_reg, AT, false, false, done); ++ ++ // Call the runtime routine for slow case. ++ sd(scr_reg, lock_reg, BasicObjectLock::obj_offset_in_bytes()); // restore obj ++ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++#ifndef CC_INTERP ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ld(mdp, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++ beq(mdp, R0, zero_continue); ++ delayed()->nop(); ++} ++ ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ ++ // V0 and T0 will be used as two temporary registers. ++ push2(V0, T0); ++ ++ get_method(T0); ++ // Test MDO to avoid the call if it is NULL. ++ ld(V0, T0, in_bytes(Method::method_data_offset())); ++ beq(V0, R0, set_mdp); ++ delayed()->nop(); ++ ++ // method: T0 ++ // bcp: BCP --> S0 ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), T0, BCP); ++ // mdi: V0 ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(T0); ++ ld(T0, T0, in_bytes(Method::method_data_offset())); ++ daddiu(T0, T0, in_bytes(MethodData::data_offset())); ++ daddu(V0, T0, V0); ++ bind(set_mdp); ++ sd(V0, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ pop2(V0, T0); ++} ++ ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ Register method = V0; ++ Register mdp = V1; ++ Register tmp = A0; ++ push(method); ++ push(mdp); ++ push(tmp); ++ test_method_data_pointer(mdp, verify_continue); // If mdp is zero, continue ++ get_method(method); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. 
++ lhu(tmp, mdp, in_bytes(DataLayout::bci_offset())); ++ ld(AT, method, in_bytes(Method::const_offset())); ++ daddu(tmp, tmp, AT); ++ daddiu(tmp, tmp, in_bytes(ConstMethod::codes_offset())); ++ beq(tmp, BCP, verify_continue); ++ delayed()->nop(); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), method, BCP, mdp); ++ bind(verify_continue); ++ pop(tmp); ++ pop(mdp); ++ pop(method); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ sd(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ ld(AT, data); ++ sltu(tmp, R0, AT); ++ dsubu(AT, AT, tmp); ++ sd(AT, data); ++ } else { ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ ld(AT, data); ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ daddu(AT, AT, tmp); ++ sd(AT, data); ++ } ++ pop(tmp); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) { ++ Register tmp = S0; ++ push(tmp); ++ if (decrement) { ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Decrement the register. ++ daddu(tmp, mdp_in, reg); ++ ld(AT, tmp, constant); ++ sltu(tmp, R0, AT); ++ dsubu(AT, AT, tmp); ++ daddu(tmp, mdp_in, reg); ++ sd(AT, tmp, constant); ++ } else { ++ assert(Assembler::is_simm16(constant), "constant is not a simm16 !"); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ // Increment the register. ++ daddu(tmp, mdp_in, reg); ++ ld(AT, tmp, constant); ++ daddiu(tmp, AT, DataLayout::counter_increment); ++ sltu(tmp, R0, tmp); ++ daddu(AT, AT, tmp); ++ daddu(tmp, mdp_in, reg); ++ sd(AT, tmp, constant); ++ } ++ pop(tmp); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::header_offset()); ++ int header_bits = DataLayout::flag_mask_to_header_mask(flag_byte_constant); ++ // Set the flag ++ lw(AT, Address(mdp_in, header_offset)); ++ if(Assembler::is_simm16(header_bits)) { ++ ori(AT, AT, header_bits); ++ } else { ++ push(T8); ++ // T8 is used as a temporary register. 
++ move(T8, header_bits); ++ orr(AT, AT, T8); ++ pop(T8); ++ } ++ sw(AT, Address(mdp_in, header_offset)); ++} ++ ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ ld(AT, Address(mdp_in, offset)); ++ bne(AT, value, not_equal_continue); ++ delayed()->nop(); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ld(test_value_out, Address(mdp_in, offset)); ++ bne(value, test_value_out, not_equal_continue); ++ delayed()->nop(); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, mdp_in, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ daddu(AT, reg, mdp_in); ++ assert(Assembler::is_simm16(offset_of_disp), "offset is not an simm16"); ++ ld(AT, AT, offset_of_disp); ++ daddu(mdp_in, mdp_in, AT); ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if(Assembler::is_simm16(constant)) { ++ daddiu(mdp_in, mdp_in, constant); ++ } else { ++ move(AT, constant); ++ daddu(mdp_in, mdp_in, AT); ++ } ++ sd(mdp_in, Address(FP, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. ++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ ld(bumped_count, mdp, in_bytes(JumpData::taken_offset())); ++ assert(DataLayout::counter_increment == 1, "flow-free idiom only works with 1"); ++ daddiu(AT, bumped_count, DataLayout::counter_increment); ++ sltu(AT, R0, AT); ++ daddu(bumped_count, bumped_count, AT); ++ sd(bumped_count, mdp, in_bytes(JumpData::taken_offset())); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. 
Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ bne(receiver, R0, not_null); ++ delayed()->nop(); ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, skip_receiver_profile); ++ delayed()->nop(); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++#if INCLUDE_JVMCI ++void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { ++ assert_different_registers(method, mdp, reg2); ++ if (ProfileInterpreter && MethodProfileWidth > 0) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label done; ++ record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, ++ &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); ++ bind(done); ++ ++ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++#endif // INCLUDE_JVMCI ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. 
(An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++ return; ++ } ++ ++ int last_row = VirtualCallData::row_limit() - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the receiver and for null. ++ // Take any of three different outcomes: ++ // 1. found receiver => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. ++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the receiver is receiver[n]. ++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(row)); ++ test_mdp_data_at(mdp, recvr_offset, receiver, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the receiver from the CallData.) ++ ++ // The receiver is receiver[n]. Increment count[n]. ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on receiver[n]... Test for null. ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (is_virtual_call) { ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ bind(found_null); ++ } else { ++ bne(reg2, R0, done); ++ delayed()->nop(); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ beq(reg2, R0, found_null); ++ delayed()->nop(); ++ ++ // Put all the "Case 3" tests here. ++ record_klass_in_profile_helper(receiver, mdp, reg2, start_row + 1, done, is_virtual_call); ++ ++ // Found a null. Keep searching for a matching receiver, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching receiver, but we ++ // observed the receiver[start_row] is NULL. ++ ++ // Fill in the receiver field and increment the count. 
++ int recvr_offset = in_bytes(VirtualCallData::receiver_offset(start_row)); ++ set_mdp_data_at(mdp, recvr_offset, receiver); ++ int count_offset = in_bytes(VirtualCallData::receiver_count_offset(start_row)); ++ move(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ beq(R0, R0, done); ++ delayed()->nop(); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ beq(R0, R0, profile_continue); ++ delayed()->nop(); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ move(reg2, in_bytes(MultiBranchData::per_case_size())); ++ if (UseLEXT1) { ++ gsdmult(index, index, reg2); ++ } else { ++ dmult(index, reg2); ++ mflo(index); ++ } ++ daddiu(index, index, in_bytes(MultiBranchData::case_array_offset())); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ ++ // Get method->_constMethod->_result_type ++ ld(T9, FP, frame::interpreter_frame_method_offset * wordSize); ++ ld(T9, T9, in_bytes(Method::const_offset())); ++ lbu(T9, T9, in_bytes(ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ addiu(AT, T9, -T_INT); ++ beq(AT, R0, done); ++ delayed()->nop(); ++ ++ // mask integer result to narrower return type. 
++ addiu(AT, T9, -T_BOOLEAN); ++ bne(AT, R0, notBool); ++ delayed()->nop(); ++ andi(result, result, 0x1); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notBool); ++ addiu(AT, T9, -T_BYTE); ++ bne(AT, R0, notByte); ++ delayed()->nop(); ++ seb(result, result); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notByte); ++ addiu(AT, T9, -T_CHAR); ++ bne(AT, R0, notChar); ++ delayed()->nop(); ++ andi(result, result, 0xFFFF); ++ beq(R0, R0, done); ++ delayed()->nop(); ++ ++ bind(notChar); ++ seh(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ if (mdo_addr.index() != noreg) { ++ guarantee(T0 != mdo_addr.base(), "The base register will be corrupted !"); ++ guarantee(T0 != mdo_addr.index(), "The index register will be corrupted !"); ++ push(T0); ++ dsll(T0, mdo_addr.index(), mdo_addr.scale()); ++ daddu(T0, T0, mdo_addr.base()); ++ } ++ ++ bne(obj, R0, update); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::null_seen); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ andi(AT, obj, TypeEntries::type_unknown); ++ bne(AT, R0, next); ++ delayed()->nop(); ++ ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ daddiu(AT, AT, -(TypeEntries::null_seen)); ++ beq(AT, R0, none); ++ delayed()->nop(); ++ ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ xorr(obj, obj, AT); ++ assert(TypeEntries::type_klass_mask == -4, "must be"); ++ dextm(AT, obj, 2, 62); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ // different than before. Cannot keep accurate profile. ++ if (mdo_addr.index() == noreg) { ++ ld(AT, mdo_addr); ++ } else { ++ ld(AT, T0, mdo_addr.disp()); ++ } ++ ori(AT, AT, TypeEntries::type_unknown); ++ if (mdo_addr.index() == noreg) { ++ sd(AT, mdo_addr); ++ } else { ++ sd(AT, T0, mdo_addr.disp()); ++ } ++ beq(R0, R0, next); ++ delayed()->nop(); ++ ++ bind(none); ++ // first time here. Set profile type. ++ if (mdo_addr.index() == noreg) { ++ sd(obj, mdo_addr); ++ } else { ++ sd(obj, T0, mdo_addr.disp()); ++ } ++ ++ bind(next); ++ if (mdo_addr.index() != noreg) { ++ pop(T0); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? 
in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ lb(AT, mdp, in_bytes(DataLayout::tag_offset()) - off_to_start); ++ li(tmp, is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ if (Assembler::is_simm16(off_to_args)) { ++ daddiu(mdp, mdp, off_to_args); ++ } else { ++ move(AT, off_to_args); ++ daddu(mdp, mdp, AT); ++ } ++ ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ if (Assembler::is_simm16(-1 * i * TypeStackSlotEntries::per_arg_count())) { ++ addiu32(tmp, tmp, -1 * i * TypeStackSlotEntries::per_arg_count()); ++ } else { ++ li(AT, i*TypeStackSlotEntries::per_arg_count()); ++ subu32(tmp, tmp, AT); ++ } ++ ++ li(AT, TypeStackSlotEntries::per_arg_count()); ++ slt(AT, tmp, AT); ++ bne(AT, R0, done); ++ delayed()->nop(); ++ } ++ ld(tmp, callee, in_bytes(Method::const_offset())); ++ ++ lhu(tmp, tmp, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ld(AT, mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args); ++ subu(tmp, tmp, AT); ++ ++ addiu32(tmp, tmp, -1); ++ ++ Address arg_addr = argument_address(tmp); ++ ld(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ if (Assembler::is_simm16(to_add)) { ++ daddiu(mdp, mdp, to_add); ++ } else { ++ move(AT, to_add); ++ daddu(mdp, mdp, AT); ++ } ++ ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ld(tmp, mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args); ++ ++ int tmp_arg_counts = TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(); ++ if (Assembler::is_simm16(-1 * tmp_arg_counts)) { ++ addiu32(tmp, tmp, -1 * tmp_arg_counts); ++ } else { ++ move(AT, tmp_arg_counts); ++ subu32(mdp, mdp, AT); ++ } ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ sll(tmp, tmp, exact_log2(DataLayout::cell_size)); ++ daddu(mdp, mdp, tmp); ++ } ++ sd(mdp, FP, frame::interpreter_frame_mdp_offset * wordSize); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ lb(tmp, _bcp_register, 0); ++ daddiu(AT, tmp, -1 * Bytecodes::_invokedynamic); ++ beq(AT, R0, do_profile); ++ delayed()->daddiu(AT, tmp, -1 * Bytecodes::_invokehandle); ++ beq(AT, R0, do_profile); ++ delayed()->nop(); ++ ++ get_method(tmp); ++ lhu(tmp, tmp, Method::intrinsic_id_offset_in_bytes()); ++ li(AT, vmIntrinsics::_compiledLambdaForm); ++ bne(tmp, AT, profile_continue); ++ delayed()->nop(); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ daddu(tmp, ret, R0); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ guarantee(T9 == tmp1, "You are reqired to use T9 as the index register for MIPS !"); ++ ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ lw(tmp1, mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset())); ++ bltz(tmp1, profile_continue); ++ delayed()->nop(); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ daddu(mdp, mdp, tmp1); ++ ld(tmp1, mdp, in_bytes(ArrayData::array_len_offset())); ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ dsll(AT, tmp1, per_arg_scale); ++ daddu(AT, AT, mdp); ++ ld(tmp2, AT, off_base); ++ ++ subu(tmp2, R0, tmp2); ++ ++ // read the parameter from the local area ++ dsll(AT, tmp2, Interpreter::logStackElementSize); ++ daddu(AT, AT, _locals_register); ++ ld(tmp2, AT, 0); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrement(tmp1, TypeStackSlotEntries::per_arg_count()); ++ bgtz(tmp1, loop); ++ delayed()->nop(); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ++} ++#endif // !CC_INTERP ++ ++ ++void InterpreterMacroAssembler::notify_method_entry() { ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, L); ++ delayed()->nop(); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ //Rthread, ++ thread, ++ //Rmethod); ++ S3); ++ } ++ ++} ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) { ++ Register tempreg = T0; ++#ifndef OPT_THREAD ++ Register thread = T8; ++ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label skip; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // template interpreter will leave it on the top of the stack. 
++ push(state); ++ lw(tempreg, thread, in_bytes(JavaThread::interp_only_mode_offset())); ++ beq(tempreg, R0, skip); ++ delayed()->nop(); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(skip); ++ pop(state); ++ } ++ ++ { ++ // Dtrace notification ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ push(state); ++ get_method(S3); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ //Rthread, Rmethod); ++ thread, S3); ++ pop(state); ++ } ++} ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, int mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, AT); ++ ++ if (!preloaded) { ++ lw(scratch, counter_addr); ++ } ++ addiu32(scratch, scratch, increment); ++ sw(scratch, counter_addr); ++ ++ move(AT, mask); ++ andr(scratch, scratch, AT); ++ ++ if (cond == Assembler::zero) { ++ beq(scratch, R0, *where); ++ delayed()->nop(); ++ } else { ++ unimplemented(); ++ } ++} +diff --git a/src/hotspot/cpu/mips/interpreterRT_mips.hpp b/src/hotspot/cpu/mips/interpreterRT_mips.hpp +new file mode 100644 +index 0000000000..054138ea42 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interpreterRT_mips.hpp +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++#define CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP ++ ++// This is included in the middle of class Interpreter. ++// Do not include files here. 
++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ ++ void move(int from_offset, int to_offset); ++ ++ void box(int from_offset, int to_offset); ++ void pass_int(); ++ void pass_long(); ++ void pass_object(); ++ void pass_float(); ++ void pass_double(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_MIPS_VM_INTERPRETERRT_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp +new file mode 100644 +index 0000000000..e655b2a1a8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/interpreterRT_mips_64.cpp +@@ -0,0 +1,252 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Implementation of SignatureHandlerGenerator ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator( ++ const methodHandle& method, CodeBuffer* buffer) : NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::move(int from_offset, int to_offset) { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(from_offset)); ++ __ sd(temp(), to(), to_offset * longSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::box(int from_offset, int to_offset) { ++ __ addiu(temp(), from(),Interpreter::local_offset_in_bytes(from_offset) ); ++ __ lw(AT, from(), Interpreter::local_offset_in_bytes(from_offset) ); ++ ++ __ movz(temp(), R0, AT); ++ __ sw(temp(), to(), to_offset * wordSize); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ // return result handler ++ __ li(V0, AbstractInterpreter::result_handler(method()->result_type())); ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ flush(); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lw(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that long type takes 2 stack spaces, so in do_long(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ld(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ Argument jni_arg(jni_offset()); ++ ++ // the handle for a receiver will never be null ++ bool do_NULL_check = offset() != 0 || is_static(); ++ if (do_NULL_check) { ++ __ ld(AT, from(), Interpreter::local_offset_in_bytes(offset())); ++ __ daddiu((jni_arg.is_Register() ? jni_arg.as_Register() : temp()), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ movz((jni_arg.is_Register() ? 
jni_arg.as_Register() : temp()), R0, AT); ++ } else { ++ __ daddiu(jni_arg.as_Register(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } ++ ++ if (!jni_arg.is_Register()) ++ __ sd(temp(), jni_arg.as_caller_address()); ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ lwc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset())); ++ } else { ++ __ lw(temp(), from(), Interpreter::local_offset_in_bytes(offset())); ++ __ sw(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++// the jvm specifies that double type takes 2 stack spaces, so in do_double(), _offset += 2. ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ Argument jni_arg(jni_offset()); ++ if(jni_arg.is_Register()) { ++ __ ldc1(jni_arg.as_FloatRegister(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ } else { ++ __ ld(temp(), from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ __ sd(temp(), jni_arg.as_caller_address()); ++ } ++} ++ ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return LVP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return SP; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return T8; } ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _reg_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2 * Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ if (_num_args < Argument::n_register_parameters) { ++ *_reg_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? 
NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x01 << (_num_args*2)); // mark as float ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters) { ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= (0x3 << (_num_args*2)); // mark as double ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(methodHandle method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ // see TemplateInterpreterGenerator::generate_slow_signature_handler() ++ _reg_args = to - Argument::n_register_parameters + jni_offset() - 1; ++ _fp_identifiers = to - 1; ++ *(int*) _fp_identifiers = 0; ++ _num_args = jni_offset(); ++ } ++}; ++ ++ ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate(UCONST64(-1)); ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff --git a/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp +new file mode 100644 +index 0000000000..dccdf6a019 +--- /dev/null ++++ b/src/hotspot/cpu/mips/javaFrameAnchor_mips.hpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++#define CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ // fence? ++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) ++ _last_Java_sp = NULL; ++ ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ // Always walkable ++ bool walkable(void) { return true; } ++ // Never any thing to do since we are always walkable and can find address of return addresses ++ void make_walkable(JavaThread* thread) { } ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { _last_Java_fp = fp; } ++ ++#endif // CPU_MIPS_VM_JAVAFRAMEANCHOR_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp +new file mode 100644 +index 0000000000..46c8889f99 +--- /dev/null ++++ b/src/hotspot/cpu/mips/jniFastGetField_mips_64.cpp +@@ -0,0 +1,166 @@ ++/* ++ * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define BUFFER_SIZE 30*wordSize ++ ++// Instead of issuing lfence for LoadLoad barrier, we create data dependency ++// between loads, which is more efficient than lfence. ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ ++ Label slow; ++ ++ // return pc RA ++ // jni env A0 ++ // obj A1 ++ // jfieldID A2 ++ ++ address counter_addr = SafepointSynchronize::safepoint_counter_addr(); ++ __ set64(AT, (long)counter_addr); ++ __ lw(T1, AT, 0); ++ ++ // Parameters(A0~A3) should not be modified, since they will be used in slow path ++ __ andi(AT, T1, 1); ++ __ bne(AT, R0, slow); ++ __ delayed()->nop(); ++ ++ __ move(T0, A1); ++ // Both T0 and T9 are clobbered by try_resolve_jobject_in_native. 
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->try_resolve_jobject_in_native(masm, /* jni_env */ A0, T0, T9, slow); ++ ++ __ dsrl(T2, A2, 2); // offset ++ __ daddu(T0, T0, T2); ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ lbu (V0, T0, 0); break; ++ case T_BYTE: __ lb (V0, T0, 0); break; ++ case T_CHAR: __ lhu (V0, T0, 0); break; ++ case T_SHORT: __ lh (V0, T0, 0); break; ++ case T_INT: __ lw (V0, T0, 0); break; ++ case T_LONG: __ ld (V0, T0, 0); break; ++ case T_FLOAT: __ lwc1(F0, T0, 0); break; ++ case T_DOUBLE: __ ldc1(F0, T0, 0); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ set64(AT, (long)counter_addr); ++ __ lw(AT, AT, 0); ++ __ bne(T1, AT, slow); ++ __ delayed()->nop(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: ShouldNotReachHere(); ++ } ++ __ jmp(slow_case_addr); ++ __ delayed()->nop(); ++ ++ __ flush (); ++ ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_int_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_int_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/mips/jniTypes_mips.hpp b/src/hotspot/cpu/mips/jniTypes_mips.hpp +new file mode 100644 +index 0000000000..e93237ffd9 +--- /dev/null ++++ b/src/hotspot/cpu/mips/jniTypes_mips.hpp +@@ -0,0 +1,144 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_JNITYPES_MIPS_HPP ++#define CPU_MIPS_VM_JNITYPES_MIPS_HPP ++ ++#include "jni.h" ++#include "memory/allocation.hpp" ++#include "oops/oop.hpp" ++ ++// This file holds platform-dependent routines used to write primitive jni ++// types to the array of arguments passed into JavaCalls::call ++ ++class JNITypes : AllStatic { ++ // These functions write a java primitive type (in native format) ++ // to a java stack slot array to be passed as an argument to JavaCalls:calls. ++ // I.e., they are functionally 'push' operations if they have a 'pos' ++ // formal parameter. Note that jlong's and jdouble's are written ++ // _in reverse_ of the order in which they appear in the interpreter ++ // stack. This is because call stubs (see stubGenerator_sparc.cpp) ++ // reverse the argument list constructed by JavaCallArguments (see ++ // javaCalls.hpp). ++ ++private: ++ ++ // 32bit Helper routines. ++ static inline void put_int2r(jint *from, intptr_t *to) { *(jint *)(to++) = from[1]; ++ *(jint *)(to ) = from[0]; } ++ static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; } ++ ++public: ++ // In MIPS64, the sizeof intptr_t is 8 bytes, and each unit in JavaCallArguments::_value_buffer[] ++ // is 8 bytes. ++ // If we only write the low 4 bytes with (jint *), the high 4-bits will be left with uncertain values. ++ // Then, in JavaCallArguments::parameters(), the whole 8 bytes of a T_INT parameter is loaded. ++ // This error occurs in ReflectInvoke.java ++ // The parameter of DD(int) should be 4 instead of 0x550000004. ++ // ++ // See: [runtime/javaCalls.hpp] ++ ++ static inline void put_int(jint from, intptr_t *to) { *(intptr_t *)(to + 0 ) = from; } ++ static inline void put_int(jint from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = from; } ++ static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; } ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_long(jlong from, intptr_t *to) { ++ *(jlong*) (to + 1) = from; ++ *(jlong*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_long(jlong from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = from; ++ *(jlong*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_long(jlong *from, intptr_t *to, int& pos) { ++ *(jlong*) (to + 1 + pos) = *from; ++ *(jlong*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // Oops are stored in native format in one JavaCallArgument slot at *to. 
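++  // Illustrative example of the resulting buffer layout (values chosen only
++  // for the example): packing the arguments of a call with signature
++  // (jint, jlong) proceeds as
++  //   intptr_t buf[3]; int pos = 0;
++  //   JNITypes::put_int(4, buf, pos);        // buf[0] = 4, pos = 1
++  //   JNITypes::put_long(0x1234, buf, pos);  // buf[1] = buf[2] = 0x1234, pos = 3
++  // i.e. an int still occupies one full 8-byte slot and a long occupies two
++  // slots with the value duplicated into both, as described above.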
++ static inline void put_obj(oop from, intptr_t *to) { *(oop *)(to + 0 ) = from; } ++ static inline void put_obj(oop from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = from; } ++ static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; } ++ ++ // Floats are stored in native format in one JavaCallArgument slot at *to. ++ static inline void put_float(jfloat from, intptr_t *to) { *(jfloat *)(to + 0 ) = from; } ++ static inline void put_float(jfloat from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = from; } ++ static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; } ++ ++#undef _JNI_SLOT_OFFSET ++#define _JNI_SLOT_OFFSET 0 ++ ++ // Longs are stored in native format in one JavaCallArgument slot at ++ // *(to). ++ // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest), ++ // *(to + 1) must contains a copy of the long value. Otherwise it will corrupts. ++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ *(jdouble*) (to) = from; ++ } ++ ++ // A long parameter occupies two slot. ++ // It must fit the layout rule in methodHandle. ++ // ++ // See: [runtime/reflection.cpp] Reflection::invoke() ++ // assert(java_args.size_of_parameters() == method->size_of_parameters(), "just checking"); ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_MIPS_VM_JNITYPES_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.cpp b/src/hotspot/cpu/mips/macroAssembler_mips.cpp +new file mode 100644 +index 0000000000..cc868cae55 +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.cpp +@@ -0,0 +1,4257 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "jvm.h"
++#include "asm/assembler.hpp"
++#include "asm/assembler.inline.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "compiler/disassembler.hpp"
++#include "gc/shared/barrierSet.hpp"
++#include "gc/shared/barrierSetAssembler.hpp"
++#include "gc/shared/collectedHeap.inline.hpp"
++#include "interpreter/interpreter.hpp"
++#include "memory/resourceArea.hpp"
++#include "memory/universe.hpp"
++#include "nativeInst_mips.hpp"
++#include "prims/methodHandles.hpp"
++#include "runtime/biasedLocking.hpp"
++#include "runtime/interfaceSupport.inline.hpp"
++#include "runtime/objectMonitor.hpp"
++#include "runtime/os.hpp"
++#include "runtime/safepoint.hpp"
++#include "runtime/safepointMechanism.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "runtime/stubRoutines.hpp"
++#include "utilities/macros.hpp"
++
++#ifdef COMPILER2
++#include "opto/intrinsicnode.hpp"
++#endif
++
++#define T0 RT0
++#define T1 RT1
++#define T2 RT2
++#define T3 RT3
++#define T8 RT8
++#define T9 RT9
++
++// Implementation of MacroAssembler
++
++intptr_t MacroAssembler::i[32] = {0};
++float MacroAssembler::f[32] = {0.0};
++
++void MacroAssembler::print(outputStream *s) {
++  unsigned int k;
++  for(k=0; k<32; k++) {
++    s->print_cr("i%d = 0x%.16lx", k, i[k]);
++  }
++  s->cr();
++
++  for(k=0; k<32; k++) {
++    s->print_cr("f%d = %f", k, f[k]);
++  }
++  s->cr();
++}
++
++int MacroAssembler::i_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->i[k]; }
++int MacroAssembler::f_offset(unsigned int k) { return (intptr_t)&((MacroAssembler*)0)->f[k]; }
++
++void MacroAssembler::save_registers(MacroAssembler *masm) {
++#define __ masm->
++  for(int k=0; k<32; k++) {
++    __ sw (as_Register(k), A0, i_offset(k));
++  }
++
++  for(int k=0; k<32; k++) {
++    __ swc1 (as_FloatRegister(k), A0, f_offset(k));
++  }
++#undef __
++}
++
++void MacroAssembler::restore_registers(MacroAssembler *masm) {
++#define __ masm->
++  for(int k=0; k<32; k++) {
++    __ lw (as_Register(k), A0, i_offset(k));
++  }
++
++  for(int k=0; k<32; k++) {
++    __ lwc1 (as_FloatRegister(k), A0, f_offset(k));
++  }
++#undef __
++}
++
++
++void MacroAssembler::pd_patch_instruction(address branch, address target) {
++  jint& stub_inst = *(jint*) branch;
++  jint *pc = (jint *)branch;
++
++  if((opcode(stub_inst) == special_op) && (special(stub_inst) == daddu_op)) {
++    //b_far:
++    //  move(AT, RA); // daddu
++    //  emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1));
++    //  nop();
++    //  lui(T9, 0); // to be patched
++    //  ori(T9, 0);
++    //  daddu(T9, T9, RA);
++    //  move(RA, AT);
++    //  jr(T9);
++
++    assert(opcode(pc[3]) == lui_op
++        && opcode(pc[4]) == ori_op
++        && special(pc[5]) == daddu_op, "Not a branch label patch");
++    if(!(opcode(pc[3]) == lui_op
++        && opcode(pc[4]) == ori_op
++        && special(pc[5]) == daddu_op)) { tty->print_cr("Not a branch label patch"); }
++
++    int offset = target - branch;
++    if (!is_simm16(offset)) {
++      pc[3] = (pc[3] & 0xffff0000) | high16(offset - 12);
++      pc[4] = (pc[4] & 0xffff0000) | low16(offset - 12);
++    } else {
++      // revert to "beq + nop"
++      CodeBuffer cb(branch, 4 * 10);
++      MacroAssembler masm(&cb);
++#define __ masm.
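++      // The offset fits in a signed 16-bit branch again, so the far-branch
++      // sequence is overwritten with a short b(target) and padded with nops
++      // to keep the patched region the same length as before.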
++ __ b(target); ++ __ delayed()->nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ __ nop(); ++ } ++ return; ++ } else if (special(pc[4]) == jr_op ++ && opcode(pc[4]) == special_op ++ && (((opcode(pc[0]) == lui_op) || opcode(pc[0]) == daddiu_op) || (opcode(pc[0]) == ori_op))) { ++ //jmp_far: ++ // patchable_set48(T9, target); ++ // jr(T9); ++ // nop(); ++ ++ CodeBuffer cb(branch, 4 * 4); ++ MacroAssembler masm(&cb); ++ masm.patchable_set48(T9, (long)(target)); ++ return; ++ } ++ ++#ifndef PRODUCT ++ if (!is_simm16((target - branch - 4) >> 2)) { ++ tty->print_cr("Illegal patching: branch = " INTPTR_FORMAT ", target = " INTPTR_FORMAT, p2i(branch), p2i(target)); ++ tty->print_cr("======= Start decoding at branch = " INTPTR_FORMAT " =======", p2i(branch)); ++ Disassembler::decode(branch - 4 * 16, branch + 4 * 16, tty); ++ tty->print_cr("======= End of decoding ======="); ++ } ++#endif ++ ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++} ++ ++static inline address first_cache_address() { ++ return CodeCache::low_bound() + sizeof(HeapBlock::Header); ++} ++ ++static inline address last_cache_address() { ++ return CodeCache::high_bound() - Assembler::InstructionSize; ++} ++ ++int MacroAssembler::call_size(address target, bool far, bool patchable) { ++ if (patchable) return 6 << Assembler::LogInstructionSize; ++ if (!far) return 2 << Assembler::LogInstructionSize; // jal + nop ++ return (insts_for_set64((jlong)target) + 2) << Assembler::LogInstructionSize; ++} ++ ++// Can we reach target using jal/j from anywhere ++// in the code cache (because code can be relocated)? ++bool MacroAssembler::reachable_from_cache(address target) { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return (cl <= target) && (target <= ch) && fit_in_jal(cl, ch); ++} ++ ++bool MacroAssembler::reachable_from_cache() { ++ if (ForceUnreachable) { ++ return false; ++ } else { ++ address cl = first_cache_address(); ++ address ch = last_cache_address(); ++ ++ return fit_in_jal(cl, ch); ++ } ++} ++ ++void MacroAssembler::general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ j(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_jump(address target) { ++ if (reachable_from_cache(target)) { ++ //j(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_jump(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ j(target); ++ delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_jump(address target) { ++ return 6; ++} ++ ++void MacroAssembler::general_call(address target) { ++ if (reachable_from_cache(target)) { ++ jal(target); ++ delayed()->nop(); ++ } else { ++ set64(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_general_call(address target) { ++ if (reachable_from_cache(target)) { ++ //jal(target); ++ //nop(); ++ return 2; ++ } else { ++ //set64(T9, (long)target); ++ //jalr(T9); ++ //nop(); ++ return insts_for_set64((jlong)target) + 2; ++ } ++} ++ ++void MacroAssembler::patchable_call(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ 
delayed()->nop(); ++ } else { ++ patchable_set48(T9, (long)target); ++ jalr(T9); ++ delayed()->nop(); ++ } ++} ++ ++int MacroAssembler::insts_for_patchable_call(address target) { ++ return 6; ++} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(AddressLiteral entry, CodeBuffer *cbuf) { ++ assert(JavaThread::current()->is_Compiler_thread(), "just checking"); ++ assert(entry.rspec().type() == relocInfo::runtime_call_type ++ || entry.rspec().type() == relocInfo::opt_virtual_call_type ++ || entry.rspec().type() == relocInfo::static_call_type ++ || entry.rspec().type() == relocInfo::virtual_call_type, "wrong reloc type"); ++ ++ address target = entry.target(); ++ if (!reachable_from_cache()) { ++ address stub = emit_trampoline_stub(offset(), target); ++ if (stub == NULL) { ++ return NULL; // CodeCache is full ++ } ++ } ++ ++ if (cbuf) cbuf->set_insts_mark(); ++ relocate(entry.rspec()); ++ ++ if (reachable_from_cache()) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ jal(target); ++ delayed()->nop(); ++ } else { ++ // load the call target from the trampoline stub ++ // branch ++ long dest = (long)pc(); ++ dest += (dest & 0x8000) << 1; ++ lui(T9, dest >> 32); ++ ori(T9, T9, split_low(dest >> 16)); ++ dsll(T9, T9, 16); ++ ld(T9, T9, simm16(split_low(dest))); ++ jalr(T9); ++ delayed()->nop(); ++ } ++ return pc(); ++} ++ ++// Emit a trampoline stub for a call to a target which is too far away. ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ // Max stub size: alignment nop, TrampolineStub. ++ address stub = start_a_stub(NativeInstruction::nop_instruction_size ++ + NativeCallTrampolineStub::instruction_size); ++ if (stub == NULL) { ++ return NULL; // CodeBuffer::expand failed ++ } ++ ++ // Create a trampoline stub relocation which relates this trampoline stub ++ // with the call instruction at insts_call_instruction_offset in the ++ // instructions code-section. 
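++  // The stub body emitted below is simply a word-aligned 64-bit slot holding
++  // the destination address; the relocation ties it to the call site so that
++  // the lui/ori/dsll/ld sequence in trampoline_call() above can be patched to
++  // load its target from this slot.  (The "dest += (dest & 0x8000) << 1"
++  // adjustment there compensates for the sign extension of the 16-bit offset
++  // in the final ld.)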
++ align(wordSize); ++ relocate(trampoline_stub_Relocation::spec(code()->insts()->start() ++ + insts_call_instruction_offset)); ++ emit_int64((int64_t)dest); ++ end_a_stub(); ++ return stub; ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::beq(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ beq_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ bne(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, address entry) { ++ u_char * cur_pc = pc(); ++ ++ //Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ Assembler::bne(rs, rt, offset(entry)); ++ } else { ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(entry); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::bne_far(Register rs, Register rt, Label& L) { ++ if (L.is_bound()) { ++ bne_far(rs, rt, target(L)); ++ } else { ++ u_char * cur_pc = pc(); ++ Label not_jump; ++ beq(rs, rt, not_jump); ++ delayed()->nop(); ++ ++ b_far(L); ++ delayed()->nop(); ++ ++ bind(not_jump); ++ has_delay_slot(); ++ } ++} ++ ++void MacroAssembler::beq_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ bne(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bne_long(Register rs, Register rt, Label& L) { ++ Label not_taken; ++ ++ beq(rs, rt, not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1t_long(Label& L) { ++ Label not_taken; ++ ++ bc1f(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::bc1f_long(Label& L) { ++ Label not_taken; ++ ++ bc1t(not_taken); ++ delayed()->nop(); ++ ++ jmp_far(L); ++ ++ bind(not_taken); ++} ++ ++void MacroAssembler::b_far(Label& L) { ++ if (L.is_bound()) { ++ b_far(target(L)); ++ } else { ++ volatile address dest = target(L); ++// ++// MacroAssembler::pd_patch_instruction branch=55651ed514, target=55651ef6d8 ++// 0x00000055651ed514: daddu at, ra, zero ++// 0x00000055651ed518: [4110001]bgezal zero, 0x00000055651ed520 ++// ++// 0x00000055651ed51c: sll zero, zero, 0 ++// 0x00000055651ed520: lui t9, 0x0 ++// 0x00000055651ed524: ori t9, t9, 0x21b8 ++// 0x00000055651ed528: daddu t9, t9, ra ++// 0x00000055651ed52c: daddu ra, at, zero ++// 0x00000055651ed530: jr t9 ++// 0x00000055651ed534: sll zero, zero, 0 ++// ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ lui(T9, 0); // to be patched ++ ori(T9, T9, 0); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::b_far(address entry) { ++ u_char * cur_pc = pc(); ++ ++ // Near/Far jump ++ if(is_simm16((entry - pc() - 4) / 4)) { ++ b(offset(entry)); ++ } else { ++ // address must be bounded ++ move(AT, RA); ++ emit_long(insn_ORRI(regimm_op, 0, bgezal_op, 1)); ++ nop(); ++ li32(T9, entry - pc()); ++ daddu(T9, T9, RA); ++ move(RA, AT); ++ jr(T9); ++ } ++} ++ ++void MacroAssembler::ld_ptr(Register rt, Register base, Register 
offset) { ++ addu_long(AT, base, offset); ++ ld_ptr(rt, AT, 0); ++} ++ ++void MacroAssembler::st_ptr(Register rt, Register base, Register offset) { ++ guarantee(AT != rt, "AT must not equal rt"); ++ addu_long(AT, base, offset); ++ st_ptr(rt, AT, 0); ++} ++ ++Address MacroAssembler::as_Address(AddressLiteral adr) { ++ return Address(adr.target(), adr.rspec()); ++} ++ ++Address MacroAssembler::as_Address(ArrayAddress adr) { ++ return Address::make_array(adr); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). ++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ Label again; ++ ++ li(tmp_reg1, counter_addr); ++ bind(again); ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ ll(tmp_reg2, tmp_reg1, 0); ++ addiu(tmp_reg2, tmp_reg2, inc); ++ sc(tmp_reg2, tmp_reg1, 0); ++ beq(tmp_reg2, R0, again); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::reserved_stack_check() { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; ++ ++ ld(AT, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ dsubu(AT, SP, AT); ++ bltz(AT, no_reserved_zone_enabling); ++ delayed()->nop(); ++ ++ enter(); // RA and FP are live. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ leave(); ++ ++ // We have already removed our own frame. ++ // throw_delayed_StackOverflowError will think that it's been ++ // called by our caller. ++ li(AT, (long)StubRoutines::throw_delayed_StackOverflowError_entry()); ++ jr(AT); ++ delayed()->nop(); ++ should_not_reach_here(); ++ ++ bind(no_reserved_zone_enabling); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ bool need_tmp_reg = false; ++ if (tmp_reg == noreg) { ++ need_tmp_reg = true; ++ tmp_reg = T9; ++ } ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg, AT); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ Address saved_mark_addr(lock_reg, 0); ++ ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ld_ptr(swap_reg, mark_addr); ++ } ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ move(tmp_reg, swap_reg); ++ andi(tmp_reg, tmp_reg, markOopDesc::biased_lock_mask_in_place); ++ daddiu(AT, R0, markOopDesc::biased_lock_pattern); ++ dsubu(AT, AT, tmp_reg); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ ++ bne(AT, R0, cas_label); ++ delayed()->nop(); ++ ++ ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. 
++ // Note that because there is no current thread register on MIPS we ++ // need to store off the mark word we read out of the object to ++ // avoid reloading it and needing to recheck invariants below. This ++ // store is unfortunate but it makes the overall code shorter and ++ // simpler. ++ st_ptr(swap_reg, saved_mark_addr); ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ if (swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ xorr(tmp_reg, tmp_reg, swap_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ xorr(swap_reg, swap_reg, tmp_reg); ++#else ++ xorr(swap_reg, TREG, tmp_reg); ++#endif ++ ++ move(AT, ~((int) markOopDesc::age_mask_in_place)); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(swap_reg, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ beq(swap_reg, R0, done); ++ delayed()->nop(); ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ ++ move(AT, markOopDesc::biased_lock_mask_in_place); ++ andr(AT, swap_reg, AT); ++ bne(AT, R0, try_revoke_bias); ++ delayed()->nop(); ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ ++ move(AT, markOopDesc::epoch_mask_in_place); ++ andr(AT,swap_reg, AT); ++ bne(AT, R0, try_rebias); ++ delayed()->nop(); ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ move(AT, markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place); ++ andr(swap_reg, swap_reg, AT); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++#ifndef OPT_THREAD ++ get_thread(tmp_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, TREG, swap_reg); ++#endif ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. 
The revocation will occur in the ++ // interpreter runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(tmp_reg); ++ push(A0); ++ atomic_inc32((address)BiasedLocking::anonymously_biased_lock_entry_count_addr(), 1, A0, tmp_reg); ++ pop(A0); ++ pop(tmp_reg); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ b(done); ++ delayed()->nop(); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++#ifndef OPT_THREAD ++ get_thread(swap_reg); ++ orr(tmp_reg, tmp_reg, swap_reg); ++#else ++ orr(tmp_reg, tmp_reg, TREG); ++#endif ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::rebiased_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ if (slow_case != NULL) { ++ beq_far(AT, R0, *slow_case); ++ delayed()->nop(); ++ } ++ ++ b(done); ++ delayed()->nop(); ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ ld_ptr(swap_reg, saved_mark_addr); ++ ++ if (need_tmp_reg) { ++ push(tmp_reg); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(Address(obj_reg, 0), swap_reg, tmp_reg, AT, false, false); ++ if (need_tmp_reg) { ++ pop(tmp_reg); ++ } ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. 
++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ bne(AT, R0, L); ++ delayed()->nop(); ++ push(AT); ++ push(tmp_reg); ++ atomic_inc32((address)BiasedLocking::revoked_lock_entry_count_addr(), 1, AT, tmp_reg); ++ pop(tmp_reg); ++ pop(AT); ++ bind(L); ++ } ++ ++ bind(cas_label); ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ld(temp_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ andi(temp_reg, temp_reg, markOopDesc::biased_lock_mask_in_place); ++ daddiu(AT, R0, markOopDesc::biased_lock_pattern); ++ ++ beq(AT, temp_reg, done); ++ delayed()->nop(); ++} ++ ++// the stack pointer adjustment is needed. see InterpreterMacroAssembler::super_call_VM_leaf ++// this method will handle the stack problem, you need not to preserve the stack space for the argument now ++void MacroAssembler::call_VM_leaf_base(address entry_point, int number_of_arguments) { ++ Label L, E; ++ ++ assert(number_of_arguments <= 4, "just check"); ++ ++ andi(AT, SP, 0xf); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ daddiu(SP, SP, -8); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ daddiu(SP, SP, 8); ++ b(E); ++ delayed()->nop(); ++ ++ bind(L); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(E); ++} ++ ++ ++void MacroAssembler::jmp(address entry) { ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++} ++ ++void MacroAssembler::jmp(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ jmp(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_set48(T9, (long)entry); ++ jr(T9); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::jmp_far(Label& L) { ++ if (L.is_bound()) { ++ address entry = target(L); ++ assert(entry != NULL, "jmp most probably wrong"); ++ InstructionMark im(this); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)entry); ++ } else { ++ InstructionMark im(this); ++ L.add_patch_at(code(), locator()); ++ ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(T9, (long)pc()); ++ } ++ ++ jr(T9); ++ delayed()->nop(); ++} ++void MacroAssembler::mov_metadata(Address dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(AT, (long)obj); ++ sd(AT, dst); ++} ++ ++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ relocate(metadata_Relocation::spec(oop_index)); ++ patchable_set48(dst, (long)obj); ++} ++ ++void MacroAssembler::call(address entry) { ++// c/c++ code assume T9 is entry point, so we just always move entry to t9 ++// maybe there is some more graceful method to handle this. 
FIXME ++// For more info, see class NativeCall. ++ patchable_set48(T9, (long)entry); ++ jalr(T9); ++} ++ ++void MacroAssembler::call(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::call(address entry, RelocationHolder& rh) ++{ ++ switch (rh.type()) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ call(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rh); ++ call(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::ic_call(address entry, jint method_index) { ++ RelocationHolder rh = virtual_call_Relocation::spec(pc(), method_index); ++ patchable_set48(IC_Klass, (long)Universe::non_oop_word()); ++ assert(entry != NULL, "call most probably wrong"); ++ InstructionMark im(this); ++ trampoline_call(AddressLiteral(entry, rh)); ++} ++ ++void MacroAssembler::c2bool(Register r) { ++ sltu(r, R0, r); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg/*, RegistersForDebugging* regs*/) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++} ++ ++ ++void MacroAssembler::stop(const char* msg) { ++ li(A0, (long)msg); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ brk(17); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ pushad(); ++ li(A0, (long)msg); ++ push(S2); ++ move(AT, -(StackAlignmentInBytes)); ++ move(S2, SP); // use S2 as a sender SP holder ++ andr(SP, SP, AT); // align stack as required by ABI ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ move(SP, S2); // use S2 as a sender SP holder ++ pop(S2); ++ popad(); ++} ++ ++void MacroAssembler::increment(Register reg, int imm) { ++ if (!imm) return; ++ if (is_simm16(imm)) { ++ daddiu(reg, reg, imm); ++ } else { ++ move(AT, imm); ++ daddu(reg, reg, AT); ++ } ++} ++ ++void MacroAssembler::decrement(Register reg, int imm) { ++ increment(reg, -imm); ++} ++ ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions) { ++ call_VM_helper(oop_result, entry_point, 0, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ call_VM_helper(oop_result, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); ++ assert(arg_2 != A1, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ address 
entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1!=A1) move(A1, arg_1); ++ if (arg_2!=A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3!=A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM_helper(oop_result, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ call_VM_base(oop_result, NOREG, last_java_sp, entry_point, number_of_arguments, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ call_VM(oop_result, last_java_sp, entry_point, 1, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 2, check_exceptions); ++} ++ ++void MacroAssembler::call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3, ++ bool check_exceptions) { ++ if (arg_1 != A1) move(A1, arg_1); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A1, "smashed argument"); ++ if (arg_3 != A3) move(A3, arg_3); assert(arg_3 != A1 && arg_3 != A2, "smashed argument"); ++ call_VM(oop_result, last_java_sp, entry_point, 3, check_exceptions); ++} ++ ++void MacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) { ++ ++ address before_call_pc; ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "cannot have negative number of arguments"); ++ assert(java_thread != oop_result , "cannot use the same register for java_thread & oop_result"); ++ assert(java_thread != last_java_sp, "cannot use the same register for java_thread & last_java_sp"); ++ ++ assert(last_java_sp != FP, "this code doesn't work for last_java_sp == fp, which currently can't portably work anyway since C2 doesn't save fp"); ++ ++ // set last Java frame before call ++ before_call_pc = (address)pc(); ++ set_last_Java_frame(java_thread, last_java_sp, FP, before_call_pc); ++ ++ // do the call ++ move(A0, java_thread); ++ call(entry_point, relocInfo::runtime_call_type); ++ delayed()->nop(); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. 
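++  // With OPT_THREAD the current thread is expected to stay in TREG across the
++  // call (the debug-only assert below checks this), so nothing is reloaded;
++  // otherwise get_thread() has to re-derive the current thread here.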
++#ifndef OPT_THREAD ++ get_thread(java_thread); ++#else ++#ifdef ASSERT ++ { ++ Label L; ++ get_thread(AT); ++ beq(java_thread, AT, L); ++ delayed()->nop(); ++ stop("MacroAssembler::call_VM_base: TREG not callee saved?"); ++ bind(L); ++ } ++#endif ++#endif ++ ++ // discard thread and arguments ++ ld_ptr(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, false); ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ Label L; ++ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ li(AT, before_call_pc); ++ push(AT); ++ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ bind(L); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ ld(oop_result, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ sd(R0, java_thread, in_bytes(JavaThread::vm_result_offset())); ++ verify_oop(oop_result); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) { ++ ++ move(V0, SP); ++ //we also reserve space for java_thread here ++ move(AT, -(StackAlignmentInBytes)); ++ andr(SP, SP, AT); ++ call_VM_base(oop_result, NOREG, V0, entry_point, number_of_arguments, check_exceptions); ++ ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ if (arg_0 != A0) move(A0, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ if (arg_0 != A0) move(A0, arg_0); ++ if (arg_1 != A1) move(A1, arg_1); assert(arg_1 != A0, "smashed argument"); ++ if (arg_2 != A2) move(A2, arg_2); assert(arg_2 != A0 && arg_2 != A1, "smashed argument"); ++ call_VM_leaf(entry_point, 3); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1) { ++ if (arg_1 != A0) move(A0, arg_1); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++void MacroAssembler::super_call_VM_leaf(address entry_point, ++ Register arg_1, ++ Register arg_2, ++ Register arg_3) { ++ if (arg_1 != A0) move(A0, arg_1); ++ if (arg_2 != A1) move(A1, arg_2); assert(arg_2 != A0, "smashed argument"); ++ if (arg_3 != A2) move(A2, arg_3); assert(arg_3 != A0 && arg_3 != A1, "smashed argument"); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { ++} ++ ++void 
MacroAssembler::check_and_handle_popframe(Register java_thread) { ++} ++ ++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any (non-CC) registers ++ // NOTE: cmpl is plenty here to provoke a segv ++ lw(AT, reg, 0); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::enter() { ++ push2(RA, FP); ++ move(FP, SP); ++} ++ ++void MacroAssembler::leave() { ++ move(SP, FP); ++ pop2(RA, FP); ++} ++ ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); ++} ++ ++void MacroAssembler::get_thread(Register thread) { ++#ifdef MINIMIZE_RAM_USAGE ++// ++// In MIPS64, we don't use full 64-bit address space. ++// Only a small range is actually used. ++// ++// Example: ++// $ cat /proc/13352/maps ++// 120000000-120010000 r-xp 00000000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 12001c000-120020000 rw-p 0000c000 08:01 41077 /mnt/openjdk6-mips-full/build/linux-mips64/j2sdk-image/bin/java ++// 120020000-1208dc000 rwxp 00000000 00:00 0 [heap] ++// 555d574000-555d598000 r-xp 00000000 08:01 2073768 /lib/ld-2.12.so ++// 555d598000-555d59c000 rw-p 00000000 00:00 0 ++// ...... ++// 558b1f8000-558b23c000 rwxp 00000000 00:00 0 ++// 558b23c000-558b248000 ---p 00000000 00:00 0 ++// 558b248000-558b28c000 rwxp 00000000 00:00 0 ++// ffff914000-ffff94c000 rwxp 00000000 00:00 0 [stack] ++// ffffffc000-10000000000 r-xp 00000000 00:00 0 [vdso] ++// ++// All stacks are positioned at 0x55________. ++// Therefore, we can utilize the same algorithm used in 32-bit. 
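++//
++// Because every thread stack falls into that narrow 0x55________ window, only
++// the low SP_BITLENGTH bits of SP differ between threads.  Shifting SP right
++// by PAGE_SHIFT and masking it down to (SP_BITLENGTH - PAGE_SHIFT) bits
++// therefore gives each stack page its own _sp_map slot, which the lookup
++// below uses to recover the owning Thread* without calling into the runtime.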
++ // int index = ((uintptr_t)p >> PAGE_SHIFT) & ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1); ++ // Thread* thread = _sp_map[index]; ++ Register tmp; ++ ++ if (thread == AT) ++ tmp = T9; ++ else ++ tmp = AT; ++ ++ move(thread, SP); ++ shr(thread, PAGE_SHIFT); ++ ++ push(tmp); ++ li(tmp, ((1UL << (SP_BITLENGTH - PAGE_SHIFT)) - 1)); ++ andr(thread, thread, tmp); ++ shl(thread, Address::times_ptr); // sizeof(Thread *) ++ li48(tmp, (long)ThreadLocalStorage::sp_map_addr()); ++ addu(tmp, tmp, thread); ++ ld_ptr(thread, tmp, 0); ++ pop(tmp); ++#else ++ if (thread != V0) { ++ push(V0); ++ } ++ pushad_except_v0(); ++ ++ push(S5); ++ move(S5, SP); ++ move(AT, -StackAlignmentInBytes); ++ andr(SP, SP, AT); ++ call(CAST_FROM_FN_PTR(address, Thread::current)); ++ //MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current), 0); ++ delayed()->nop(); ++ move(SP, S5); ++ pop(S5); ++ ++ popad_except_v0(); ++ if (thread != V0) { ++ move(thread, V0); ++ pop(V0); ++ } ++#endif // MINIMIZE_RAM_USAGE ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T1; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // we must set sp to zero to clear frame ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ st_ptr(R0, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // we must set sp to zero to clear frame ++ sd(R0, Address(thread, JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ sd(R0, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ sd(R0, Address(thread, JavaThread::last_Java_pc_offset())); ++} ++ ++// Write serialization page so VM thread can do a pseudo remote membar. ++// We use the current thread pointer to calculate a thread specific ++// offset to write to within the page. This minimizes bus traffic ++// due to cache line collision. 
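++// The per-thread offset is computed below as
++//   offset = (thread >> get_serialize_page_shift_count()) & (vm_page_size() - sizeof(int))
++// and a zero word is stored at that offset into the shared serialization page.
++// When the VM thread protects that page, a thread racing through a state
++// transition faults on this store and is thereby serialized.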
++void MacroAssembler::serialize_memory(Register thread, Register tmp) { ++ int mask = os::vm_page_size() - sizeof(int); ++ assert_different_registers(AT, tmp); ++ assert(is_uimm(mask, 16), "Not a unsigned 16-bit"); ++ srl(AT, thread, os::get_serialize_page_shift_count()); ++ andi(AT, AT, mask); ++ li(tmp, os::get_memory_serialize_page()); ++ addu(tmp, tmp, AT); ++ sw(R0, tmp, 0); ++} ++ ++void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } else { ++ li(AT, SafepointSynchronize::address_of_state()); ++ lw(AT, AT, 0); ++ addiu(AT, AT, -SafepointSynchronize::_not_synchronized); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } ++} ++ ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. ++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path, Register thread_reg) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ ld(AT, thread_reg, in_bytes(Thread::polling_page_offset())); ++ sync(); ++ andi(AT, AT, SafepointMechanism::poll_bit()); ++ bne(AT, R0, slow_path); ++ delayed()->nop(); ++ } else { ++ safepoint_poll(slow_path, thread_reg); ++ } ++} ++ ++// Calls to C land ++// ++// When entering C land, the fp, & sp of the last Java frame have to be recorded ++// in the (thread-local) JavaThread object. When leaving C land, the last Java fp ++// has to be reset to 0. This is required to allow proper stack traversal. 
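++// The pc of the transition is recorded as well: set_last_Java_frame() below
++// materializes last_java_pc with a patchable_set48 under an internal_word
++// relocation, so the stored pc remains correct if the generated code is moved.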
++void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++#ifndef OPT_THREAD ++ java_thread = T2; ++ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ st_ptr(last_java_fp, java_thread, in_bytes(JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, java_thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ st_ptr(last_java_sp, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++} ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) { ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = SP; ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ get_thread(thread); ++#endif ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ sd(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ patchable_set48(AT, (long)last_java_pc); ++ st_ptr(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ ++ sd(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset())); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. ++void MacroAssembler::tlab_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ Unimplemented(); ++ //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ //bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1, Register t2, Label& slow_case) { ++ Unimplemented(); ++ //assert_different_registers(obj, var_size_in_bytes, t1, AT); ++ //BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ //bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++} ++ ++void MacroAssembler::incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1) { ++ if (!thread->is_valid()) { ++#ifndef OPT_THREAD ++ assert(t1->is_valid(), "need temp reg"); ++ thread = t1; ++ get_thread(thread); ++#else ++ thread = TREG; ++#endif ++ } ++ ++ ld_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++ if (var_size_in_bytes->is_valid()) { ++ addu(AT, AT, var_size_in_bytes); ++ } else { ++ addiu(AT, AT, con_size_in_bytes); ++ } ++ st_ptr(AT, thread, in_bytes(JavaThread::allocated_bytes_offset())); ++} ++ ++void MacroAssembler::li(Register rd, long imm) { ++ if (imm <= max_jint && imm >= min_jint) { ++ li32(rd, (int)imm); ++ } else if (julong(imm) <= 0xFFFFFFFF) { ++ assert_not_delayed(); ++ // lui sign-extends, so we can't use that. 
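++    // (A value in this range must be zero-extended: if bit 31 were set, lui
++    // would smear ones into the upper 32 bits.  Building the constant with
++    // ori/dsll/ori never sign-extends, so the upper bits stay zero.)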
++ ori(rd, R0, julong(imm) >> 16); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++ } else if ((imm > 0) && is_simm16(imm >> 32)) { ++ // A 48-bit address ++ li48(rd, imm); ++ } else { ++ li64(rd, imm); ++ } ++} ++ ++void MacroAssembler::li32(Register reg, int imm) { ++ if (is_simm16(imm)) { ++ addiu(reg, R0, imm); ++ } else { ++ lui(reg, split_low(imm >> 16)); ++ if (split_low(imm)) ++ ori(reg, reg, split_low(imm)); ++ } ++} ++ ++void MacroAssembler::set64(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ } else { ++ lui(d, split_low(value >> 16)); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ } else { // li64 ++ // 6 insts ++ li64(d, value); ++ } ++} ++ ++ ++int MacroAssembler::insts_for_set64(jlong value) { ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ //daddiu(d, R0, value); ++ count++; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ count++; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ //dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ //ori(d, d, split_low(value)); ++ count++; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ //li48(d, value); ++ count += 4; ++ } else { // li64 ++ // 6 insts ++ //li64(d, value); ++ count += 6; ++ } ++ ++ return count; ++} ++ ++void MacroAssembler::patchable_set48(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && is_simm16(value >> 32)) { // li48 ++ // 4 insts ++ li48(d, value); ++ count += 4; ++ } else { // li64 ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_set32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ ori(d, R0, julong(value) >> 16); ++ dsll(d, d, 16); ++ count += 2; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } else { ++ 
tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 3) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::patchable_call32(Register d, jlong value) { ++ assert_not_delayed(); ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (is_simm16(value)) { ++ daddiu(d, R0, value); ++ count += 1; ++ } else { ++ lui(d, split_low(value >> 16)); ++ count += 1; ++ if (split_low(value)) { ++ ori(d, d, split_low(value)); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ nop(); ++ count++; ++ } ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert(UseCompressedClassPointers, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ long narrowKlass = (long)Klass::encode_klass(k); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, narrowKlass); ++} ++ ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert(UseCompressedOops, "should only be used for compressed header"); ++ assert(oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ relocate(rspec, Assembler::narrow_oop_operand); ++ patchable_set48(dst, oop_index); ++} ++ ++// ((OopHandle)result).resolve(); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) { ++ // OopHandle::resolve is an indirection. 
++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, NOREG); ++} ++ ++void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { ++ // get mirror ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ld_ptr(mirror, method, in_bytes(Method::const_offset())); ++ ld_ptr(mirror, mirror, in_bytes(ConstMethod::constants_offset())); ++ ld_ptr(mirror, mirror, ConstantPool::pool_holder_offset_in_bytes()); ++ ld_ptr(mirror, mirror, mirror_offset); ++ resolve_oop_handle(mirror, tmp); ++} ++ ++void MacroAssembler::li64(Register rd, long imm) { ++ assert_not_delayed(); ++ lui(rd, split_low(imm >> 48)); ++ ori(rd, rd, split_low(imm >> 32)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::li48(Register rd, long imm) { ++ assert_not_delayed(); ++ assert(is_simm16(imm >> 32), "Not a 48-bit address"); ++ lui(rd, imm >> 32); ++ ori(rd, rd, split_low(imm >> 16)); ++ dsll(rd, rd, 16); ++ ori(rd, rd, split_low(imm)); ++} ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop: %s: %s", reg->name(), s); ++ b = code_string(ss.as_string()); ++ pushad(); ++ move(A1, reg); ++ li(A0, (long)b); ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ popad(); ++} ++ ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ if (!VerifyOops) { ++ nop(); ++ return; ++ } ++ // Pass register number to verify_oop_subroutine ++ const char * b = NULL; ++ stringStream ss; ++ ss.print("verify_oop_addr: %s", s); ++ b = code_string(ss.as_string()); ++ ++ addiu(SP, SP, - 7 * wordSize); ++ st_ptr(T0, SP, 6 * wordSize); ++ st_ptr(T1, SP, 5 * wordSize); ++ st_ptr(RA, SP, 4 * wordSize); ++ st_ptr(A0, SP, 3 * wordSize); ++ st_ptr(A1, SP, 2 * wordSize); ++ st_ptr(AT, SP, 1 * wordSize); ++ st_ptr(T9, SP, 0); ++ ++ // addr may contain sp so we will have to adjust it based on the ++ // pushes that we just did. 
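++  // Note (illustrative): the prologue above pushed 7 words, so an SP-relative
++  // address now sits 7 * wordSize below its original slot; the lea/ld_ptr pair
++  // below rebases such an address before loading the oop into A1.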
++ if (addr.uses(SP)) { ++ lea(A1, addr); ++ ld_ptr(A1, Address(A1, 7 * wordSize)); ++ } else { ++ ld_ptr(A1, addr); ++ } ++ li(A0, (long)b); ++ // call indirectly to solve generation ordering problem ++ li(AT, (long)StubRoutines::verify_oop_subroutine_entry_address()); ++ ld_ptr(T9, AT, 0); ++ jalr(T9); ++ delayed()->nop(); ++ ld_ptr(T0, SP, 6* wordSize); ++ ld_ptr(T1, SP, 5* wordSize); ++ ld_ptr(RA, SP, 4* wordSize); ++ ld_ptr(A0, SP, 3* wordSize); ++ ld_ptr(A1, SP, 2* wordSize); ++ ld_ptr(AT, SP, 1* wordSize); ++ ld_ptr(T9, SP, 0* wordSize); ++ addiu(SP, SP, 7 * wordSize); ++} ++ ++// used registers : T0, T1 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ ++ Label exit, error; ++ // increment counter ++ li(T0, (long)StubRoutines::verify_oop_count_addr()); ++ lw(AT, T0, 0); ++ daddiu(AT, AT, 1); ++ sw(AT, T0, 0); ++ ++ // make sure object is 'reasonable' ++ beq(A1, R0, exit); // if obj is NULL it is ok ++ delayed()->nop(); ++ ++ // Check if the oop is in the right area of memory ++ // const int oop_mask = Universe::verify_oop_mask(); ++ // const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ li(AT, oop_mask); ++ andr(T0, A1, AT); ++ li(AT, oop_bits); ++ bne(T0, AT, error); ++ delayed()->nop(); ++ ++ // make sure klass is 'reasonable' ++ // add for compressedoops ++ reinit_heapbase(); ++ // add for compressedoops ++ load_klass(T0, A1); ++ beq(T0, R0, error); // if klass is NULL it is broken ++ delayed()->nop(); ++ // return if everything seems ok ++ bind(exit); ++ ++ jr(RA); ++ delayed()->nop(); ++ ++ // handle errors ++ bind(error); ++ pushad(); ++ call(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ delayed()->nop(); ++ popad(); ++ jr(RA); ++ delayed()->nop(); ++} ++ ++void MacroAssembler::verify_tlab(Register t1, Register t2) { ++#ifdef ASSERT ++ assert_different_registers(t1, t2, AT); ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ ++ get_thread(t1); ++ ++ ld_ptr(t2, t1, in_bytes(JavaThread::tlab_top_offset())); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_start_offset())); ++ sltu(AT, t2, AT); ++ beq(AT, R0, next); ++ delayed()->nop(); ++ ++ stop("assert(top >= start)"); ++ ++ bind(next); ++ ld_ptr(AT, t1, in_bytes(JavaThread::tlab_end_offset())); ++ sltu(AT, AT, t2); ++ beq(AT, R0, ok); ++ delayed()->nop(); ++ ++ stop("assert(top <= end)"); ++ ++ bind(ok); ++ ++ } ++#endif ++} ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++ Unimplemented(); ++ //AddressLiteral a(delayed_value_addr); ++ // load indirectly to solve generation ordering problem ++ //movptr(tmp, ExternalAddress((address) delayed_value_addr)); ++ //ld(tmp, a); ++ if (offset != 0) ++ daddiu(tmp,tmp, offset); ++ ++ return RegisterOrConstant(tmp); ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ //short ++ //andi(reg, reg, 0xffff); ++ srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ sra(reg, reg, 16); ++ orr(reg, reg, AT); ++} ++ ++void MacroAssembler::huswap(Register reg) { ++ dsrl(AT, reg, 8); ++ dsll(reg, reg, 24); ++ dsrl(reg, reg, 16); ++ orr(reg, reg, AT); ++ andi(reg, reg, 0xffff); ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ 
srl(AT, reg, 8); ++ sll(reg, reg, 24); ++ orr(reg, reg, AT); ++ //reg : 4 1 2 3 ++ srl(AT, AT, 16); ++ xorr(AT, AT, reg); ++ andi(AT, AT, 0xff); ++ //AT : 0 0 0 1^3); ++ xorr(reg, reg, AT); ++ //reg : 4 1 2 1 ++ sll(AT, AT, 16); ++ xorr(reg, reg, AT); ++ //reg : 4 3 2 1 ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register resflag, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ bind(again); ++ lld(resflag, addr); ++ bne(resflag, oldval, fail); ++ delayed()->nop(); ++ move(resflag, newval); ++ scd(resflag, addr); ++ beq(resflag, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ bind(fail); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg(Address addr, Register oldval, Register newval, ++ Register tmp, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ lld(tmp, addr); ++ bne(tmp, oldval, neq); ++ delayed()->nop(); ++ move(tmp, newval); ++ scd(tmp, addr); ++ beq(tmp, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(neq); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) { ++ b(*fail); ++ delayed()->nop(); ++ } ++} ++ ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, ++ Register resflag, bool sign, bool retold, bool barrier) { ++ assert(oldval != resflag, "oldval != resflag"); ++ assert(newval != resflag, "newval != resflag"); ++ Label again, succ, fail; ++ bind(again); ++ ll(resflag, addr); ++ if (!sign) ++ dinsu(resflag, R0, 32, 32); ++ bne(resflag, oldval, fail); ++ delayed()->nop(); ++ ++ move(resflag, newval); ++ sc(resflag, addr); ++ beq(resflag, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(fail); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, resflag); ++ move(resflag, R0); ++ bind(succ); ++} ++ ++void MacroAssembler::cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail) { ++ assert(oldval != tmp, "oldval != tmp"); ++ assert(newval != tmp, "newval != tmp"); ++ Label again, neq; ++ ++ bind(again); ++ ll(tmp, addr); ++ if (!sign) ++ dinsu(tmp, R0, 32, 32); ++ bne(tmp, oldval, neq); ++ delayed()->nop(); ++ move(tmp, newval); ++ sc(tmp, addr); ++ beq(tmp, R0, again); ++ delayed()->nop(); ++ b(succ); ++ delayed()->nop(); ++ ++ bind(neq); ++ if (barrier) ++ sync(); ++ if (retold && oldval != R0) ++ move(oldval, tmp); ++ if (fail) { ++ b(*fail); ++ delayed()->nop(); ++ } ++} ++ ++void MacroAssembler::cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi) { ++ Label done, again, nequal; ++ ++ Register x_reg = x_regLo; ++ dsll32(x_regHi, x_regHi, 0); ++ dsll32(x_regLo, x_regLo, 0); ++ dsrl32(x_regLo, x_regLo, 0); ++ orr(x_reg, x_regLo, x_regHi); ++ ++ Register c_reg = c_regLo; ++ dsll32(c_regHi, c_regHi, 0); ++ dsll32(c_regLo, c_regLo, 0); ++ dsrl32(c_regLo, c_regLo, 0); ++ orr(c_reg, c_regLo, c_regHi); ++ ++ bind(again); ++ ++ if (UseSyncLevel >= 10000 || UseSyncLevel == 1000 || UseSyncLevel == 4000) sync(); ++ lld(AT, dest); ++ bne(AT, c_reg, nequal); ++ delayed()->nop(); ++ ++ //move(AT, x_reg); 
++ daddu(AT, x_reg, R0); ++ scd(AT, dest); ++ beq(AT, R0, again); ++ delayed()->nop(); ++ b(done); ++ delayed()->nop(); ++ ++ // not xchged ++ bind(nequal); ++ sync(); ++ //move(c_reg, AT); ++ //move(AT, R0); ++ daddu(c_reg, AT, R0); ++ daddu(AT, R0, R0); ++ bind(done); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_s(tmp, fs, ft); ++ trunc_l_s(tmp, tmp); ++ cvt_s_l(tmp, tmp); ++ mul_s(tmp, tmp, ft); ++ sub_s(fd, fs, tmp); ++} ++ ++// be sure the three register is different ++void MacroAssembler::rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp) { ++ assert_different_registers(tmp, fs, ft); ++ div_d(tmp, fs, ft); ++ trunc_l_d(tmp, tmp); ++ cvt_d_l(tmp, tmp); ++ mul_d(tmp, tmp, ft); ++ sub_d(fd, fs, tmp); ++} ++ ++#ifdef COMPILER2 ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (Obj, Self, box, Scratch) and return success-failure ++// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, res, tmp, nax scr) where tmp and scr are KILLED. ++// fast_unlock (nax Obj, box, res, nax tmp) where tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. ++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. 
++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. ++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) ++// tmp: tmp -- KILLED ++// scr: tmp -- KILLED ++void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label IsInflated, DONE, DONE_SET; ++ ++ // Ensure the register assignents are disjoint ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastLock"); ++ ++ if (PrintBiasedLockingStatistics) { ++ atomic_inc32((address)BiasedLocking::total_entry_count_addr(), 1, tmpReg, scrReg); ++ } ++ ++ if (EmitSync & 1) { ++ move(AT, 0x0); ++ return; ++ } else ++ if (EmitSync & 2) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument. ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)) ; // fetch markword ++ ori(tmpReg, tmpReg, 0x1); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_LABEL); // Updates tmpReg ++ delayed()->nop(); ++ ++ // Recursive locking ++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, (7 - os::vm_page_size() )); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ bind(DONE_LABEL) ; ++ } else { ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. 
++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, succ, NULL); ++ b(fail); ++ delayed()->nop(); ++ bind(succ); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(fail); ++ } ++ ++ ld(tmpReg, Address(objReg, 0)); //Fetch the markword of the object. ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ bne(AT, R0, IsInflated); // inflated vs stack-locked|neutral|bias ++ delayed()->nop(); ++ ++ // Attempt stack-locking ... ++ ori(tmpReg, tmpReg, markOopDesc::unlocked_value); ++ sd(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ ++ if (PrintBiasedLockingStatistics) { ++ Label SUCC, FAIL; ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, SUCC, &FAIL); // Updates tmpReg ++ bind(SUCC); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(FAIL); ++ } else { ++ // If cmpxchg is succ, then scrReg = 1 ++ cmpxchg(Address(objReg, 0), tmpReg, boxReg, scrReg, true, false, DONE_SET); // Updates tmpReg ++ } ++ ++ // Recursive locking ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. ++ dsubu(tmpReg, tmpReg, SP); ++ li(AT, 7 - os::vm_page_size()); ++ andr(tmpReg, tmpReg, AT); ++ sd(tmpReg, Address(boxReg, 0)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ // tmpReg == 0 => BiasedLocking::_fast_path_entry_count++ ++ bne(tmpReg, R0, L); ++ delayed()->nop(); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, AT, scrReg); ++ bind(L); ++ } ++ b(DONE); ++ delayed()->sltiu(resReg, tmpReg, 1); // resReg = (tmpReg == 0) ? 1 : 0 ++ ++ bind(IsInflated); ++ // The object's monitor m is unlocked iff m->owner == NULL, ++ // otherwise m->owner may contain a thread or a stack address. ++ ++ // TODO: someday avoid the ST-before-CAS penalty by ++ // relocating (deferring) the following ST. ++ // We should also think about trying a CAS without having ++ // fetched _owner. If the CAS is successful we may ++ // avoid an RTO->RTS upgrade on the $line. ++ // Without cast to int32_t a movptr will destroy r10 which is typically obj ++ li(AT, (int32_t)intptr_t(markOopDesc::unused_mark())); ++ sd(AT, Address(boxReg, 0)); ++ ++ ld(AT, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ // if (m->owner != 0) => AT = 0, goto slow path. ++ bne(AT, R0, DONE_SET); ++ delayed()->ori(scrReg, R0, 0); ++ ++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ // It's inflated and appears unlocked ++ cmpxchg(Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2), R0, TREG, scrReg, false, false) ; ++ // Intentional fall-through into DONE ... ++ ++ bind(DONE_SET); ++ move(resReg, scrReg); ++ ++ // DONE is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE); ++ // At DONE the resReg is set as follows ... ++ // Fast_Unlock uses the same protocol. 
++ // resReg == 1 -> Success ++ // resREg == 0 -> Failure - force control through the slow-path ++ ++ // Avoid branch-to-branch on AMD processors ++ // This appears to be superstition. ++ if (EmitSync & 32) nop() ; ++ ++ } ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. ++// tmp: killed tmp; cannot be obj nor box. ++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. ++ ++void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register resReg, ++ Register tmpReg, Register scrReg) { ++ Label DONE, DONE_SET, Stacked, Inflated; ++ ++ guarantee(objReg != boxReg, ""); ++ guarantee(objReg != tmpReg, ""); ++ guarantee(objReg != scrReg, ""); ++ guarantee(boxReg != tmpReg, ""); ++ guarantee(boxReg != scrReg, ""); ++ ++ block_comment("FastUnlock"); ++ ++ if (EmitSync & 4) { ++ // Disable - inhibit all inlining. Force control through the slow-path ++ move(AT, 0x0); ++ return; ++ } else ++ if (EmitSync & 8) { ++ Label DONE_LABEL ; ++ if (UseBiasedLocking) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ // classic stack-locking code ... ++ ld(tmpReg, Address(boxReg, 0)) ; ++ beq(tmpReg, R0, DONE_LABEL) ; ++ move(AT, 0x1); // delay slot ++ ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ bind(DONE_LABEL); ++ } else { ++ Label CheckSucc; ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ Label succ, fail; ++ biased_locking_exit(objReg, tmpReg, succ); ++ b(fail); ++ delayed()->nop(); ++ bind(succ); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ bind(fail); ++ } ++ ++ ld(tmpReg, Address(boxReg, 0)); // Examine the displaced header ++ beq(tmpReg, R0, DONE_SET); // 0 indicates recursive stack-lock ++ delayed()->sltiu(AT, tmpReg, 1); ++ ++ ld(tmpReg, Address(objReg, 0)); // Examine the object's markword ++ andi(AT, tmpReg, markOopDesc::monitor_value); ++ beq(AT, R0, Stacked); // Inflated? ++ delayed()->nop(); ++ ++ bind(Inflated); ++ // It's inflated. 
++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). ++#ifndef OPT_THREAD ++ get_thread(TREG); ++#endif ++ ++ // It's inflated ++ ld(scrReg, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)) ; ++ xorr(scrReg, scrReg, TREG); ++ ++ ld(AT, Address(tmpReg, ObjectMonitor::recursions_offset_in_bytes() - 2)) ; ++ orr(scrReg, scrReg, AT); ++ ++ bne(scrReg, R0, DONE_SET); ++ delayed()->ori(AT, R0, 0); ++ ++ ld(scrReg, Address(tmpReg, ObjectMonitor::cxq_offset_in_bytes() - 2)); ++ ld(AT, Address(tmpReg, ObjectMonitor::EntryList_offset_in_bytes() - 2)); ++ orr(scrReg, scrReg, AT); ++ ++ bne(scrReg, R0, DONE_SET); ++ delayed()->ori(AT, R0, 0); ++ ++ sync(); ++ sd(R0, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes() - 2)); ++ b(DONE); ++ delayed()->ori(resReg, R0, 1); ++ ++ bind(Stacked); ++ ld(tmpReg, Address(boxReg, 0)); ++ cmpxchg(Address(objReg, 0), boxReg, tmpReg, AT, false, false); ++ ++ bind(DONE_SET); ++ move(resReg, AT); ++ ++ if (EmitSync & 65536) { ++ bind (CheckSucc); ++ } ++ ++ bind(DONE); ++ ++ // Avoid branch to branch on AMD processors ++ if (EmitSync & 32768) { nop() ; } ++ } ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++Register caller_saved_registers[] = {AT, V0, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++Register caller_saved_registers_except_v0[] = {AT, V1, A0, A1, A2, A3, A4, A5, A6, A7, T0, T1, T2, T3, T8, T9, GP, RA, FP}; ++ ++//In MIPS64, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {F0, F12, F13}; ++ ++// We preserve all caller-saved register ++void MacroAssembler::pushad(){ ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sd(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) ++ { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++}; ++ ++void MacroAssembler::popad(){ ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ 
ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = 0; i < len; i++) ++ { ++ ld(caller_saved_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++}; ++ ++// We preserve all caller-saved register except V0 ++void MacroAssembler::pushad_except_v0() { ++ int i; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sd(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ ++ // Floating-point registers ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ daddiu(SP, SP, -1 * len * wordSize); ++ for (i = 0; i < len; i++) { ++ sdc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++} ++ ++void MacroAssembler::popad_except_v0() { ++ int i; ++ ++ // Floating-point registers ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ ldc1(caller_saved_fpu_registers[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++ ++ // Fixed-point registers ++ len = sizeof(caller_saved_registers_except_v0) / sizeof(caller_saved_registers_except_v0[0]); ++ for (i = 0; i < len; i++) { ++ ld(caller_saved_registers_except_v0[i], SP, (len - i - 1) * wordSize); ++ } ++ daddiu(SP, SP, len * wordSize); ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ daddiu(SP, SP, -16); ++ sd(reg1, SP, 8); ++ sd(reg2, SP, 0); ++} ++ ++void MacroAssembler::pop2(Register reg1, Register reg2) { ++ ld(reg1, SP, 8); ++ ld(reg2, SP, 0); ++ daddiu(SP, SP, 16); ++} ++ ++// for UseCompressedOops Option ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ lwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else ++ ld(dst, src, oopDesc::klass_offset_in_bytes()); ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if(UseCompressedClassPointers){ ++ encode_klass_not_null(src); ++ sw(src, dst, oopDesc::klass_offset_in_bytes()); ++ } else { ++ sd(src, dst, oopDesc::klass_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ sw(src, dst, oopDesc::klass_gap_offset_in_bytes()); ++ } ++} ++ ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = 
AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, tmp2); ++ } ++} ++ ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++// Doesn't do verfication, generates fixed size code ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register tmp2, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, tmp2); ++} ++ ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) { ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++} ++#endif ++ ++ ++// Algorithm must match oop.inline.hpp encode_heap_oop. ++void MacroAssembler::encode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ return; ++ } ++ ++ movz(r, S5_heapbase, r); ++ dsubu(r, r, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ movz(dst, S5_heapbase, dst); ++ dsubu(dst, dst, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ dsubu(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ movz(dst, R0, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(r, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in 
encode_heap_oop_not_null"); ++ if (Universe::narrow_oop_base() != NULL) { ++ dsubu(r, r, S5_heapbase); ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(r, LogMinObjAlignmentInBytes); ++ } ++ ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should be compressed"); ++#ifdef ASSERT ++ if (CheckCompressedOops) { ++ Label ok; ++ bne(src, R0, ok); ++ delayed()->nop(); ++ stop("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop_not_null2"); ++ ++ if (Universe::narrow_oop_base() != NULL) { ++ dsubu(dst, src, S5_heapbase); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shr(dst, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register r) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ } else { ++ move(AT, r); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ } ++ daddu(r, r, S5_heapbase); ++ movz(r, R0, AT); ++ } ++ verify_oop(r, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (dst != src) nop(); // DON'T DELETE THIS GUY. ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ } else { ++ if (dst != src) move(dst, src); ++ } ++ } else { ++ if (dst == src) { ++ move(AT, dst); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(dst, LogMinObjAlignmentInBytes); ++ } ++ daddu(dst, dst, S5_heapbase); ++ movz(dst, R0, AT); ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ daddu(dst, src, S5_heapbase); ++ } ++ movz(dst, R0, src); ++ } ++ } ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ // Note: it will change flags ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ shl(r, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ daddu(r, r, S5_heapbase); ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ //lea(dst, Address(S5_heapbase, src, Address::times_8, 0)); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes == Address::times_8) { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ daddu(dst, dst, S5_heapbase); ++ } else { ++ dsll(dst, src, LogMinObjAlignmentInBytes); ++ if (Universe::narrow_oop_base() != NULL) { ++ daddu(dst, dst, S5_heapbase); ++ } ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ if (dst != src) { ++ move(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (Universe::narrow_klass_base() != NULL) { ++ assert(r != AT, "Encoding a klass in AT"); ++ set64(AT, (int64_t)Universe::narrow_klass_base()); ++ dsubu(r, r, AT); ++ } ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(r, LogKlassAlignmentInBytes); ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ dsubu(dst, src, dst); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shr(dst, LogKlassAlignmentInBytes); ++ } ++ } else { ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ dsrl(dst, src, LogKlassAlignmentInBytes); ++ } else { ++ move(dst, src); ++ } ++ } ++ } ++} ++ ++// Function instr_size_for_decode_klass_not_null() counts the instructions ++// generated by decode_klass_not_null(register r) and reinit_heapbase(), ++// when (Universe::heap() != NULL). Hence, if the instructions they ++// generate change, then this method needs to be updated. ++int MacroAssembler::instr_size_for_decode_klass_not_null() { ++ assert (UseCompressedClassPointers, "only for compressed klass ptrs"); ++ if (Universe::narrow_klass_base() != NULL) { ++ // mov64 + addq + shlq? + mov64 (for reinit_heapbase()). ++ return (Universe::narrow_klass_shift() == 0 ? 4 * 9 : 4 * 10); ++ } else { ++ // longest load decode klass function, mov64, leaq ++ return (Universe::narrow_klass_shift() == 0 ? 
4 * 0 : 4 * 1); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register r) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != AT, "Decoding a klass in AT"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ shl(r, LogKlassAlignmentInBytes); ++ } ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(AT, (int64_t)Universe::narrow_klass_base()); ++ daddu(r, r, AT); ++ //Not neccessary for MIPS at all. ++ //reinit_heapbase(); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ dsll(AT, src, Address::times_8); ++ daddu(dst, dst, AT); ++ } else { ++ daddu(dst, src, dst); ++ } ++ } ++} ++ ++void MacroAssembler::incrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ addu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { decrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ addu32(reg, reg, AT); ++} ++ ++void MacroAssembler::decrementl(Register reg, int value) { ++ if (value == min_jint) { ++ move(AT, value); ++ subu32(reg, reg, AT); ++ return; ++ } ++ if (value < 0) { incrementl(reg, -value); return; } ++ if (value == 0) { ; return; } ++ ++ move(AT, value); ++ subu32(reg, reg, AT); ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (Universe::narrow_oop_base() == NULL) { ++ move(S5_heapbase, R0); ++ } else { ++ set64(S5_heapbase, (int64_t)Universe::narrow_ptrs_base()); ++ } ++ } else { ++ set64(S5_heapbase, (intptr_t)Universe::narrow_ptrs_base_addr()); ++ ld(S5_heapbase, S5_heapbase, 0); ++ } ++ } ++} ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++//implement ind gen_subtype_check ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else 
if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); ++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. ++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ beq(sub_klass, super_klass, *L_success); ++ delayed()->nop(); ++ // Check the supertype display: ++ if (must_load_sco) { ++ lwu(temp_reg, super_klass, sco_offset); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ daddu(AT, sub_klass, super_check_offset.register_or_noreg()); ++ ld(AT, AT, super_check_offset.constant_or_zero()); ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ addiu(AT, super_check_offset.as_register(), -sc_offset); ++ if (L_failure == &L_fallthrough) { ++ beq(AT, R0, *L_slow_path); ++ delayed()->nop(); ++ } else { ++ bne_far(AT, R0, *L_failure); ++ delayed()->nop(); ++ b(*L_slow_path); ++ delayed()->nop(); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne(super_klass, AT, *L_slow_path); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } else { ++ // No slow path; it's a fast decision. 
++ if (L_failure == &L_fallthrough) { ++ beq(super_klass, AT, *L_success); ++ delayed()->nop(); ++ } else { ++ bne_far(super_klass, AT, *L_failure); ++ delayed()->nop(); ++ b(*L_success); ++ delayed()->nop(); ++ } ++ } ++ ++ bind(L_fallthrough); ++ ++} ++ ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) ++ temp2_reg = TSR; ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++#define IS_A_TEMP(reg) ((reg) == temp_reg || (reg) == temp2_reg) ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. ++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ld(temp_reg, secondary_supers_addr); ++ // Load the array length. ++ lw(temp2_reg, Address(temp_reg, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ daddiu(temp_reg, temp_reg, Array::base_offset_in_bytes()); ++ ++ // OpenJDK8 never compresses klass pointers in secondary-super array. ++ Label Loop, subtype; ++ bind(Loop); ++ beq(temp2_reg, R0, *L_failure); ++ delayed()->nop(); ++ ld(AT, temp_reg, 0); ++ beq(AT, super_klass, subtype); ++ delayed()->daddiu(temp_reg, temp_reg, 1 * wordSize); ++ b(Loop); ++ delayed()->daddiu(temp2_reg, temp2_reg, -1); ++ ++ bind(subtype); ++ sd(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ b(*L_success); ++ delayed()->nop(); ++ } ++ ++ // Success. Cache the super we found and proceed in triumph. ++#undef IS_A_TEMP ++ ++ bind(L_fallthrough); ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ sd(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). 
++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = NOREG; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times_8; ++ } ++ // We don't push RA on stack in prepare_invoke. ++ // offset += wordSize; // return PC is on stack ++ if(scale_reg==NOREG) return Address(SP, offset); ++ else { ++ dsll(scale_reg, scale_reg, scale_factor); ++ daddu(scale_reg, SP, scale_reg); ++ return Address(scale_reg, offset); ++ } ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: lw(dst, src); break; ++ case 2: is_signed ? lh(dst, src) : lhu(dst, src); break; ++ case 1: is_signed ? lb( dst, src) : lbu( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ switch (size_in_bytes) { ++ case 8: sd(src, dst); break; ++ case 4: sw(src, dst); break; ++ case 2: sh(src, dst); break; ++ case 1: sb(src, dst); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, AT); ++ assert_different_registers(method_result, intf_klass, scan_temp, AT); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size() * wordSize; ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ lw(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ dsll(scan_temp, scan_temp, times_vte_scale); ++ daddu(scan_temp, recv_klass, scan_temp); ++ daddiu(scan_temp, scan_temp, vtable_base); ++ if (HeapWordsPerLong > 1) { ++ // Round up to align_object_offset boundary ++ // see code for InstanceKlass::start_of_itable! 
++ round_to(scan_temp, BytesPerLong); ++ } ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ if (itable_index.is_constant()) { ++ set64(AT, (int)itable_index.is_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, itable_index.as_register(), (int)Address::times_ptr); ++ } ++ daddu(AT, AT, recv_klass); ++ daddiu(recv_klass, AT, itentry_off); ++ } ++ ++ Label search, found_method; ++ ++ for (int peel = 1; peel >= 0; peel--) { ++ ld(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ ++ if (peel) { ++ beq(intf_klass, method_result, found_method); ++ delayed()->nop(); ++ } else { ++ bne(intf_klass, method_result, search); ++ delayed()->nop(); ++ // (invert the test to fall through to found_method...) ++ } ++ ++ if (!peel) break; ++ ++ bind(search); ++ ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ beq(method_result, R0, L_no_such_interface); ++ delayed()->nop(); ++ daddiu(scan_temp, scan_temp, scan_step); ++ } ++ ++ bind(found_method); ++ ++ if (return_method) { ++ // Got a hit. ++ lw(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ if (UseLEXT1) { ++ gsldx(method_result, recv_klass, scan_temp, 0); ++ } else { ++ daddu(AT, recv_klass, scan_temp); ++ ld(method_result, AT, 0); ++ } ++ } ++} ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ Register tmp = GP; ++ push(tmp); ++ ++ if (vtable_index.is_constant()) { ++ assert_different_registers(recv_klass, method_result, tmp); ++ } else { ++ assert_different_registers(recv_klass, method_result, vtable_index.as_register(), tmp); ++ } ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ if (vtable_index.is_constant()) { ++ set64(AT, vtable_index.as_constant()); ++ dsll(AT, AT, (int)Address::times_ptr); ++ } else { ++ dsll(AT, vtable_index.as_register(), (int)Address::times_ptr); ++ } ++ set64(tmp, base + vtableEntry::method_offset_in_bytes()); ++ daddu(tmp, tmp, AT); ++ daddu(tmp, tmp, recv_klass); ++ ld(method_result, tmp, 0); ++ ++ pop(tmp); ++} ++ ++void MacroAssembler::store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ sw(src_reg, tmp_reg, disp); ++ } else { ++ st_ptr(src_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ st_ptr(src_reg, tmp_reg, disp); ++ break; ++ case T_INT: ++ sw(src_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ sh(src_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ sb(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(Register src_reg, Address addr, BasicType type, bool wide) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, 
scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++} ++ ++void MacroAssembler::store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ sdc1(src_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ swc1(src_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_for_type(FloatRegister src_reg, Address addr, BasicType type) { ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ store_for_type_by_register(src_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? 
disp : 0, type); ++ } ++} ++ ++void MacroAssembler::load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide) { ++ switch (type) { ++ case T_LONG: ++ ld_ptr(dst_reg, tmp_reg, disp); ++ break; ++ case T_ARRAY: ++ case T_OBJECT: ++ if (UseCompressedOops && !wide) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_ADDRESS: ++ if (UseCompressedClassPointers && disp == oopDesc::klass_offset_in_bytes()) { ++ lwu(dst_reg, tmp_reg, disp); ++ } else { ++ ld_ptr(dst_reg, tmp_reg, disp); ++ } ++ break; ++ case T_INT: ++ lw(dst_reg, tmp_reg, disp); ++ break; ++ case T_CHAR: ++ lhu(dst_reg, tmp_reg, disp); ++ break; ++ case T_SHORT: ++ lh(dst_reg, tmp_reg, disp); ++ break; ++ case T_BYTE: ++ case T_BOOLEAN: ++ lb(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(Register dst_reg, Address addr, BasicType type, bool wide) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type, wide); ++ } ++ ++ return code_offset; ++} ++ ++#ifdef COMPILER2 ++// Compare strings, used for char[] and byte[]. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae) { ++ Label L, Loop, haveResult, done; ++ ++ bool isLL = ae == StrIntrinsicNode::LL; ++ bool isLU = ae == StrIntrinsicNode::LU; ++ bool isUL = ae == StrIntrinsicNode::UL; ++ ++ bool str1_isL = isLL || isLU; ++ bool str2_isL = isLL || isUL; ++ ++ if (!str1_isL) srl(cnt1, cnt1, 1); ++ if (!str2_isL) srl(cnt2, cnt2, 1); ++ ++ // compute the and difference of lengths (in result) ++ subu(result, cnt1, cnt2); // result holds the difference of two lengths ++ ++ // compute the shorter length (in cnt1) ++ slt(AT, cnt2, cnt1); ++ movn(cnt1, cnt2, AT); ++ ++ // Now the shorter length is in cnt1 and cnt2 can be used as a tmp register ++ bind(Loop); // Loop begin ++ beq(cnt1, R0, done); ++ if (str1_isL) { ++ delayed()->lbu(AT, str1, 0); ++ } else { ++ delayed()->lhu(AT, str1, 0); ++ } ++ ++ // compare current character ++ if (str2_isL) { ++ lbu(cnt2, str2, 0); ++ } else { ++ lhu(cnt2, str2, 0); ++ } ++ bne(AT, cnt2, haveResult); ++ delayed()->addiu(str1, str1, str1_isL ? 1 : 2); ++ addiu(str2, str2, str2_isL ? 1 : 2); ++ b(Loop); ++ delayed()->addiu(cnt1, cnt1, -1); // Loop end ++ ++ bind(haveResult); ++ subu(result, AT, cnt2); ++ ++ bind(done); ++} ++ ++// Compare char[] or byte[] arrays or substrings. 
++void MacroAssembler::arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp, Register result, ++ bool is_char) { ++ Label Loop, True, False; ++ ++ beq(str1, str2, True); // same char[] ? ++ delayed()->daddiu(result, R0, 1); ++ ++ beq(cnt, R0, True); ++ delayed()->nop(); // count == 0 ++ ++ bind(Loop); ++ ++ // compare current character ++ if (is_char) { ++ lhu(AT, str1, 0); ++ lhu(tmp, str2, 0); ++ } else { ++ lbu(AT, str1, 0); ++ lbu(tmp, str2, 0); ++ } ++ bne(AT, tmp, False); ++ delayed()->addiu(str1, str1, is_char ? 2 : 1); ++ addiu(cnt, cnt, -1); ++ bne(cnt, R0, Loop); ++ delayed()->addiu(str2, str2, is_char ? 2 : 1); ++ ++ b(True); ++ delayed()->nop(); ++ ++ bind(False); ++ daddiu(result, R0, 0); ++ ++ bind(True); ++} ++#endif // COMPILER2 ++ ++void MacroAssembler::load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type) { ++ switch (type) { ++ case T_DOUBLE: ++ ldc1(dst_reg, tmp_reg, disp); ++ break; ++ case T_FLOAT: ++ lwc1(dst_reg, tmp_reg, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++int MacroAssembler::load_for_type(FloatRegister dst_reg, Address addr, BasicType type) { ++ int code_offset = 0; ++ Register tmp_reg = T9; ++ Register index_reg = addr.index(); ++ if (index_reg == NOREG) { ++ tmp_reg = NOREG; ++ } ++ ++ int scale = addr.scale(); ++ if (tmp_reg != NOREG && scale >= 0) { ++ dsll(tmp_reg, index_reg, scale); ++ } ++ ++ int disp = addr.disp(); ++ bool disp_is_simm16 = true; ++ if (!Assembler::is_simm16(disp)) { ++ disp_is_simm16 = false; ++ } ++ ++ Register base_reg = addr.base(); ++ if (tmp_reg != NOREG) { ++ assert_different_registers(tmp_reg, base_reg, index_reg); ++ } ++ ++ if (tmp_reg != NOREG) { ++ daddu(tmp_reg, base_reg, tmp_reg); ++ if (!disp_is_simm16) { ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } else { ++ if (!disp_is_simm16) { ++ tmp_reg = T9; ++ assert_different_registers(tmp_reg, base_reg); ++ move(tmp_reg, disp); ++ daddu(tmp_reg, base_reg, tmp_reg); ++ } ++ code_offset = offset(); ++ load_for_type_by_register(dst_reg, disp_is_simm16 ? base_reg : tmp_reg, disp_is_simm16 ? disp : 0, type); ++ } ++ ++ return code_offset; ++} ++ ++void MacroAssembler::clear_jweak_tag(Register possibly_jweak) { ++ const int32_t inverted_jweak_mask = ~static_cast<int32_t>(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ // The inverted mask is sign-extended ++ move(AT, inverted_jweak_mask); ++ andr(possibly_jweak, AT, possibly_jweak); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) { ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ beq(value, R0, done); // Use NULL as-is. ++ delayed()->nop(); ++ move(AT, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ andr(AT, value, AT); ++ beq(AT, R0, not_weak); ++ delayed()->nop(); ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ b(done); ++ delayed()->nop(); ++ bind(not_weak); ++ // Resolve (untagged) jobject.
++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_signed) { ++ switch (cmp) { ++ case EQ: ++ subu(AT, op1, op2); ++ movz(dst, src, AT); ++ break; ++ ++ case NE: ++ subu(AT, op1, op2); ++ movn(dst, src, AT); ++ break; ++ ++ case GT: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case GE: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ case LT: ++ if (is_signed) { ++ slt(AT, op1, op2); ++ } else { ++ sltu(AT, op1, op2); ++ } ++ movn(dst, src, AT); ++ break; ++ ++ case LE: ++ if (is_signed) { ++ slt(AT, op2, op1); ++ } else { ++ sltu(AT, op2, op1); ++ } ++ movz(dst, src, AT); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case NE: ++ if (is_float) { ++ c_eq_s(op1, op2); ++ } else { ++ c_eq_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GT: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case GE: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movf(dst, src); ++ break; ++ ++ case LT: ++ if (is_float) { ++ c_ult_s(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ case LE: ++ if (is_float) { ++ c_ule_s(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ movt(dst, src); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ switch(cmp) { ++ case EQ: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case NE: ++ if (!is_float) { ++ c_eq_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_eq_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GT: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case GE: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movf_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movf_s(dst, src); ++ } ++ break; ++ ++ case LT: ++ if (!is_float) { ++ c_ult_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ult_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ case LE: ++ if (!is_float) { ++ c_ule_d(op1, op2); ++ movt_d(dst, src); ++ } else { ++ c_ule_s(op1, op2); ++ movt_s(dst, src); ++ } ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp, ++ bool is_float) { ++ Label L; ++ ++ switch(cmp) { ++ case EQ: ++ bne(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case NE: ++ beq(op1, op2, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GT: ++ slt(AT, op2, op1); ++ 
beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case GE: ++ slt(AT, op1, op2); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LT: ++ slt(AT, op1, op2); ++ beq(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ case LE: ++ slt(AT, op2, op1); ++ bne(AT, R0, L); ++ delayed()->nop(); ++ if (is_float) { ++ mov_s(dst, src); ++ } else { ++ mov_d(dst, src); ++ } ++ bind(L); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::gs_loadstore(Register reg, Register base, Register index, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: ++ gssbx(reg, base, index, disp); ++ break; ++ case STORE_CHAR: ++ case STORE_SHORT: ++ gsshx(reg, base, index, disp); ++ break; ++ case STORE_INT: ++ gsswx(reg, base, index, disp); ++ break; ++ case STORE_LONG: ++ gssdx(reg, base, index, disp); ++ break; ++ case LOAD_BYTE: ++ gslbx(reg, base, index, disp); ++ break; ++ case LOAD_SHORT: ++ gslhx(reg, base, index, disp); ++ break; ++ case LOAD_INT: ++ gslwx(reg, base, index, disp); ++ break; ++ case LOAD_LONG: ++ gsldx(reg, base, index, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: ++ gsswxc1(reg, base, index, disp); ++ break; ++ case STORE_DOUBLE: ++ gssdxc1(reg, base, index, disp); ++ break; ++ case LOAD_FLOAT: ++ gslwxc1(reg, base, index, disp); ++ break; ++ case LOAD_DOUBLE: ++ gsldxc1(reg, base, index, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(Register reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_BYTE: ++ sb(reg, base, disp); ++ break; ++ case STORE_CHAR: ++ case STORE_SHORT: ++ sh(reg, base, disp); ++ break; ++ case STORE_INT: ++ sw(reg, base, disp); ++ break; ++ case STORE_LONG: ++ sd(reg, base, disp); ++ break; ++ case LOAD_BYTE: ++ lb(reg, base, disp); ++ break; ++ case LOAD_U_BYTE: ++ lbu(reg, base, disp); ++ break; ++ case LOAD_SHORT: ++ lh(reg, base, disp); ++ break; ++ case LOAD_U_SHORT: ++ lhu(reg, base, disp); ++ break; ++ case LOAD_INT: ++ lw(reg, base, disp); ++ break; ++ case LOAD_U_INT: ++ lwu(reg, base, disp); ++ break; ++ case LOAD_LONG: ++ ld(reg, base, disp); ++ break; ++ case LOAD_LINKED_LONG: ++ lld(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::loadstore(FloatRegister reg, Register base, int disp, int type) { ++ switch (type) { ++ case STORE_FLOAT: ++ swc1(reg, base, disp); ++ break; ++ case STORE_DOUBLE: ++ sdc1(reg, base, disp); ++ break; ++ case LOAD_FLOAT: ++ lwc1(reg, base, disp); ++ break; ++ case LOAD_DOUBLE: ++ ldc1(reg, base, disp); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.hpp +new file mode 100644 +index 0000000000..55ec29e91b +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.hpp +@@ -0,0 +1,818 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/rtmLocking.hpp" ++#include "utilities/macros.hpp" ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ friend class Runtime1; // as_Address() ++ ++ public: ++ // Compare code ++ typedef enum { ++ EQ = 0x01, ++ NE = 0x02, ++ GT = 0x03, ++ GE = 0x04, ++ LT = 0x05, ++ LE = 0x06 ++ } CMCompare; ++ ++ protected: ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ #define VIRTUAL virtual ++ ++ VIRTUAL void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than TREG will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than sp will be used instead. 
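++ // For example, a call such as call_VM(V0, entry_point, A1) is expected to end up
++ // here with java_thread == noreg (so TREG is used), last_java_sp == noreg (so SP
++ // is used), number_of_arguments == 1 and check_exceptions == true.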
++ VIRTUAL void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ // helpers for FPU flag access ++ // tmp is a temporary register, if none is available use noreg ++ ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ Address as_Address(AddressLiteral adr); ++ Address as_Address(ArrayAddress adr); ++ ++ static intptr_t i[32]; ++ static float f[32]; ++ static void print(outputStream *s); ++ ++ static int i_offset(unsigned int k); ++ static int f_offset(unsigned int k); ++ ++ static void save_registers(MacroAssembler *masm); ++ static void restore_registers(MacroAssembler *masm); ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. ++ void pd_patch_instruction(address branch, address target); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ void incrementl(Register reg, int value = 1); ++ void decrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
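++ //
++ // For example, a runtime entry that can throw is typically reached through
++ //   call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError));
++ // while a C helper that neither blocks nor throws is reached through
++ //   call_VM_leaf(CAST_FROM_FN_PTR(address, some_c_function), A0);
++ // (illustrative patterns; see the interpreter and stub generators for actual uses).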
++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // Super call_VM calls - correspond to MacroAssembler::call_VM(_leaf) calls ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ ++ // last Java Frame (fills frame anchor) ++ void set_last_Java_frame(Register thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ // thread in the default location (S6) ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc); ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++ // thread in the default location (S6) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // jobjects ++ void clear_jweak_tag(Register possibly_jweak); ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 
0 : 1 ++ void c2bool(Register x); ++ ++ void resolve_oop_handle(Register result, Register tmp); ++ void load_mirror(Register dst, Register method, Register tmp); ++ ++ // oop manipulations ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register thread_tmp); ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp2); ++ ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register tmp2 = noreg, DecoratorSet decorators = 0); ++ ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. ++ void store_heap_oop_null(Address dst); ++ ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ void encode_heap_oop(Register r); ++ void encode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r); ++ void decode_heap_oop(Register dst, Register src); ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ ++ // Returns the byte size of the instructions generated by decode_klass_not_null() ++ // when compressed klass pointers are being used. 
++ static int instr_size_for_decode_klass_not_null(); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_oop(Register dst, jobject obj); ++ ++ ++ ++ ++ // Sign extension ++ void sign_extend_short(Register reg) { /*dsll32(reg, reg, 16); dsra32(reg, reg, 16);*/ seh(reg, reg); } ++ void sign_extend_byte(Register reg) { /*dsll32(reg, reg, 24); dsra32(reg, reg, 24);*/ seb(reg, reg); } ++ void rem_s(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ void rem_d(FloatRegister fd, FloatRegister fs, FloatRegister ft, FloatRegister tmp); ++ ++ // allocation ++ void eden_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void incr_allocated_bytes(Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes, ++ Register t1 = noreg); ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. ++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. 
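++ // Roughly equivalent to the following sequence (sketch):
++ //   Label L_failure;
++ //   check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL);
++ //   check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL);
++ //   bind(L_failure);   // fall through on failure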
++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ void verify_oop_subroutine(); ++ // TODO: verify method and klass metadata (compare against vptr?) ++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++ #define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++ #define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ ++ // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg/*, RegistersForDebugging* regs*/); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++ void print_reg(Register reg); ++ void print_reg(FloatRegister reg); ++ ++ void untested() { stop("untested"); } ++ ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here() { stop("should not reach here"); } ++ ++ void print_CPU_state(); ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 32768) { ++ sw(A0, SP, -offset); ++ } else { ++ li(AT, offset); ++ dsubu(AT, SP, AT); ++ sw(A0, AT, 0); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ // Check for reserved stack access in method being exited (for JIT) ++ void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ // Support for serializing memory accesses between threads ++ void serialize_memory(Register thread, Register tmp); ++ ++ void safepoint_poll(Label& slow_path, Register thread_reg); ++ void safepoint_poll_acquire(Label& slow_path, Register thread_reg); ++ ++ //void verify_tlab(); ++ void verify_tlab(Register t1, Register t2); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // tmp_reg is optional. If it is supplied (i.e., != noreg) it will ++ // be killed; if not supplied, push/pop will be used internally to ++ // allocate a temporary (inefficient, avoid if possible). ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. 
++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++#ifdef COMPILER2 ++ void fast_lock(Register obj, Register box, Register res, Register tmp, Register scr); ++ void fast_unlock(Register obj, Register box, Register res, Register tmp, Register scr); ++#endif ++ ++ ++ // Arithmetics ++ // Regular vs. d* versions ++ inline void addu_long(Register rd, Register rs, Register rt) { ++ daddu(rd, rs, rt); ++ } ++ inline void addu_long(Register rd, Register rs, long imm32_64) { ++ daddiu(rd, rs, imm32_64); ++ } ++ ++ void round_to(Register reg, int modulus) { ++ assert_different_registers(reg, AT); ++ increment(reg, modulus - 1); ++ move(AT, - modulus); ++ andr(reg, reg, AT); ++ } ++ ++ // the follow two might use AT register, be sure you have no meanful data in AT before you call them ++ void increment(Register reg, int imm); ++ void decrement(Register reg, int imm); ++ ++ void shl(Register reg, int sa) { dsll(reg, reg, sa); } ++ void shr(Register reg, int sa) { dsrl(reg, reg, sa); } ++ void sar(Register reg, int sa) { dsra(reg, reg, sa); } ++ ++ // Helper functions for statistics gathering. ++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // Calls ++ void call(address entry); ++ void call(address entry, relocInfo::relocType rtype); ++ void call(address entry, RelocationHolder& rh); ++ ++ address trampoline_call(AddressLiteral entry, CodeBuffer *cbuf = NULL); ++ ++ // Emit the CompiledIC call idiom ++ void ic_call(address entry, jint method_index = 0); ++ ++ // Jumps ++ void jmp(address entry); ++ void jmp(address entry, relocInfo::relocType rtype); ++ void jmp_far(Label& L); // always long jumps ++ ++ /* branches may exceed 16-bit offset */ ++ void b_far(address entry); ++ void b_far(Label& L); ++ ++ void bne_far (Register rs, Register rt, address entry); ++ void bne_far (Register rs, Register rt, Label& L); ++ ++ void beq_far (Register rs, Register rt, address entry); ++ void beq_far (Register rs, Register rt, Label& L); ++ ++ // For C2 to support long branches ++ void beq_long (Register rs, Register rt, Label& L); ++ void bne_long (Register rs, Register rt, Label& L); ++ void bc1t_long (Label& L); ++ void bc1f_long (Label& L); ++ ++ void patchable_call(address target); ++ void general_call(address target); ++ ++ void patchable_jump(address target); ++ void general_jump(address target); ++ ++ static int insts_for_patchable_call(address target); ++ static int insts_for_general_call(address target); ++ ++ static int insts_for_patchable_jump(address target); ++ static int insts_for_general_jump(address target); ++ ++ // Floating ++ // Data ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a) { ++ ld(rt, a); ++ } ++ ++ inline void ld_ptr(Register rt, Register base, int offset16) { ++ ld(rt, base, offset16); ++ } ++ ++ // st_ptr will perform sw for 32 bit VMs and sd for 64 bit VMs ++ inline void st_ptr(Register rt, Address a) { ++ sd(rt, a); ++ } ++ ++ inline void st_ptr(Register rt, 
Register base, int offset16) { ++ sd(rt, base, offset16); ++ } ++ ++ void ld_ptr(Register rt, Register base, Register offset); ++ void st_ptr(Register rt, Register base, Register offset); ++ ++ // swap the two byte of the low 16-bit halfword ++ // this directive will use AT, be sure the high 16-bit of reg is zero ++ void hswap(Register reg); ++ void huswap(Register reg); ++ ++ // convert big endian integer to little endian integer ++ void swap(Register reg); ++ ++ // implement the x86 instruction semantic ++ // if c_reg == *dest then *dest <= x_reg ++ // else c_reg <= *dest ++ // the AT indicate if xchg occurred, 1 for xchged, else 0 ++ void cmpxchg(Address addr, Register oldval, Register newval, Register resflag, ++ bool retold, bool barrier); ++ void cmpxchg(Address addr, Register oldval, Register newval, Register tmp, ++ bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register resflag, ++ bool sign, bool retold, bool barrier); ++ void cmpxchg32(Address addr, Register oldval, Register newval, Register tmp, ++ bool sign, bool retold, bool barrier, Label& succ, Label* fail = NULL); ++ void cmpxchg8(Register x_regLo, Register x_regHi, Address dest, Register c_regLo, Register c_regHi); ++ ++ //pop & push ++ void extend_sign(Register rh, Register rl) { stop("extend_sign"); } ++ void neg(Register reg) { dsubu(reg, R0, reg); } ++ void push (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void push (FloatRegister reg) { daddiu(SP, SP, -8); sdc1(reg, SP, 0); } ++ void pop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop (FloatRegister reg) { ldc1(reg, SP, 0); daddiu(SP, SP, 8); } ++ void pop () { daddiu(SP, SP, 8); } ++ void pop2 () { daddiu(SP, SP, 16); } ++ void push2(Register reg1, Register reg2); ++ void pop2 (Register reg1, Register reg2); ++ void dpush (Register reg) { daddiu(SP, SP, -8); sd (reg, SP, 0); } ++ void dpop (Register reg) { ld (reg, SP, 0); daddiu(SP, SP, 8); } ++ //we need 2 fun to save and resotre general register ++ void pushad(); ++ void popad(); ++ void pushad_except_v0(); ++ void popad_except_v0(); ++ ++ //move an 32-bit immediate to Register ++ void move(Register reg, int imm32) { li32(reg, imm32); } ++ void li (Register rd, long imm); ++ void li (Register rd, address addr) { li(rd, (long)addr); } ++ //replace move(Register reg, int imm) ++ void li32(Register rd, int imm32); // sign-extends to 64 bits on mips64 ++ void set64(Register d, jlong value); ++ static int insts_for_set64(jlong value); ++ ++ void patchable_set48(Register d, jlong value); ++ void patchable_set32(Register d, jlong value); ++ ++ void patchable_call32(Register d, jlong value); ++ ++ static int call_size(address target, bool far, bool patchable); ++ ++ static bool reachable_from_cache(address target); ++ static bool reachable_from_cache(); ++ ++ ++ void dli(Register rd, long imm) { li(rd, imm); } ++ void li64(Register rd, long imm); ++ void li48(Register rd, long imm); ++ ++ void move(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void move_u32(Register rd, Register rs) { addu32(rd, rs, R0); } ++ void dmove(Register rd, Register rs) { daddu(rd, rs, R0); } ++ void mov_metadata(Register dst, Metadata* obj); ++ void mov_metadata(Address dst, Metadata* obj); ++ ++ void store_for_type_by_register(Register src_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void store_for_type_by_register(FloatRegister src_reg, Register tmp_reg, int disp, BasicType type); ++ void 
store_for_type(Register src_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ void store_for_type(FloatRegister src_reg, Address addr, BasicType type = T_INT); ++ void load_for_type_by_register(Register dst_reg, Register tmp_reg, int disp, BasicType type, bool wide); ++ void load_for_type_by_register(FloatRegister dst_reg, Register tmp_reg, int disp, BasicType type); ++ int load_for_type(Register dst_reg, Address addr, BasicType type = T_INT, bool wide = false); ++ int load_for_type(FloatRegister dst_reg, Address addr, BasicType type = T_INT); ++ ++#ifndef PRODUCT ++ static void pd_print_patched_instruction(address branch) { ++ jint stub_inst = *(jint*) branch; ++ print_instruction(stub_inst); ++ ::tty->print("%s", " (unresolved)"); ++ ++ } ++#endif ++ ++ //FIXME ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++#ifdef COMPILER2 ++ // Compare strings. ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ int ae); ++ ++ // Compare char[] or byte[] arrays. ++ void arrays_equals(Register str1, Register str2, ++ Register cnt, Register tmp, Register result, ++ bool is_char); ++#endif ++ ++ // method handles (JSR 292) ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ // Conditional move ++ void cmp_cmov(Register op1, ++ Register op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_signed = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ Register dst, ++ Register src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(FloatRegister op1, ++ FloatRegister op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ void cmp_cmov(Register op1, ++ Register op2, ++ FloatRegister dst, ++ FloatRegister src, ++ CMCompare cmp = EQ, ++ bool is_float = true); ++ ++#undef VIRTUAL ++ ++public: ++ ++// Memory Data Type ++#define INT_TYPE 0x100 ++#define FLOAT_TYPE 0x200 ++#define SIGNED_TYPE 0x10 ++#define UNSIGNED_TYPE 0x20 ++ ++ typedef enum { ++ LOAD_BYTE = INT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_CHAR = INT_TYPE | SIGNED_TYPE | 0x2, ++ LOAD_SHORT = INT_TYPE | SIGNED_TYPE | 0x3, ++ LOAD_INT = INT_TYPE | SIGNED_TYPE | 0x4, ++ LOAD_LONG = INT_TYPE | SIGNED_TYPE | 0x5, ++ STORE_BYTE = INT_TYPE | SIGNED_TYPE | 0x6, ++ STORE_CHAR = INT_TYPE | SIGNED_TYPE | 0x7, ++ STORE_SHORT = INT_TYPE | SIGNED_TYPE | 0x8, ++ STORE_INT = INT_TYPE | SIGNED_TYPE | 0x9, ++ STORE_LONG = INT_TYPE | SIGNED_TYPE | 0xa, ++ LOAD_LINKED_LONG = INT_TYPE | SIGNED_TYPE | 0xb, ++ ++ LOAD_U_BYTE = INT_TYPE | UNSIGNED_TYPE | 0x1, ++ LOAD_U_SHORT = INT_TYPE | UNSIGNED_TYPE | 0x2, ++ LOAD_U_INT = INT_TYPE | UNSIGNED_TYPE | 0x3, ++ ++ LOAD_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x1, ++ LOAD_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x2, ++ STORE_FLOAT = FLOAT_TYPE | SIGNED_TYPE | 0x3, ++ STORE_DOUBLE = FLOAT_TYPE | SIGNED_TYPE | 0x4 ++ } CMLoadStoreDataType; ++ ++ void loadstore_enc(Register reg, int base, int index, int scale, int disp, int type) { ++ assert((type & INT_TYPE), "must be General reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++ void loadstore_enc(FloatRegister reg, int base, int index, int scale, int disp, int type) { ++ assert((type & FLOAT_TYPE), "must be Float reg type"); ++ loadstore_t(reg, base, index, scale, disp, type); ++ } ++ ++private: ++ ++ template <typename T> ++ void loadstore_t(T reg, int base, int index, int scale, int disp, int type) { ++ if (index != 0) { ++ if (Assembler::is_simm16(disp)) { ++ if (UseLEXT1 
&& (type & SIGNED_TYPE) && Assembler::is_simm(disp, 8)) { ++ if (scale == 0) { ++ gs_loadstore(reg, as_Register(base), as_Register(index), disp, type); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ gs_loadstore(reg, as_Register(base), AT, disp, type); ++ } ++ } else { ++ if (scale == 0) { ++ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ addu(AT, as_Register(base), AT); ++ } ++ loadstore(reg, AT, disp, type); ++ } ++ } else { ++ if (scale == 0) { ++ addu(AT, as_Register(base), as_Register(index)); ++ } else { ++ dsll(AT, as_Register(index), scale); ++ addu(AT, as_Register(base), AT); ++ } ++ move(RT9, disp); ++ if (UseLEXT1 && (type & SIGNED_TYPE)) { ++ gs_loadstore(reg, AT, RT9, 0, type); ++ } else { ++ addu(AT, AT, RT9); ++ loadstore(reg, AT, 0, type); ++ } ++ } ++ } else { ++ if (Assembler::is_simm16(disp)) { ++ loadstore(reg, as_Register(base), disp, type); ++ } else { ++ move(RT9, disp); ++ if (UseLEXT1 && (type & SIGNED_TYPE)) { ++ gs_loadstore(reg, as_Register(base), RT9, 0, type); ++ } else { ++ addu(AT, as_Register(base), RT9); ++ loadstore(reg, AT, 0, type); ++ } ++ } ++ } ++ } ++ void loadstore(Register reg, Register base, int disp, int type); ++ void loadstore(FloatRegister reg, Register base, int disp, int type); ++ void gs_loadstore(Register reg, Register base, Register index, int disp, int type); ++ void gs_loadstore(FloatRegister reg, Register base, Register index, int disp, int type); ++}; ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++public: ++ inline SkipIfEqual(MacroAssembler* masm, const bool* flag_addr, bool value) ++ : _masm(masm) { ++ _masm->li(AT, (address)flag_addr); ++ _masm->lb(AT, AT, 0); ++ if (value) { ++ _masm->bne(AT, R0, _label); ++ } else { ++ _masm->beq(AT, R0, _label); ++ } ++ _masm->delayed()->nop(); ++ } ++ ++ ~SkipIfEqual(); ++}; ++ ++#ifdef ASSERT ++inline bool AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp +new file mode 100644 +index 0000000000..92c05fb726 +--- /dev/null ++++ b/src/hotspot/cpu/mips/macroAssembler_mips.inline.hpp +@@ -0,0 +1,34 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++#endif // CPU_MIPS_VM_MACROASSEMBLER_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/methodHandles_mips.cpp b/src/hotspot/cpu/mips/methodHandles_mips.cpp +new file mode 100644 +index 0000000000..e9788ac52c +--- /dev/null ++++ b/src/hotspot/cpu/mips/methodHandles_mips.cpp +@@ -0,0 +1,576 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "utilities/preserveException.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) // nothing ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ __ ld(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) { ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) { ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ lw(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); ++ __ sra(temp, temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT); ++ __ move(AT, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK); ++ __ andr(temp, temp, AT); ++ __ move(AT, ref_kind); ++ __ beq(temp, AT, L); ++ __ delayed()->nop(); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ STOP(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ bind(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) { ++ assert(method == Rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ beq(method, R0, L_no_such_method); ++ __ delayed()->nop(); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ Register rthread = TREG; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? 
++ __ lbu(AT, rthread, in_bytes(JavaThread::interp_only_mode_offset())); ++ __ beq(AT, R0, run_compiled_code); ++ __ delayed()->nop(); ++ __ ld(T9, method, in_bytes(Method::interpreter_entry_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ ld(T9, method, in_bytes(entry_offset)); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_method); ++ address wrong_method = StubRoutines::throw_AbstractMethodError_entry(); ++ __ jmp(wrong_method, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. ++ assert_different_registers(recv, method_temp, temp2); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == Rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes()))); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), noreg, noreg); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ld(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ Address recv_addr = __ argument_address(temp2, -1); ++ __ ld(AT, recv_addr); ++ __ beq(recv, AT, L); ++ __ delayed()->nop(); ++ ++ recv_addr = __ argument_address(temp2, -1); ++ __ ld(V0, recv_addr); ++ __ STOP("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) { ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. 
++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // Rmethod: Method* ++ // T9: argument locator (parameter slot count, added to sp) ++ // S7: used as temp to hold mh or receiver ++ Register t9_argp = T9; // argument list ptr, live on error paths ++ Register s7_mh = S7; // MH receiver; dies quickly and is recycled ++ Register rm_method = Rmethod; // eventual target of this invocation ++ ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ lhu(AT, rm_method, Method::intrinsic_id_offset_in_bytes()); ++ guarantee(Assembler::is_simm16(iid), "Oops, iid is not simm16! Change the instructions."); ++ __ addiu(AT, AT, -1 * (int) iid); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ STOP("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address t9_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ld(t9_argp, Address(rm_method, Method::const_offset())); ++ __ load_sized_value(t9_argp, ++ Address(t9_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ t9_first_arg_addr = __ argument_address(t9_argp, -1); ++ } else { ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ld(s7_mh, t9_first_arg_addr); ++ DEBUG_ONLY(t9_argp = noreg); ++ } ++ ++ // t9_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, s7_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register r_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ld(r_recv = T2, t9_first_arg_addr); ++ } ++ DEBUG_ONLY(t9_argp = noreg); ++ Register rm_member = rm_method; // MemberName ptr; incoming method ptr is dead now ++ __ pop(rm_member); // extract last argument ++ generate_method_handle_dispatch(_masm, iid, r_recv, rm_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) { ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rm_method = Rmethod; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ Register j_rarg0 = T0; ++ Register j_rarg1 = A0; ++ Register j_rarg2 = A1; ++ Register j_rarg3 = A2; ++ Register j_rarg4 = A3; ++ Register j_rarg5 = A4; ++ ++ Register temp1 = T8; ++ Register temp2 = T9; ++ Register temp3 = V0; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rm_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( rm_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ STOP("receiver class disagrees with MemberName.clazz"); ++ __ bind(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(rm_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ slt(AT, R0, temp2_index); ++ __ bne(AT, R0, L_index_ok); ++ __ delayed()->nop(); ++ __ STOP("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rm_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rm_index = rm_method; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rm_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ slt(AT, rm_index, R0); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ STOP("invalid vtable index for MH.invokeInterface"); ++ __ bind(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rm_index, rm_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // Live at this point: ++ // rm_method ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that r_recv be shifted out. ++ __ verify_method_ptr(rm_method); ++ jump_from_method_handle(_masm, rm_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ address icce_entry= StubRoutines::throw_IncompatibleClassChangeError_entry(); ++ __ jmp(icce_entry, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "s7_mh" : "s7"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. 
++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). ++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). ++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && oopDesc::is_oop(mh)) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++ } ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) { ++} ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/mips/methodHandles_mips.hpp b/src/hotspot/cpu/mips/methodHandles_mips.hpp +new file mode 100644 +index 0000000000..03b65fc8ef +--- /dev/null ++++ b/src/hotspot/cpu/mips/methodHandles_mips.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. ++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++// Additional helper methods for MethodHandles code generation: ++public: ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. ++ return I29; ++ } +diff --git a/src/hotspot/cpu/mips/mips.ad b/src/hotspot/cpu/mips/mips.ad +new file mode 100644 +index 0000000000..3563bbe0e5 +--- /dev/null ++++ b/src/hotspot/cpu/mips/mips.ad +@@ -0,0 +1,25 @@ ++// ++// Copyright (c) 2011, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ +diff --git a/src/hotspot/cpu/mips/mips_64.ad b/src/hotspot/cpu/mips/mips_64.ad +new file mode 100644 +index 0000000000..b4acbd83f7 +--- /dev/null ++++ b/src/hotspot/cpu/mips/mips_64.ad +@@ -0,0 +1,12243 @@ ++// ++// Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2015, 2022, Loongson Technology. 
All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// GodSon3 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. ++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++register %{ ++ ++// General Registers ++// Integer Registers ++ reg_def R0 ( NS, NS, Op_RegI, 0, VMRegImpl::Bad()); ++ reg_def AT ( NS, NS, Op_RegI, 1, AT->as_VMReg()); ++ reg_def AT_H ( NS, NS, Op_RegI, 1, AT->as_VMReg()->next()); ++ reg_def V0 (SOC, SOC, Op_RegI, 2, V0->as_VMReg()); ++ reg_def V0_H (SOC, SOC, Op_RegI, 2, V0->as_VMReg()->next()); ++ reg_def V1 (SOC, SOC, Op_RegI, 3, V1->as_VMReg()); ++ reg_def V1_H (SOC, SOC, Op_RegI, 3, V1->as_VMReg()->next()); ++ reg_def A0 (SOC, SOC, Op_RegI, 4, A0->as_VMReg()); ++ reg_def A0_H (SOC, SOC, Op_RegI, 4, A0->as_VMReg()->next()); ++ reg_def A1 (SOC, SOC, Op_RegI, 5, A1->as_VMReg()); ++ reg_def A1_H (SOC, SOC, Op_RegI, 5, A1->as_VMReg()->next()); ++ reg_def A2 (SOC, SOC, Op_RegI, 6, A2->as_VMReg()); ++ reg_def A2_H (SOC, SOC, Op_RegI, 6, A2->as_VMReg()->next()); ++ reg_def A3 (SOC, SOC, Op_RegI, 7, A3->as_VMReg()); ++ reg_def A3_H (SOC, SOC, Op_RegI, 7, A3->as_VMReg()->next()); ++ reg_def A4 (SOC, SOC, Op_RegI, 8, A4->as_VMReg()); ++ reg_def A4_H (SOC, SOC, Op_RegI, 8, A4->as_VMReg()->next()); ++ reg_def A5 (SOC, SOC, Op_RegI, 9, A5->as_VMReg()); ++ reg_def A5_H (SOC, SOC, Op_RegI, 9, A5->as_VMReg()->next()); ++ reg_def A6 (SOC, SOC, Op_RegI, 10, A6->as_VMReg()); ++ reg_def A6_H (SOC, SOC, Op_RegI, 10, A6->as_VMReg()->next()); ++ reg_def A7 (SOC, SOC, Op_RegI, 11, A7->as_VMReg()); ++ reg_def A7_H (SOC, SOC, Op_RegI, 11, A7->as_VMReg()->next()); ++ reg_def T0 (SOC, SOC, Op_RegI, 12, T0->as_VMReg()); ++ reg_def T0_H (SOC, SOC, Op_RegI, 12, T0->as_VMReg()->next()); ++ reg_def T1 (SOC, SOC, Op_RegI, 13, T1->as_VMReg()); ++ reg_def T1_H (SOC, SOC, Op_RegI, 13, T1->as_VMReg()->next()); ++ reg_def T2 (SOC, SOC, Op_RegI, 14, T2->as_VMReg()); ++ reg_def T2_H (SOC, SOC, Op_RegI, 14, T2->as_VMReg()->next()); ++ reg_def T3 (SOC, SOC, Op_RegI, 15, T3->as_VMReg()); ++ reg_def T3_H (SOC, SOC, Op_RegI, 15, 
T3->as_VMReg()->next()); ++ reg_def S0 (SOC, SOE, Op_RegI, 16, S0->as_VMReg()); ++ reg_def S0_H (SOC, SOE, Op_RegI, 16, S0->as_VMReg()->next()); ++ reg_def S1 (SOC, SOE, Op_RegI, 17, S1->as_VMReg()); ++ reg_def S1_H (SOC, SOE, Op_RegI, 17, S1->as_VMReg()->next()); ++ reg_def S2 (SOC, SOE, Op_RegI, 18, S2->as_VMReg()); ++ reg_def S2_H (SOC, SOE, Op_RegI, 18, S2->as_VMReg()->next()); ++ reg_def S3 (SOC, SOE, Op_RegI, 19, S3->as_VMReg()); ++ reg_def S3_H (SOC, SOE, Op_RegI, 19, S3->as_VMReg()->next()); ++ reg_def S4 (SOC, SOE, Op_RegI, 20, S4->as_VMReg()); ++ reg_def S4_H (SOC, SOE, Op_RegI, 20, S4->as_VMReg()->next()); ++ reg_def S5 (SOC, SOE, Op_RegI, 21, S5->as_VMReg()); ++ reg_def S5_H (SOC, SOE, Op_RegI, 21, S5->as_VMReg()->next()); ++ reg_def S6 (SOC, SOE, Op_RegI, 22, S6->as_VMReg()); ++ reg_def S6_H (SOC, SOE, Op_RegI, 22, S6->as_VMReg()->next()); ++ reg_def S7 (SOC, SOE, Op_RegI, 23, S7->as_VMReg()); ++ reg_def S7_H (SOC, SOE, Op_RegI, 23, S7->as_VMReg()->next()); ++ reg_def T8 (SOC, SOC, Op_RegI, 24, T8->as_VMReg()); ++ reg_def T8_H (SOC, SOC, Op_RegI, 24, T8->as_VMReg()->next()); ++ reg_def T9 (SOC, SOC, Op_RegI, 25, T9->as_VMReg()); ++ reg_def T9_H (SOC, SOC, Op_RegI, 25, T9->as_VMReg()->next()); ++ ++// Special Registers ++ reg_def K0 ( NS, NS, Op_RegI, 26, K0->as_VMReg()); ++ reg_def K1 ( NS, NS, Op_RegI, 27, K1->as_VMReg()); ++ reg_def GP ( NS, NS, Op_RegI, 28, GP->as_VMReg()); ++ reg_def GP_H ( NS, NS, Op_RegI, 28, GP->as_VMReg()->next()); ++ reg_def SP ( NS, NS, Op_RegI, 29, SP->as_VMReg()); ++ reg_def SP_H ( NS, NS, Op_RegI, 29, SP->as_VMReg()->next()); ++ reg_def FP ( NS, NS, Op_RegI, 30, FP->as_VMReg()); ++ reg_def FP_H ( NS, NS, Op_RegI, 30, FP->as_VMReg()->next()); ++ reg_def RA ( NS, NS, Op_RegI, 31, RA->as_VMReg()); ++ reg_def RA_H ( NS, NS, Op_RegI, 31, RA->as_VMReg()->next()); ++ ++// Floating registers. 
++reg_def F0 ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, F0->as_VMReg()->next()); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, F1->as_VMReg()->next()); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, F2->as_VMReg()->next()); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, F3->as_VMReg()->next()); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, F4->as_VMReg()->next()); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, F5->as_VMReg()->next()); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, F6->as_VMReg()->next()); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, F7->as_VMReg()->next()); ++reg_def F8 ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()); ++reg_def F8_H ( SOC, SOC, Op_RegF, 8, F8->as_VMReg()->next()); ++reg_def F9 ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()); ++reg_def F9_H ( SOC, SOC, Op_RegF, 9, F9->as_VMReg()->next()); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, F10->as_VMReg()->next()); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, F11->as_VMReg()->next()); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, F12->as_VMReg()->next()); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, F13->as_VMReg()->next()); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, F14->as_VMReg()->next()); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, F15->as_VMReg()->next()); ++reg_def F16 ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, F16->as_VMReg()->next()); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, F17->as_VMReg()->next()); ++reg_def F18 ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()); ++reg_def F18_H ( SOC, SOC, Op_RegF, 18, F18->as_VMReg()->next()); ++reg_def F19 ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()); ++reg_def F19_H ( SOC, SOC, Op_RegF, 19, F19->as_VMReg()->next()); ++reg_def F20 ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()); ++reg_def F20_H ( SOC, SOC, Op_RegF, 20, F20->as_VMReg()->next()); ++reg_def F21 ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()); ++reg_def F21_H ( SOC, SOC, Op_RegF, 21, F21->as_VMReg()->next()); ++reg_def F22 ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()); ++reg_def F22_H ( SOC, SOC, Op_RegF, 22, F22->as_VMReg()->next()); ++reg_def F23 ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()); ++reg_def F23_H ( SOC, SOC, Op_RegF, 23, F23->as_VMReg()->next()); ++reg_def F24 ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()); ++reg_def F24_H ( SOC, SOC, Op_RegF, 24, F24->as_VMReg()->next()); ++reg_def F25 ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()); ++reg_def F25_H ( SOC, SOC, Op_RegF, 25, F25->as_VMReg()->next()); ++reg_def F26 ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()); ++reg_def F26_H ( SOC, SOC, Op_RegF, 26, F26->as_VMReg()->next()); ++reg_def F27 ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()); ++reg_def F27_H ( SOC, SOC, Op_RegF, 27, F27->as_VMReg()->next()); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, F28->as_VMReg()->next()); ++reg_def F29 ( 
SOC, SOC, Op_RegF, 29, F29->as_VMReg());
++reg_def F29_H ( SOC, SOC, Op_RegF, 29, F29->as_VMReg()->next());
++reg_def F30 ( SOC, SOC, Op_RegF, 30, F30->as_VMReg());
++reg_def F30_H ( SOC, SOC, Op_RegF, 30, F30->as_VMReg()->next());
++reg_def F31 ( SOC, SOC, Op_RegF, 31, F31->as_VMReg());
++reg_def F31_H ( SOC, SOC, Op_RegF, 31, F31->as_VMReg()->next());
++
++
++// ----------------------------
++// Special Registers
++//S6 is used for get_thread(S6)
++//S5 is used for heapbase of compressed oop
++alloc_class chunk0(
++ S7, S7_H,
++ S0, S0_H,
++ S1, S1_H,
++ S2, S2_H,
++ S4, S4_H,
++ S5, S5_H,
++ S6, S6_H,
++ S3, S3_H,
++ T2, T2_H,
++ T3, T3_H,
++ T8, T8_H,
++ T9, T9_H,
++ T1, T1_H, // inline_cache_reg
++ V1, V1_H,
++ A7, A7_H,
++ A6, A6_H,
++ A5, A5_H,
++ A4, A4_H,
++ V0, V0_H,
++ A3, A3_H,
++ A2, A2_H,
++ A1, A1_H,
++ A0, A0_H,
++ T0, T0_H,
++ GP, GP_H,
++ RA, RA_H,
++ SP, SP_H, // stack_pointer
++ FP, FP_H // frame_pointer
++ );
++
++alloc_class chunk1( F0, F0_H,
++ F1, F1_H,
++ F2, F2_H,
++ F3, F3_H,
++ F4, F4_H,
++ F5, F5_H,
++ F6, F6_H,
++ F7, F7_H,
++ F8, F8_H,
++ F9, F9_H,
++ F10, F10_H,
++ F11, F11_H,
++ F20, F20_H,
++ F21, F21_H,
++ F22, F22_H,
++ F23, F23_H,
++ F24, F24_H,
++ F25, F25_H,
++ F26, F26_H,
++ F27, F27_H,
++ F28, F28_H,
++ F19, F19_H,
++ F18, F18_H,
++ F17, F17_H,
++ F16, F16_H,
++ F15, F15_H,
++ F14, F14_H,
++ F13, F13_H,
++ F12, F12_H,
++ F29, F29_H,
++ F30, F30_H,
++ F31, F31_H);
++
++reg_class s_reg( S0, S1, S2, S3, S4, S5, S6, S7 );
++reg_class s0_reg( S0 );
++reg_class s1_reg( S1 );
++reg_class s2_reg( S2 );
++reg_class s3_reg( S3 );
++reg_class s4_reg( S4 );
++reg_class s5_reg( S5 );
++reg_class s6_reg( S6 );
++reg_class s7_reg( S7 );
++
++reg_class t_reg( T0, T1, T2, T3, T8, T9 );
++reg_class t0_reg( T0 );
++reg_class t1_reg( T1 );
++reg_class t2_reg( T2 );
++reg_class t3_reg( T3 );
++reg_class t8_reg( T8 );
++reg_class t9_reg( T9 );
++
++reg_class a_reg( A0, A1, A2, A3, A4, A5, A6, A7 );
++reg_class a0_reg( A0 );
++reg_class a1_reg( A1 );
++reg_class a2_reg( A2 );
++reg_class a3_reg( A3 );
++reg_class a4_reg( A4 );
++reg_class a5_reg( A5 );
++reg_class a6_reg( A6 );
++reg_class a7_reg( A7 );
++
++reg_class v0_reg( V0 );
++reg_class v1_reg( V1 );
++
++reg_class sp_reg( SP, SP_H );
++reg_class fp_reg( FP, FP_H );
++
++reg_class v0_long_reg( V0, V0_H );
++reg_class v1_long_reg( V1, V1_H );
++reg_class a0_long_reg( A0, A0_H );
++reg_class a1_long_reg( A1, A1_H );
++reg_class a2_long_reg( A2, A2_H );
++reg_class a3_long_reg( A3, A3_H );
++reg_class a4_long_reg( A4, A4_H );
++reg_class a5_long_reg( A5, A5_H );
++reg_class a6_long_reg( A6, A6_H );
++reg_class a7_long_reg( A7, A7_H );
++reg_class t0_long_reg( T0, T0_H );
++reg_class t1_long_reg( T1, T1_H );
++reg_class t2_long_reg( T2, T2_H );
++reg_class t3_long_reg( T3, T3_H );
++reg_class t8_long_reg( T8, T8_H );
++reg_class t9_long_reg( T9, T9_H );
++reg_class s0_long_reg( S0, S0_H );
++reg_class s1_long_reg( S1, S1_H );
++reg_class s2_long_reg( S2, S2_H );
++reg_class s3_long_reg( S3, S3_H );
++reg_class s4_long_reg( S4, S4_H );
++reg_class s5_long_reg( S5, S5_H );
++reg_class s6_long_reg( S6, S6_H );
++reg_class s7_long_reg( S7, S7_H );
++
++reg_class int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, A7, A6, A5, A4, V0, A3, A2, A1, A0, T0 );
++
++reg_class no_Ax_int_reg( S7, S0, S1, S2, S4, S3, T8, T2, T3, T1, V1, V0, T0 );
++
++reg_class p_reg(
++ S7, S7_H,
++ S0, S0_H,
++ S1, S1_H,
++ S2, S2_H,
++ S4, S4_H,
++ S3, S3_H,
++ T8, T8_H,
++ T2, T2_H,
++ T3, T3_H,
++ T1, T1_H,
++ A7, A7_H,
++ A6, A6_H,
++ A5, A5_H,
++ A4, A4_H,
++ A3, A3_H,
++ A2, A2_H,
++ A1, A1_H,
++ A0, A0_H,
++ T0, T0_H
++ );
++
++reg_class no_T8_p_reg(
++ S7, S7_H,
++ S0, S0_H,
++ S1, S1_H,
++ S2, S2_H,
++ S4, S4_H,
++ S3, S3_H,
++ T2, T2_H,
++ T3, T3_H,
++ T1, T1_H,
++ A7, A7_H,
++ A6, A6_H,
++ A5, A5_H,
++ A4, A4_H,
++ A3, A3_H,
++ A2, A2_H,
++ A1, A1_H,
++ A0, A0_H,
++ T0, T0_H
++ );
++
++reg_class long_reg(
++ S7, S7_H,
++ S0, S0_H,
++ S1, S1_H,
++ S2, S2_H,
++ S4, S4_H,
++ S3, S3_H,
++ T8, T8_H,
++ T2, T2_H,
++ T3, T3_H,
++ T1, T1_H,
++ A7, A7_H,
++ A6, A6_H,
++ A5, A5_H,
++ A4, A4_H,
++ A3, A3_H,
++ A2, A2_H,
++ A1, A1_H,
++ A0, A0_H,
++ T0, T0_H
++ );
++
++
++// Floating point registers.
++// F31 is not used as a temporary register in D2I
++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F31);
++reg_class dbl_reg( F0, F0_H,
++ F1, F1_H,
++ F2, F2_H,
++ F3, F3_H,
++ F4, F4_H,
++ F5, F5_H,
++ F6, F6_H,
++ F7, F7_H,
++ F8, F8_H,
++ F9, F9_H,
++ F10, F10_H,
++ F11, F11_H,
++ F12, F12_H,
++ F13, F13_H,
++ F14, F14_H,
++ F15, F15_H,
++ F16, F16_H,
++ F17, F17_H,
++ F18, F18_H,
++ F19, F19_H,
++ F20, F20_H,
++ F21, F21_H,
++ F22, F22_H,
++ F23, F23_H,
++ F24, F24_H,
++ F25, F25_H,
++ F26, F26_H,
++ F27, F27_H,
++ F28, F28_H,
++ F29, F29_H,
++ F31, F31_H);
++
++reg_class flt_arg0( F12 );
++reg_class dbl_arg0( F12, F12_H );
++reg_class dbl_arg1( F14, F14_H );
++
++%}
++
++//----------DEFINITION BLOCK---------------------------------------------------
++// Define name --> value mappings to inform the ADLC of an integer valued name
++// Current support includes integer values in the range [0, 0x7FFFFFFF]
++// Format:
++// int_def <name> ( <int_value>, <expression>);
++// Generated Code in ad_<arch>.hpp
++// #define <name> (<expression>)
++// // value == <int_value>
++// Generated code in ad_<arch>.cpp adlc_verification()
++// assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
++//
++definitions %{
++ int_def DEFAULT_COST ( 100, 100);
++ int_def HUGE_COST (1000000, 1000000);
++
++ // Memory refs are twice as expensive as run-of-the-mill.
++ int_def MEMORY_REF_COST ( 200, DEFAULT_COST * 2);
++
++ // Branches are even more expensive.
++ int_def BRANCH_COST ( 300, DEFAULT_COST * 3);
++ // we use the jr instruction to construct calls, so they are more expensive
++ int_def CALL_COST ( 500, DEFAULT_COST * 5);
++/*
++ int_def EQUAL ( 1, 1 );
++ int_def NOT_EQUAL ( 2, 2 );
++ int_def GREATER ( 3, 3 );
++ int_def GREATER_EQUAL ( 4, 4 );
++ int_def LESS ( 5, 5 );
++ int_def LESS_EQUAL ( 6, 6 );
++*/
++%}
++
++
++
++//----------SOURCE BLOCK-------------------------------------------------------
++// This is a block of C++ code which provides values, functions, and
++// definitions necessary in the rest of the architecture description
++
++source_hpp %{
++// Header information of the source block.
++// Method declarations/definitions which are used outside
++// the ad-scope can conveniently be defined here.
++//
++// To keep related declarations/definitions/uses close together,
++// we switch between source %{ }% and source_hpp %{ }% freely as needed.
++
++class CallStubImpl {
++
++ //--------------------------------------------------------------
++ //---< Used for optimization in Compile::shorten_branches >---
++ //--------------------------------------------------------------
++
++ public:
++ // Size of call trampoline stub.
++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++ ++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as jump and can be patched to ++ // a call be deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section. ++ int size = NativeCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeCall::instruction_size; ++ const uintx m = 16 - 1; ++ return mask_bits(size + m, ~m); ++ //return round_to(size, 16); ++ } ++}; ++ ++%} // end source_hpp ++ ++source %{ ++ ++#define NO_INDEX 0 ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++ ++ ++#define __ _masm. ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++int HandlerImpl::emit_exception_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)OptoRuntime::exception_blob()->entry_point()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_deopt_handler"); ++ ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call(SharedRuntime::deopt_blob()->unpack()); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ switch (opcode) { ++ //Op_CountLeadingZerosI Op_CountLeadingZerosL can be deleted, all MIPS CPUs support clz & dclz. 
++ case Op_CountLeadingZerosI: ++ case Op_CountLeadingZerosL: ++ if (!UseCountLeadingZerosInstructionMIPS64) ++ return false; ++ break; ++ case Op_CountTrailingZerosI: ++ case Op_CountTrailingZerosL: ++ if (!UseCountTrailingZerosInstructionMIPS64) ++ return false; ++ break; ++ } ++ ++ return true; // Per default match rules are supported. ++} ++ ++const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++ // TODO ++ // identify extra cases that we might want to provide match rules for ++ // e.g. Op_ vector nodes and other intrinsics while guarding with vlen ++ bool ret_value = match_rule_supported(opcode); ++ // Add rules here. ++ ++ return ret_value; // Per default match rules are supported. ++} ++ ++const bool Matcher::has_predicated_vectors(void) { ++ return false; ++} ++ ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ Unimplemented(); ++ return default_pressure_threshold; ++} ++ ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ int offs = offset - br_size + 4; ++ // To be conservative on MIPS ++ // branch node should be end with: ++ // branch inst ++ // delay slot ++ const int safety_zone = 3 * BytesPerInstWord; ++ return Assembler::is_simm16((offs<0 ? offs-safety_zone : offs+safety_zone) >> 2); ++} ++ ++ ++// No additional cost for CMOVL. ++const int Matcher::long_cmove_cost() { return 0; } ++ ++// No CMOVF/CMOVD with SSE2 ++const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? ++const bool Matcher::need_masked_shift_count = false; ++ ++bool Matcher::narrow_oop_use_complex_address() { ++ assert(UseCompressedOops, "only for compressed oops code"); ++ return false; ++} ++ ++bool Matcher::narrow_klass_use_complex_address() { ++ assert(UseCompressedClassPointers, "only for compressed klass code"); ++ return false; ++} ++ ++bool Matcher::const_oop_prefer_decode() { ++ // Prefer ConN+DecodeN over ConP. ++ return true; ++} ++ ++bool Matcher::const_klass_prefer_decode() { ++ // TODO: Either support matching DecodeNKlass (heap-based) in operand ++ // or condisider the following: ++ // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode. ++ //return Universe::narrow_klass_base() == NULL; ++ return true; ++} ++ ++// This is UltraSparc specific, true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ if (MaxVectorSize == 0) ++ return 0; ++ assert(MaxVectorSize == 8, ""); ++ return 8; ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 8, ""); ++ switch(size) { ++ case 8: return Op_VecD; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Only lowest bits of xmm reg are used for vector shift count. ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; ++} ++ ++ ++const bool Matcher::convi2l_type_required = true; ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? 
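++// On MIPS this simply defers to the shared clone_base_plus_offset_address() helper; see the implementation directly below.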
++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} ++ ++void Compile::reshape_address(AddPNode* addp) { ++} ++ ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ return max_vector_size(bt); // Same as max. ++} ++ ++// MIPS supports misaligned vectors store/load? FIXME ++const bool Matcher::misaligned_vectors_ok() { ++ return false; ++ //return !AlignVector; // can be changed by flag ++} ++ ++// Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++// Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++int Matcher::regnum_to_fpu_offset(int regnum) { ++ return regnum - 32; // The FP registers are in the second chunk ++} ++ ++ ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ return true; ++} ++ ++ ++// Return whether or not this register is ever used as an argument. This ++// function is used on startup to build the trampoline stubs in generateOptoStub. ++// Registers not mentioned will be killed by the VM call in the trampoline, and ++// arguments in those registers not be available to the callee. 
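++// On this port that means T0 and A0..A7 for integer/pointer arguments and F12..F19 for floating-point arguments (see can_be_java_arg() below).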
++bool Matcher::can_be_java_arg( int reg ) { ++ // Refer to: [sharedRuntime_mips_64.cpp] SharedRuntime::java_calling_convention() ++ if ( reg == T0_num || reg == T0_H_num ++ || reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ++ || reg == A6_num || reg == A6_H_num ++ || reg == A7_num || reg == A7_H_num ) ++ return true; ++ ++ if ( reg == F12_num || reg == F12_H_num ++ || reg == F13_num || reg == F13_H_num ++ || reg == F14_num || reg == F14_H_num ++ || reg == F15_num || reg == F15_H_num ++ || reg == F16_num || reg == F16_H_num ++ || reg == F17_num || reg == F17_H_num ++ || reg == F18_num || reg == F18_H_num ++ || reg == F19_num || reg == F19_H_num ) ++ return true; ++ ++ return false; ++} ++ ++bool Matcher::is_spillable_arg( int reg ) { ++ return can_be_java_arg(reg); ++} ++ ++bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) { ++ return false; ++} ++ ++// Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ ShouldNotReachHere(); ++ return RegMask(); ++} ++ ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++ return FP_REG_mask(); ++} ++ ++// MIPS doesn't support AES intrinsics ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallLeafDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++ const uintx m = alignment_required() - 1; ++ return mask_bits(current_offset + m, ~m) - current_offset; ++} ++ ++// If CPU can load and store mis-aligned doubles directly then no fixup is ++// needed. Else we split the double into 2 integer pieces and move it ++// piece-by-piece. Only happens when passing doubles into C code as the ++// Java calling convention forces doubles to be aligned. ++const bool Matcher::misaligned_doubles_ok = false; ++// Do floats take an entire double register or just half? ++//const bool Matcher::float_in_double = true; ++bool Matcher::float_in_double() { return false; } ++// Do ints take an entire long register or just half? ++const bool Matcher::int_in_long = true; ++// Is it better to copy float constants, or load them directly from memory? ++// Intel can load a float constant from a direct address, requiring no ++// extra registers. Most RISCs will have to materialize an address into a ++// register first, so they would do better to copy the constant from stack. ++const bool Matcher::rematerialize_float_constants = false; ++// Advertise here if the CPU requires explicit rounding operations ++// to implement the UseStrictFP mode. ++const bool Matcher::strict_fp_requires_explicit_rounding = false; ++// false => size gets scaled to BytesPerLong, ok. 
++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// Indicate if the safepoint node needs the polling page as an input. ++// it does if the polling page is more than disp32 away. ++bool SafePointNode::needs_polling_address_input() { ++ return SafepointMechanism::uses_thread_local_poll(); ++} ++ ++#ifndef PRODUCT ++void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("BRK"); ++} ++#endif ++ ++void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ MacroAssembler _masm(&cbuf); ++ __ brk(5); ++} ++ ++uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++// !!!!! Special hack to get all type of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. ++int MachCallStaticJavaNode::ret_addr_offset() { ++ //lui ++ //ori ++ //nop ++ //nop ++ //jalr ++ //nop ++ return 24; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() { ++ //lui IC_Klass, ++ //ori IC_Klass, ++ //dsll IC_Klass ++ //ori IC_Klass ++ ++ //lui T9 ++ //ori T9 ++ //nop ++ //nop ++ //jalr T9 ++ //nop ++ return 4 * 4 + 4 * 6; ++} ++ ++//============================================================================= ++ ++// Figure out which register class each belongs in: rc_int, rc_float, rc_stack ++enum RC { rc_bad, rc_int, rc_float, rc_stack }; ++static enum RC rc_class( OptoReg::Name reg ) { ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ VMReg r = OptoReg::as_VMReg(reg); ++ if (r->is_Register()) return rc_int; ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const { ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this ); ++ OptoReg::Name dst_first = ra_->get_reg_first(this ); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" ); ++ ++ // Generate spill code! 
++ ++ if( src_first == dst_first && src_second == dst_second ) ++ return 0; // Self copy, no move ++ ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld(AT, Address(SP, src_offset)); ++ __ sd(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld AT, [SP + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "sd AT, [SP + #%d]", ++ src_offset, dst_offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ lw(AT, Address(SP, src_offset)); ++ __ sw(AT, Address(SP, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("lw AT, [SP + #%d] spill 2\n\t" ++ "sw AT, [SP + #%d]\n\t", ++ src_offset, dst_offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ld(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ld %s, [SP + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ lw(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++ else ++ __ lwu(as_Register(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ if (this->ideal_reg() == Op_RegI) ++ st->print("lw %s, [SP + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("lwu %s, [SP + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ldc1( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("ldc1 %s, [SP + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ lwc1( 
as_FloatRegister(Matcher::_regEncode[dst_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("lwc1 %s, [SP + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sd(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sd %s, [SP + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sw(as_Register(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sw %s, [SP + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ move(as_Register(Matcher::_regEncode[dst_first]), ++ as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(64bit) %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ move_u32(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ daddu(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first]), R0); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ return 0; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ dmtc1(as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("dmtc1 %s, %s\t# spill 12", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mtc1( as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first]) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mtc1 %s, %s\t# spill 13", ++ Matcher::regName[dst_first], 
++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ sdc1( as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset) ); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("sdc1 %s, [SP + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ swc1(as_FloatRegister(Matcher::_regEncode[src_first]), Address(SP, offset)); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("swc1 %s, [SP + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ dmfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("dmfc1 %s, %s\t# spill 16", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mfc1( as_Register(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mfc1 %s, %s\t# spill 17", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ st->print("\n\t"); ++ st->print("mov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++#endif ++ } ++ } ++ return 0; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ implementation( NULL, ra_, false, st ); ++} ++#endif ++ ++void 
MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation( &cbuf, ra_, false, NULL ); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile *C = ra_->C; ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ st->print_cr("daddiu SP, SP, %d # Rlease stack @ MachEpilogNode", framesize); ++ st->print("\t"); ++ if (UseLEXT1) { ++ st->print_cr("gslq RA, FP, SP, %d # Restore FP & RA @ MachEpilogNode", -wordSize*2); ++ } else { ++ st->print_cr("ld RA, SP, %d # Restore RA @ MachEpilogNode", -wordSize); ++ st->print("\t"); ++ st->print_cr("ld FP, SP, %d # Restore FP @ MachEpilogNode", -wordSize*2); ++ } ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ st->print("\t"); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ st->print_cr("ld AT, poll_offset[thread] #polling_page_address\n\t" ++ "lw AT, [AT]\t" ++ "# Safepoint: poll for GC"); ++ } else { ++ st->print_cr("Poll Safepoint # MachEpilogNode"); ++ } ++ } ++} ++#endif ++ ++void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile *C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ int framesize = C->frame_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ assert(Assembler::is_simm16(framesize), "daddiu uses a signed 16-bit int"); ++ ++ if (UseLEXT1) { ++ __ gslq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ ld(RA, SP, framesize - wordSize ); ++ __ ld(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(SP, SP, framesize); ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if( do_polling() && C->is_method_compilation() ) { ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ __ ld(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ relocate(relocInfo::poll_return_type); ++ __ lw(AT, AT, 0); ++ } else { ++ __ set64(AT, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ lw(AT, AT, 0); ++ } ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way fujie debug ++} ++ ++int MachEpilogNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++const Pipeline * MachEpilogNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++int MachEpilogNode::safepoint_offset() const { return 0; } ++ ++//============================================================================= ++ ++#ifndef PRODUCT ++void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("ADDI %s, SP, %d @BoxLockNode",Matcher::regName[reg],offset); ++} ++#endif ++ ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const { ++ return 4; ++} ++ ++void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ ++ __ addiu(as_Register(reg), SP, offset); ++} ++ ++ ++//static int 
sizeof_FFree_Float_Stack_All = -1; ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ //lui ++ //ori ++ //dsll ++ //ori ++ //jalr ++ //nop ++ assert(NativeCall::instruction_size == 24, "in MachCallRuntimeNode::ret_addr_offset()"); ++ return NativeCall::instruction_size; ++} ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++//============================================================================= ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ st->print_cr("load_klass(T9, T0)"); ++ st->print_cr("\tbeq(T9, iCache, L)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tjmp(SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type)"); ++ st->print_cr("\tnop"); ++ st->print_cr("\tnop"); ++ st->print_cr(" L:"); ++} ++#endif ++ ++ ++void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode(); ++ Label L; ++ Register receiver = T0; ++ Register iCache = as_Register(ic_reg); ++ ++ __ load_klass(T9, receiver); ++ __ beq(T9, iCache, L); ++ __ delayed()->nop(); ++ __ jmp((address)SharedRuntime::get_ic_miss_stub(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++ ++ ++//============================================================================= ++ ++const RegMask& MachConstantBaseNode::_out_RegMask = P_REG_mask(); ++ ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++ ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ Compile* C = ra_->C; ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ MacroAssembler _masm(&cbuf); ++ ++ Register Rtoc = as_Register(ra_->get_encode(this)); ++ CodeSection* consts_section = __ code()->consts(); ++ int consts_size = consts_section->align_at_start(consts_section->size()); ++ assert(constant_table.size() == consts_size, "must be equal"); ++ ++ if (consts_section->size()) { ++ // Materialize the constant table base. 
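++          // baseaddr below is the absolute start of the constant section:
++          // table_base_offset() is 0 in this port (see calculate_table_base_offset() above),
++          // and patchable_set48 always emits a fixed 4-instruction sequence, which is why
++          // MachConstantBaseNode::size() reports 4 * 4 bytes for this node.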
++ address baseaddr = consts_section->start() + -(constant_table.table_base_offset()); ++ // RelocationHolder rspec = internal_word_Relocation::spec(baseaddr); ++ __ relocate(relocInfo::internal_word_type); ++ __ patchable_set48(Rtoc, (long)baseaddr); ++ } ++} ++ ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ // patchable_set48 (4 insts) ++ return 4 * 4; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Register r = as_Register(ra_->get_encode(this)); ++ st->print("patchable_set48 %s, &constanttable (constant table base) @ MachConstantBaseNode", r->name()); ++} ++#endif ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. But be careful, because ++ // some VM calls (such as call site linkage) can use several kilobytes of ++ // stack. But the stack safety zone should account for that. ++ // See bugs 4446381, 4468289, 4497237. ++ if (C->need_stack_bang(bangsize)) { ++ st->print_cr("# stack bang"); st->print("\t"); ++ } ++ if (UseLEXT1) { ++ st->print("gssq RA, FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } else { ++ st->print("sd RA, %d(SP) @ MachPrologNode\n\t", -wordSize); ++ st->print("sd FP, %d(SP) @ MachPrologNode\n\t", -wordSize*2); ++ } ++ st->print("daddiu FP, SP, -%d \n\t", wordSize*2); ++ st->print("daddiu SP, SP, -%d \t",framesize); ++} ++#endif ++ ++ ++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ Compile* C = ra_->C; ++ MacroAssembler _masm(&cbuf); ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ assert(Assembler::is_simm16(-framesize), "daddiu uses a signed 16-bit int"); ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ if (C->need_stack_bang(bangsize)) { ++ __ generate_stack_overflow_check(bangsize); ++ } ++ ++ __ daddiu(SP, SP, -framesize); ++ if (UseLEXT1) { ++ __ gssq(RA, FP, SP, framesize - wordSize * 2); ++ } else { ++ __ sd(RA, SP, framesize - wordSize); ++ __ sd(FP, SP, framesize - wordSize * 2); ++ } ++ __ daddiu(FP, SP, framesize - wordSize * 2); ++ ++ C->set_frame_complete(cbuf.insts_size()); ++ if (C->has_mach_constant_base_node()) { ++ // NOTE: We set the table base offset here because users might be ++ // emitted before MachConstantBaseNode. ++ Compile::ConstantTable& constant_table = C->constant_table(); ++ constant_table.set_table_base_offset(constant_table.calculate_table_base_offset()); ++ } ++} ++ ++ ++uint MachPrologNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); // too many variables; just compute it the hard way ++} ++ ++int MachPrologNode::reloc() const { ++ return 0; // a large enough number ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. 
Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. ++encode %{ ++ ++ enc_class Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ MacroAssembler _masm(&cbuf); ++ // This is the instruction starting address for relocation info. ++ __ block_comment("Java_To_Runtime"); ++ cbuf.set_insts_mark(); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)$meth$$method); ++ %} ++ ++ enc_class Java_Static_Call (method meth) %{ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to determine ++ // who we intended to call. ++ MacroAssembler _masm(&cbuf); ++ address addr = (address)$meth$$method; ++ address call; ++ __ block_comment("Java_Static_Call"); ++ ++ if ( !_method ) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ call = __ trampoline_call(AddressLiteral(addr, relocInfo::runtime_call_type), &cbuf); ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? 
opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ call = __ trampoline_call(AddressLiteral(addr, rspec), &cbuf); ++ ++ // Emit stub for static call ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ } ++ if (call == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++ %} ++ ++ ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++ enc_class Java_Dynamic_Call (method meth) %{ // JAVA DYNAMIC CALL ++ MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); ++ %} ++ ++ ++ enc_class enc_PartialSubtypeCheck(mRegP result, mRegP sub, mRegP super, mRegI tmp) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = $tmp$$Register; ++ Register tmp = T9; ++ Label miss; ++ ++ // result may be the same as sub ++ // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++ // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++ // 4bc mov S2, NULL #@loadConP ++ // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++ // ++ MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, tmp, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ move(result, 0); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ __ bind(miss); ++ __ move(result, 1); ++ __ bind(done); ++ %} ++ ++%} ++ ++ ++//---------MIPS FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. 
Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame %{ ++ ++ stack_direction(TOWARDS_LOW); ++ ++ // These two registers define part of the calling convention ++ // between compiled code and the interpreter. ++ // SEE StartI2CNode::calling_convention & StartC2INode::calling_convention & StartOSRNode::calling_convention ++ // for more information. ++ ++ inline_cache_reg(T1); // Inline Cache Register ++ interpreter_method_oop_reg(S3); // Method Oop Register when calling interpreter ++ ++ // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ // generate Compile::sync_stack_slots ++ sync_stack_slots(2); ++ ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ ++ interpreter_frame_pointer(FP); ++ ++ // generate Matcher::stack_alignment ++ stack_alignment(StackAlignmentInBytes); //wordSize = sizeof(char*); ++ ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. ++ in_preserve_stack_slots(4); //Now VerifyStackAtCalls is defined as false ! Leave two stack slots for ra and fp ++ ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(0); ++ ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->sync()*Compile::current()->sync_stack_slots(),WordsPerLong)); ++ return_addr(REG RA); ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // will generated to Matcher::calling_convention(OptoRegPair *sig, uint length, bool is_outgoing) ++ // StartNode::calling_convention call this. ++ calling_convention %{ ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ ++ ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. 
Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ ++ // SEE CallRuntimeNode::calling_convention for more information. ++ c_calling_convention %{ ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); ++ %} ++ ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ c_return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num }; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++ // Location of return values ++ // register(s) contain(s) return value for Op_StartC2I and Op_Start. ++ // SEE Matcher::match. ++ ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ /* -- , -- , Op_RegN, Op_RegI, Op_RegP, Op_RegF, Op_RegD, Op_RegL */ ++ static int lo[Op_RegL+1] = { 0, 0, V0_num, V0_num, V0_num, F0_num, F0_num, V0_num }; ++ static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, V0_H_num, OptoReg::Bad, F0_H_num, V0_H_num}; ++ return OptoRegPair(hi[ideal_reg],lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. ++ ++// Vectors ++operand vecD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(VecD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand FlagsReg() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegFlags); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// TODO: Should we need to define some more special immediate number ? 
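The operand definitions that follow are the user-defined types which the instruct rules at the end of this file consume, together with the pipe classes from the pipeline block. As a rough illustration of how the pieces fit together (a sketch only, not part of the patch; the rule name is hypothetical and the file's real add rules may be formulated differently):

  instruct addI_Reg_imm16_sketch(mRegI dst, mRegI src, immI16 imm) %{
    match(Set dst (AddI src imm));
    format %{ "addiu $dst, $src, $imm  # sketch only" %}
    ins_encode %{
      // addiu takes a signed 16-bit immediate, which is exactly what the immI16 predicate below guarantees
      __ addiu($dst$$Register, $src$$Register, $imm$$constant);
    %}
    ins_pipe(ialu_regI_imm16);
  %}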
++// Immediate Operands ++// Integer Immediate ++operand immI() %{ ++ match(ConI); ++ // TODO: should not match immI8 here LEE ++ match(immI8); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI8() %{ ++ predicate((-128 <= n->get_int()) && (n->get_int() <= 127)); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI16() %{ ++ predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M65536() %{ ++ predicate(n->get_int() == -65536); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() %{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for test vs zero ++operand immI_0() %{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for increment ++operand immI_1() %{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constants for increment ++operand immI_16() %{ ++ predicate(n->get_int() == 16); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() %{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_32() %{ ++ predicate(n->get_int() == 32); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() %{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_65535() %{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_M32767_32768() %{ ++ predicate((-32767 <= n->get_int()) && (n->get_int() <= 32768)); ++ match(ConI); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes ++operand immI_0_3() %{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_31() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 31); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_32767() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 32767); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_0_65535() %{ ++ predicate(n->get_int() >= 0 && n->get_int() <= 65535); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_32_63() %{ ++ predicate(n->get_int() >= 32 && n->get_int() <= 63); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive integer mask ++operand immI_nonneg_mask() %{ ++ predicate((n->get_int() >= 0) && (Assembler::is_int_mask(n->get_int()) != -1)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate ++operand immL() %{ ++ match(ConL); ++ ++ op_cost(20); ++ format %{ %} ++ 
interface(CONST_INTER); ++%} ++ ++// Long Immediate 8-bit ++operand immL8() %{ ++ predicate(-0x80L <= n->get_long() && n->get_long() < 0x80L); ++ match(ConL); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer for polling page ++operand immP_poll() %{ ++ predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); ++ match(ConP); ++ op_cost(5); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL16() %{ ++ predicate((-32768 <= n->get_long()) && (n->get_long() <= 32767)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() %{ ++ predicate(n->get_long() == (int)(n->get_long())); ++ match(ConL); ++ ++ op_cost(15); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 3..6 zero ++operand immL_M121() %{ ++ predicate(n->get_long() == -121L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..2 zero ++operand immL_M8() %{ ++ predicate(n->get_long() == -8L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 1..2 zero ++operand immL_M7() %{ ++ predicate(n->get_long() == -7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 2 zero ++operand immL_M5() %{ ++ predicate(n->get_long() == -5L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// bit 0..1 zero ++operand immL_M4() %{ ++ predicate(n->get_long() == -4L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M1() %{ ++ predicate(n->get_long() == -1L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL_0() %{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_7() %{ ++ predicate(n->get_long() == 7L); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate: low 32-bit mask ++operand immL_MaxUI() %{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(20); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_M32767_32768() %{ ++ predicate((-32767 <= n->get_long()) && (n->get_long() <= 32768)); ++ match(ConL); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL_0_65535() %{ ++ predicate(n->get_long() >= 0 && n->get_long() <= 65535); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Operand for non-negtive long mask ++operand immL_nonneg_mask() %{ ++ predicate((n->get_long() >= 0) && (Assembler::is_jlong_mask(n->get_long()) != -1)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() %{ ++ match(ConP); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP_0() %{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate: 64-bit ++operand immP_no_oop_cheap() %{ ++ predicate(!n->bottom_type()->isa_oop_ptr() && (MacroAssembler::insts_for_set64(n->get_ptr()) <= 3)); ++ match(ConP); ++ ++ op_cost(5); ++ // formats are generated automatically for constants and base registers ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand 
immN() %{ ++ match(ConN); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(10); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN_0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point immediate ++operand immF() %{ ++ match(ConF); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Single-precision floating-point zero ++operand immF_0() %{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point immediate ++operand immD() %{ ++ match(ConD); ++ ++ op_cost(20); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double-precision floating-point zero ++operand immD_0() %{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(5); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand mRegI() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_Ax_mRegI() %{ ++ constraint(ALLOC_IN_RC(no_Ax_int_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand mS0RegI() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S0" %} ++ interface(REG_INTER); ++%} ++ ++operand mS1RegI() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S1" %} ++ interface(REG_INTER); ++%} ++ ++operand mS3RegI() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S3" %} ++ interface(REG_INTER); ++%} ++ ++operand mS4RegI() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S4" %} ++ interface(REG_INTER); ++%} ++ ++operand mS5RegI() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S5" %} ++ interface(REG_INTER); ++%} ++ ++operand mS6RegI() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S6" %} ++ interface(REG_INTER); ++%} ++ ++operand mS7RegI() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "S7" %} ++ interface(REG_INTER); ++%} ++ ++ ++operand mT0RegI() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T0" %} ++ interface(REG_INTER); ++%} ++ ++operand mT1RegI() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T1" %} ++ interface(REG_INTER); ++%} ++ ++operand mT2RegI() %{ ++ constraint(ALLOC_IN_RC(t2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T2" %} ++ interface(REG_INTER); ++%} ++ ++operand mT3RegI() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T3" %} ++ interface(REG_INTER); ++%} ++ ++operand mT8RegI() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T8" %} ++ interface(REG_INTER); ++%} ++ ++operand mT9RegI() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "T9" %} ++ interface(REG_INTER); ++%} ++ ++operand mA0RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand mA1RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ 
match(RegI); ++ match(mRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand mA2RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand mA3RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand mA4RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand mA5RegI() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A5" %} ++ interface(REG_INTER); ++%} ++ ++operand mA6RegI() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A6" %} ++ interface(REG_INTER); ++%} ++ ++operand mA7RegI() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "A7" %} ++ interface(REG_INTER); ++%} ++ ++operand mV0RegI() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V0" %} ++ interface(REG_INTER); ++%} ++ ++operand mV1RegI() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegI); ++ match(mRegI); ++ ++ format %{ "V1" %} ++ interface(REG_INTER); ++%} ++ ++operand mRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegN() %{ ++ constraint(ALLOC_IN_RC(t0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegN() %{ ++ constraint(ALLOC_IN_RC(t1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegN() %{ ++ constraint(ALLOC_IN_RC(t3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegN() %{ ++ constraint(ALLOC_IN_RC(t8_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegN() %{ ++ constraint(ALLOC_IN_RC(t9_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegN() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegN() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegN() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegN() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegN() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5_RegN() %{ ++ constraint(ALLOC_IN_RC(a5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegN() %{ ++ constraint(ALLOC_IN_RC(a6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegN() %{ ++ constraint(ALLOC_IN_RC(a7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0_RegN() %{ ++ constraint(ALLOC_IN_RC(s0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegN() %{ ++ constraint(ALLOC_IN_RC(s1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ 
interface(REG_INTER); ++%} ++ ++operand s2_RegN() %{ ++ constraint(ALLOC_IN_RC(s2_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegN() %{ ++ constraint(ALLOC_IN_RC(s3_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegN() %{ ++ constraint(ALLOC_IN_RC(s4_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegN() %{ ++ constraint(ALLOC_IN_RC(s5_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegN() %{ ++ constraint(ALLOC_IN_RC(s6_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegN() %{ ++ constraint(ALLOC_IN_RC(s7_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegN() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegN() %{ ++ constraint(ALLOC_IN_RC(v1_reg)); ++ match(RegN); ++ match(mRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand mRegP() %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(RegP); ++ match(a0_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand no_T8_mRegP() %{ ++ constraint(ALLOC_IN_RC(no_T8_p_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t9_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ 
interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegP); ++ match(mRegP); ++ match(no_T8_mRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++/* ++operand mSPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(sp_reg)); ++ match(reg); ++ ++ format %{ "SP" %} ++ interface(REG_INTER); ++%} ++ ++operand mFPRegP(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(fp_reg)); ++ match(reg); ++ ++ format %{ "FP" %} ++ interface(REG_INTER); ++%} ++*/ ++ ++operand mRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v1RegL() %{ ++ constraint(ALLOC_IN_RC(v1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0RegL() %{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1RegL() %{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2RegL() %{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3RegL() %{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t0RegL() %{ ++ constraint(ALLOC_IN_RC(t0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t1RegL() %{ ++ constraint(ALLOC_IN_RC(t1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t3RegL() %{ ++ constraint(ALLOC_IN_RC(t3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8RegL() %{ ++ 
constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4RegL() %{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a5RegL() %{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a6RegL() %{ ++ constraint(ALLOC_IN_RC(a6_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a7RegL() %{ ++ constraint(ALLOC_IN_RC(a7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s0RegL() %{ ++ constraint(ALLOC_IN_RC(s0_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s1RegL() %{ ++ constraint(ALLOC_IN_RC(s1_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3RegL() %{ ++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s4RegL() %{ ++ constraint(ALLOC_IN_RC(s4_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s7RegL() %{ ++ constraint(ALLOC_IN_RC(s7_long_reg)); ++ match(RegL); ++ match(mRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Floating register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//Double Precision Floating register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//----------Memory Operands---------------------------------------------------- ++// Indirect Memory Operand ++operand indirect(mRegP reg) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8(mRegP reg, immL8 off) ++%{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg off); ++ ++ op_cost(10); ++ format %{ "[$reg + $off (8-bit)] @ indOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Times Scale Plus Index Register ++operand indIndexScale(mRegP reg, mRegL lreg, immI_0_3 scale) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP reg (LShiftL lreg scale)); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg << $scale] @ indIndexScale" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale($scale); ++ disp(0x0); ++ %} ++%} ++ ++ ++// [base + index + offset] ++operand baseIndexOffset8(mRegP base, mRegL index, immL8 off) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(5); ++ match(AddP (AddP base index) off); ++ ++ format %{ "[$base + $index + $off (8-bit)] @ baseIndexOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// [base + index + offset] ++operand baseIndexOffset8_convI2L(mRegP base, mRegI index, immL8 off) ++%{ ++ predicate(UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(5); ++ match(AddP (AddP base (ConvI2L index)) off); ++ ++ format %{ "[$base + $index + $off (8-bit)] 
@ baseIndexOffset8_convI2L" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// [base + index<in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); ++ op_cost(10); ++ match(AddP (AddP base (LShiftL (ConvI2L index) scale)) off); ++ ++ format %{ "[$base + $index << $scale + $off (8-bit)] @ basePosIndexScaleOffset8" %} ++ interface(MEMORY_INTER) %{ ++ base($base); ++ index($index); ++ scale($scale); ++ disp($off); ++ %} ++%} ++ ++//FIXME: I think it's better to limit the immI to be 16-bit at most! ++// Indirect Memory Plus Long Offset Operand ++operand indOffset32(mRegP reg, immL32 off) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(20); ++ match(AddP reg off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); /* NO_INDEX */ ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register ++operand indIndex(mRegP addr, mRegL index) %{ ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP addr index); ++ ++ op_cost(20); ++ format %{"[$addr + $index] @ indIndex" %} ++ interface(MEMORY_INTER) %{ ++ base($addr); ++ index($index); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indirectNarrowKlass(mRegN reg) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeNKlass reg); ++ ++ format %{ "[$reg] @ indirectNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++operand indOffset8NarrowKlass(mRegN reg, immL8 off) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indOffset32NarrowKlass(mRegN reg, immL32 off) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeNKlass reg) off); ++ ++ format %{ "[$reg + $off (32-bit)] @ indOffset32NarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexOffsetNarrowKlass(mRegN reg, mRegL lreg, immL32 off) ++%{ ++ predicate(UseLEXT1); ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeNKlass reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffsetNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++operand indIndexNarrowKlass(mRegN reg, mRegL lreg) ++%{ ++ predicate(Universe::narrow_klass_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (DecodeNKlass reg) lreg); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg] @ indIndexNarrowKlass" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(mRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(DecodeN reg); ++ ++ format %{ "[$reg] @ indirectNarrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset8Narrow(mRegN reg, immL8 off) ++%{ ++ 
predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(p_reg)); ++ op_cost(10); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (8-bit)] @ indOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x0); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register Plus Offset Operand ++operand indIndexOffset8Narrow(mRegN reg, mRegL lreg, immL8 off) ++%{ ++ predicate((Universe::narrow_oop_shift() == 0) && UseLEXT1); ++ constraint(ALLOC_IN_RC(p_reg)); ++ match(AddP (AddP (DecodeN reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg] @ indIndexOffset8Narrow" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. ++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++// Comparision Code ++// Comparison Code, unsigned compare. Used by FP also, with ++// C2 (unordered) turned into GT or LT already. The other bits ++// C0 and C3 are turned into Carry & Zero flags. ++operand cmpOpU() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x01); ++ not_equal(0x02); ++ greater(0x03); ++ greater_equal(0x04); ++ less(0x05); ++ less_equal(0x06); ++ overflow(0x7); ++ no_overflow(0x8); ++ %} ++%} ++ ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. 
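Before the stack-slot operands below, one note on the comparison operands above: the codes declared in their COND_INTER blocks (0x01 equal, 0x02 not_equal, and so on) are read back in the branch rules through the operand's $cmpcode accessor. A minimal sketch of that shape, with a hypothetical rule name and only two codes handled for brevity (the real branch rules in this file cover the full set, and their exact encoding may differ):

  instruct branchConI_sketch(cmpOp cop, mRegI src1, mRegI src2, label labl) %{
    match(If cop (CmpI src1 src2));
    effect(USE labl);
    format %{ "b$cop $src1, $src2, $labl  # sketch only" %}
    ins_encode %{
      Label* L = $labl$$label;
      switch ($cop$$cmpcode) {
        case 0x01: __ beq($src1$$Register, $src2$$Register, *L); break;  // equal
        case 0x02: __ bne($src1$$Register, $src2$$Register, *L); break;  // not_equal
        default:   Unimplemented();  // remaining codes omitted in this sketch
      }
      __ delayed()->nop();  // branch delay slot, as in MachUEPNode::emit above
    %}
    ins_pipe(pipe_alu_branch);
  %}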
++operand stackSlotP(sRegP reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) %{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ op_cost(50); ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1d); // SP ++ index(0x0); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++ ++//------------------------OPERAND CLASSES-------------------------------------- ++//opclass memory( direct, indirect, indOffset16, indOffset32, indOffset32X, indIndexOffset ); ++opclass memory( indirect, indirectNarrow, indOffset8, indOffset32, indIndex, indIndexScale, baseIndexOffset8, baseIndexOffset8_convI2L, indOffset8Narrow, indIndexOffset8Narrow); ++ ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. 
++ ++pipeline %{ ++ ++ //----------ATTRIBUTES--------------------------------------------------------- ++ attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ branch_has_delay_slot; // branch have delay slot in gs2 ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. ++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(mRegI dst, mRegI src1, mRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(mRegI dst, mRegI src1, mRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(mRegI dst, mRegI src1, mRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(mRegL dst, mRegL src1, mRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(mRegL dst, mRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(mRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(mRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(mRegI dst, mRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : 
WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(mRegI dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch : ++ pipe_class pipe_jump( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(mRegI src1, mRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_loadI(mRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(mRegI src, memory mem) %{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(mRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, mRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++ 
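Before the instruction definitions that follow, a brief aid for reading their ins_encode bodies: every conditional-branch instruct below decodes the same six ADL cmpcode values (0x01 equal, 0x02 not_equal, 0x03 above/greater, 0x04 above_equal/greater_equal, 0x05 below/less, 0x06 below_equal/less_equal) into either a direct beq/bne, or an slt/sltu into AT followed by a branch of AT against R0. The following is a minimal C++ sketch that restates that lowering so the long switch bodies are easier to audit; the enum and function names here are illustrative only and do not appear in the patch itself.

    // Illustrative restatement of the cmpcode -> slt/sltu + beq/bne lowering
    // used by the branchCon* / jmpLoopEnd* instructs in this file.
    #include <cstdint>
    #include <cstdio>

    // cmpcode values as used in the ins_encode switches.
    enum CmpCode { EQ = 0x01, NE = 0x02, GT = 0x03, GE = 0x04, LT = 0x05, LE = 0x06 };

    // Signed form (CmpI/CmpL): "slt AT, a, b" is modeled as (a < b),
    // then bne AT, R0 / beq AT, R0 decides whether the branch is taken.
    static bool branch_taken_signed(int code, int64_t op1, int64_t op2) {
      switch (code) {
        case EQ: return op1 == op2;    // beq op1, op2
        case NE: return op1 != op2;    // bne op1, op2
        case GT: return  (op2 < op1);  // slt AT, op2, op1 ; bne AT, R0
        case GE: return !(op1 < op2);  // slt AT, op1, op2 ; beq AT, R0
        case LT: return  (op1 < op2);  // slt AT, op1, op2 ; bne AT, R0
        case LE: return !(op2 < op1);  // slt AT, op2, op1 ; beq AT, R0
        default: return false;
      }
    }

    // Unsigned form (CmpU/CmpUL/CmpP): same shape, but sltu compares unsigned.
    static bool branch_taken_unsigned(int code, uint64_t op1, uint64_t op2) {
      switch (code) {
        case EQ: return op1 == op2;
        case NE: return op1 != op2;
        case GT: return  (op2 < op1);
        case GE: return !(op1 < op2);
        case LT: return  (op1 < op2);
        case LE: return !(op2 < op1);
        default: return false;
      }
    }

    int main() {
      // -1 > 1 is false for the signed compare, true for the unsigned one,
      // which is exactly why CmpI uses slt while CmpU/CmpP use sltu.
      printf("%d %d\n",
             branch_taken_signed(GT, -1, 1),
             branch_taken_unsigned(GT, (uint64_t)-1, 1));
      return 0;
    }

The long-offset variants (*_long) emit beq_long/bne_long on the same AT result, while the short-offset variants (*_short) emit beq/bne with a delay-slot nop; the comparison itself is identical in both.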
++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// respectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. ++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. ++ ++ ++// Load Integer ++instruct loadI(mRegI dst, memory mem) %{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadI_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $mem #@loadI_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# int -> byte #@loadI2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem\t# int -> ubyte #@loadI2UB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(mRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem\t# int -> short #@loadI2S" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(mRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "lhu $dst, $mem\t# int -> ushort/char #@loadI2US" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++// Load Long. 
++instruct loadL(mRegL dst, memory mem) %{ ++// predicate(!((LoadLNode*)n)->require_atomic_access()); ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(250); ++ format %{ "ld $dst, $mem #@loadL" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(mRegL dst, memory mem) %{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "ld $dst, $mem #@loadL_unaligned\n\t" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Store Long ++instruct storeL_reg(memory mem, mRegL src) %{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(200); ++ format %{ "sd $mem, $src #@storeL_reg\n" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++instruct storeL_immL_0(memory mem, immL_0 zero) %{ ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(180); ++ format %{ "sd zero, $mem #@storeL_immL_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeL ); ++%} ++ ++// Load Compressed Pointer ++instruct loadN(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed ptr @ loadN" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2P(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeN (LoadN mem))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# @ loadN2P" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Pointer ++instruct loadP(mRegP dst, memory mem) %{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $mem #@loadP" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Klass Pointer ++instruct loadKlass(mRegP dst, memory mem) %{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(mRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, 
$mem\t# compressed klass ptr @ loadNKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++instruct loadN2PKlass(mRegP dst, memory mem) ++%{ ++ match(Set dst (DecodeNKlass (LoadNKlass mem))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "lwu $dst, $mem\t# compressed klass ptr @ loadN2PKlass" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe( ialu_loadI ); // XXX ++%} ++ ++// Load Constant ++instruct loadConI(mRegI dst, immI src) %{ ++ match(Set dst src); ++ ++ ins_cost(150); ++ format %{ "mov $dst, $src #@loadConI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int value = $src$$constant; ++ __ move(dst, value); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct loadConL_set64(mRegL dst, immL src) %{ ++ match(Set dst src); ++ ins_cost(120); ++ format %{ "li $dst, $src @ loadConL_set64" %} ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct loadConL16(mRegL dst, immL16 src) %{ ++ match(Set dst src); ++ ins_cost(105); ++ format %{ "mov $dst, $src #@loadConL16" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ int value = $src$$constant; ++ __ daddiu(dst_reg, R0, value); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct loadConL_immL_0(mRegL dst, immL_0 src) %{ ++ match(Set dst src); ++ ins_cost(100); ++ format %{ "mov $dst, zero #@loadConL_immL_0" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Load Range ++instruct loadRange(mRegI dst, memory mem) %{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); ++ format %{ "MOV $dst,$mem @ loadRange" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_INT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct storeP(memory mem, mRegP src ) %{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(125); ++ format %{ "sd $src, $mem #@storeP" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store NULL Pointer, mark word, or other simple pointer constant. 
++instruct storeImmP_immP_0(memory mem, immP_0 zero) %{ ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(125); ++ format %{ "mov $mem, $zero #@storeImmP_immP_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed ptr @ storeN" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2N(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreN mem (EncodeP src))); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2N" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeNKlass(memory mem, mRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# compressed klass ptr @ storeNKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeP2NKlass(memory mem, mRegP src) ++%{ ++ match(Set mem (StoreNKlass mem (EncodePKlass src))); ++ predicate(Universe::narrow_klass_base() == NULL && Universe::narrow_klass_shift() == 0); ++ ++ ins_cost(125); // XXX ++ format %{ "sw $mem, $src\t# @ storeP2NKlass" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeImmN_immN_0(memory mem, immN_0 zero) ++%{ ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "storeN0 zero, $mem\t# compressed ptr" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Byte ++instruct storeB_immB_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreB mem zero)); ++ ++ format %{ "mov $mem, zero #@storeB_immB_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB(memory mem, mRegI src) %{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeB_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreB mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sb $src, $mem #@storeB_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Byte (8bit signed) ++instruct loadB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, 
MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem #@loadB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Byte (8bit UNsigned) ++instruct loadUB(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUB_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "lbu $dst, $mem #@loadUB_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16bit signed) ++instruct loadS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(mRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "lb $dst, $mem\t# short -> byte #@loadS2B" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "lh $dst, $mem #@loadS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Integer Immediate ++instruct storeI_immI_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreI mem zero)); ++ ++ format %{ "mov $mem, zero #@storeI_immI_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, mRegI src) %{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeI_convL2I(memory mem, mRegL src) %{ ++ match(Set mem (StoreI mem (ConvL2I src))); ++ ++ ins_cost(125); ++ format %{ "sw $mem, $src #@storeI_convL2I" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) %{ ++ match(Set dst (LoadF mem)); ++ ++ ins_cost(150); ++ format %{ "loadF $dst, $mem #@loadF" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_FLOAT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct 
loadConP_general(mRegP dst, immP src) %{ ++ match(Set dst src); ++ ++ ins_cost(120); ++ format %{ "li $dst, $src #@loadConP_general" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$src$$constant; ++ ++ if($src->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if($src->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ patchable_set48(dst, (long)value); ++ } else if ($src->constant_reloc() == relocInfo::none) { ++ __ set64(dst, (long)value); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_no_oop_cheap(mRegP dst, immP_no_oop_cheap src) %{ ++ match(Set dst src); ++ ++ ins_cost(80); ++ format %{ "li $dst, $src @ loadConP_no_oop_cheap" %} ++ ++ ins_encode %{ ++ __ set64($dst$$Register, $src$$constant); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct loadConP_poll(mRegP dst, immP_poll src) %{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "li $dst, $src #@loadConP_poll" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ intptr_t value = (intptr_t)$src$$constant; ++ ++ __ set64(dst, (jlong)value); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConP_immP_0(mRegP dst, immP_0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov $dst, R0\t# ptr" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ __ daddu(dst_reg, R0, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN_immN_0(mRegN dst, immN_0 src) %{ ++ match(Set dst src); ++ format %{ "move $dst, R0\t# compressed NULL ptr" %} ++ ins_encode %{ ++ __ move($dst$$Register, R0); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct loadConN(mRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed ptr @ loadConN" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_oop(dst, (jobject)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++instruct loadConNKlass(mRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "li $dst, $src\t# compressed klass ptr @ loadConNKlass" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ set_narrow_klass(dst, (Klass*)$src$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); // XXX ++%} ++ ++//FIXME ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(mRegP jump_target, mRegP method_oop) %{ ++ match(TailCall jump_target method_oop ); ++ ins_cost(300); ++ format %{ "JMP $jump_target \t# @TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register oop = $method_oop$$Register; ++ ++ // RA will be used in generate_forward_exception() ++ __ push(RA); ++ ++ __ move(S3, oop); ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. 
++instruct CreateException( a0_RegP ex_oop ) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ // use the following format syntax ++ format %{ "# exception oop is in A0; no code emitted @CreateException" %} ++ ins_encode %{ ++ // X86 leaves this function empty ++ __ block_comment("CreateException is empty in MIPS"); ++ %} ++ ins_pipe( empty ); ++// ins_pipe( pipe_jump ); ++%} ++ ++ ++/* The mechanism of exception handling is clear now. ++ ++- Common try/catch: ++ [stubGenerator_mips.cpp] generate_forward_exception() ++ |- V0, V1 are created ++ |- T9 <= SharedRuntime::exception_handler_for_return_address ++ `- jr T9 ++ `- the caller's exception_handler ++ `- jr OptoRuntime::exception_blob ++ `- here ++- Rethrow(e.g. 'unwind'): ++ * The callee: ++ |- an exception is triggered during execution ++ `- exits the callee method through RethrowException node ++ |- The callee pushes exception_oop(T0) and exception_pc(RA) ++ `- The callee jumps to OptoRuntime::rethrow_stub() ++ * In OptoRuntime::rethrow_stub: ++ |- The VM calls _rethrow_Java to determine the return address in the caller method ++ `- exits the stub with tailjmpInd ++ |- pops exception_oop(V0) and exception_pc(V1) ++ `- jumps to the return address(usually an exception_handler) ++ * The caller: ++ `- continues processing the exception_blob with V0/V1 ++*/ ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "JMP rethrow_stub #@RethrowException" %} ++ ins_encode %{ ++ __ block_comment("@ RethrowException"); ++ ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Branch Instructions --- long offset versions ++ ++// Jump Direct ++instruct jmpDir_long(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp_far(*L); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ //ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_long(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_long(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ 
match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_long" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label* L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_long(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_long" %} ++ ++ ins_encode %{ ++ Label* L = $labl$$label; ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ __ bne_long($cr$$Register, R0, *L); ++ break; ++ case 0x02: //not equal ++ __ beq_long($cr$$Register, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_long(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConN2P_zero_long(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConP_long(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_long" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ 
sltu(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_null_branch_long(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_long" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct cmpN_reg_branch_long(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_long" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1_reg, op2_reg, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConIU_reg_reg_long(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ __ bne_long(AT, R0, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_imm_long(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ 
switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_long(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, op2, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, op2, *L); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_immI_0_long(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, R0, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, R0); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, R0); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, R0, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm_long(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct 
branchConIU_reg_immI_0_long(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(op1, R0, *L); ++ break; ++ case 0x02: //not_equal ++ __ bne_long(op1, R0, *L); ++ break; ++ case 0x03: //above ++ __ bne_long(R0, op1, *L); ++ break; ++ case 0x04: //above_equal ++ __ beq_long(R0, R0, *L); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ __ beq_long(op1, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConIU_reg_immI16_long(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_long" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ __ beq_long(op1, AT, *L); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ __ bne_long(op1, AT, *L); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ __ beq_long(AT, R0, *L); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ __ bne_long(R0, AT, *L); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ __ beq_long(AT, R0, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++ ++instruct branchConL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_regL_long(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_long" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ 
break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_0_long(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_long" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = R0; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x05: // less ++ __ beq_long(R0, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++instruct branchConL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: //not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 
0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConUL_regL_immL_long(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_long" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label* target = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ beq_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x02: // not_equal ++ __ bne_long(opr1_reg, opr2_reg, *target); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ __ bne_long(AT, R0, *target); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ __ beq_long(AT, R0, *target); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_long(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct branchConD_reg_reg_long(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_long" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label* L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. 
++ __ c_eq_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1f_long(*L); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ __ bc1t_long(*L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++ ++// Jump Direct ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ b(L); ++ else ++ __ b(int(0)); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, mRegI src1, mRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEnd_reg_immI_short(cmpOp cop, mRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = AT; ++ Label &L = *($labl$$label); ++ int flag = $cop$$cmpcode; ++ ++ __ move(op2, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ 
break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags_short(cmpOp cop, FlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #mips uses T0 as equivalent to eflag @jmpCon_flags_short" %} ++ ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ switch($cop$$cmpcode) { ++ case 0x01: //equal ++ if (&L) ++ __ bne($cr$$Register, R0, L); ++ else ++ __ bne($cr$$Register, R0, (int)0); ++ break; ++ case 0x02: //not equal ++ if (&L) ++ __ beq($cr$$Register, R0, L); ++ else ++ __ beq($cr$$Register, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++ ins_pc_relative(1); ++ ins_short_branch(1); ++%} ++ ++// Conditional jumps ++instruct branchConP_zero_short(cmpOpU cmp, mRegP op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConN2P_zero_short(cmpOpU cmp, mRegN op1, immP_0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) ++ { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOpU cmp, mRegP op1, mRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, 
op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, mRegN op1, immN_0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOp cmp, mRegN op1, mRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1_reg = $op1$$Register; ++ Register op2_reg = $op2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1_reg, op2_reg, L); ++ else ++ __ beq(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1_reg, op2_reg, L); ++ else ++ __ bne(op1_reg, op2_reg, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1_reg, op2_reg); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, op2_reg, op1_reg); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOpU cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, op2); ++ if(&L) ++ __ bne(AT, R0, L); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ case 
0x06: //below_equal ++ __ sltu(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOpU cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltu(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, mRegI src1, mRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, op2, L); ++ else ++ __ beq(op1, op2, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, op2, L); ++ else ++ __ bne(op1, op2, (int)0); ++ break; ++ case 0x03: //above ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ slt(AT, op1, op2); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ slt(AT, op2, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_immI_0_short(cmpOp cmp, mRegI src1, immI_0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //greater ++ if(&L) ++ __ bgtz(op1, L); ++ else ++ __ bgtz(op1, (int)0); ++ break; ++ case 0x04: //greater_equal ++ if(&L) ++ __ 
bgez(op1, L); ++ else ++ __ bgez(op1, (int)0); ++ break; ++ case 0x05: //less ++ if(&L) ++ __ bltz(op1, L); ++ else ++ __ bltz(op1, (int)0); ++ break; ++ case 0x06: //less_equal ++ if(&L) ++ __ blez(op1, L); ++ else ++ __ blez(op1, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, mRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ move(AT, val); ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //greater ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //greater_equal ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //less ++ __ slt(AT, op1, AT); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //less_equal ++ __ slt(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_immI_0_short(cmpOpU cmp, mRegI src1, immI_0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_immI_0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ case 0x02: //not_equal ++ if (&L) ++ __ bne(op1, R0, L); ++ else ++ __ bne(op1, R0, (int)0); ++ break; ++ case 0x03: //above ++ if(&L) ++ __ bne(R0, op1, L); ++ else ++ __ bne(R0, op1, (int)0); ++ break; ++ case 0x04: //above_equal ++ if(&L) ++ __ beq(R0, R0, L); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ case 0x05: //below ++ return; ++ break; ++ case 0x06: //below_equal ++ if(&L) ++ __ beq(op1, R0, L); ++ else ++ __ beq(op1, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_immI16_short(cmpOpU cmp, mRegI src1, immI16 src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label &L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ __ move(AT, val); ++ if (&L) ++ __ beq(op1, AT, L); ++ else ++ __ beq(op1, AT, (int)0); ++ break; ++ case 0x02: //not_equal ++ __ move(AT, val); ++ if (&L) ++ __ bne(op1, AT, L); ++ else ++ __ bne(op1, AT, (int)0); ++ break; ++ case 0x03: //above ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ bne(R0, AT, L); ++ else 
++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x04: //above_equal ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ case 0x05: //below ++ __ sltiu(AT, op1, val); ++ if(&L) ++ __ bne(R0, AT, L); ++ else ++ __ bne(R0, AT, (int)0); ++ break; ++ case 0x06: //below_equal ++ __ move(AT, val); ++ __ sltu(AT, AT, op1); ++ if(&L) ++ __ beq(AT, R0, L); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_regL_short(cmpOp cmp, mRegL src1, mRegL src2, label labl) %{ ++ match( If cmp (CmpUL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_regL_short" %} ++ ins_cost(250); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = as_Register($src2$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ case 0x06: // less_equal ++ __ 
sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ __ delayed()->nop(); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x03: //greater ++ if(&target) ++ __ bgtz(opr1_reg, target); ++ else ++ __ bgtz(opr1_reg, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ if(&target) ++ __ bgez(opr1_reg, target); ++ else ++ __ bgez(opr1_reg, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, R0); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ if (&target) ++ __ blez(opr1_reg, target); ++ else ++ __ blez(opr1_reg, int(0)); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_0_short(cmpOp cmp, mRegL src1, immL_0 zero, label labl) %{ ++ match(If cmp (CmpUL src1 zero)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConUL_regL_immL_0_short" %} ++ ins_cost(150); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ case 0x04: // greater_equal ++ case 0x06: // less_equal ++ if (&target) ++ __ beq(opr1_reg, R0, target); ++ else ++ __ beq(opr1_reg, R0, int(0)); ++ break; ++ ++ case 0x02: // not_equal ++ case 0x03: // greater ++ if(&target) ++ __ bne(opr1_reg, R0, target); ++ else ++ __ bne(opr1_reg, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ if(&target) ++ __ beq(R0, R0, target); ++ else ++ __ beq(R0, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: //equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: //not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: //greater ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: //greater_equal ++ __ slt(AT, opr1_reg, opr2_reg); ++ 
if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: //less ++ __ slt(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: //less_equal ++ __ slt(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++ ins_short_branch(1); ++%} ++ ++instruct branchConUL_regL_immL_short(cmpOp cmp, mRegL src1, immL src2, label labl) %{ ++ match(If cmp (CmpUL src1 src2)); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConUL_regL_immL_short" %} ++ ins_cost(180); ++ ++ ins_encode %{ ++ Register opr1_reg = as_Register($src1$$reg); ++ Register opr2_reg = AT; ++ Label &target = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ __ set64(opr2_reg, $src2$$constant); ++ ++ switch(flag) { ++ case 0x01: // equal ++ if (&target) ++ __ beq(opr1_reg, opr2_reg, target); ++ else ++ __ beq(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x02: // not_equal ++ if(&target) ++ __ bne(opr1_reg, opr2_reg, target); ++ else ++ __ bne(opr1_reg, opr2_reg, (int)0); ++ break; ++ ++ case 0x03: // greater ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x04: // greater_equal ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ case 0x05: // less ++ __ sltu(AT, opr1_reg, opr2_reg); ++ if(&target) ++ __ bne(AT, R0, target); ++ else ++ __ bne(AT, R0, (int)0); ++ break; ++ ++ case 0x06: // less_equal ++ __ sltu(AT, opr2_reg, opr1_reg); ++ if(&target) ++ __ beq(AT, R0, target); ++ else ++ __ beq(AT, R0, (int)0); ++ break; ++ ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_alu_branch); ++ ins_short_branch(1); ++%} ++ ++//FIXME ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ __ c_eq_s(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_s(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl 
#@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $src1$$FloatRegister; ++ FloatRegister reg_op2 = $src2$$FloatRegister; ++ Label& L = *($labl$$label); ++ int flag = $cmp$$cmpcode; ++ ++ switch(flag) { ++ case 0x01: // equal ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x02: // not_equal ++ // c_ueq_d cannot distinguish NaN from equal. Double.isNaN(Double) is implemented by 'f != f', so the use of c_ueq_d causes bugs. ++ __ c_eq_d(reg_op1, reg_op2); ++ if (&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x03: // greater ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x04: // greater_equal ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1f(L); ++ else ++ __ bc1f((int)0); ++ break; ++ case 0x05: // less ++ __ c_ult_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ case 0x06: // less_equal ++ __ c_ule_d(reg_op1, reg_op2); ++ if(&L) ++ __ bc1t(L); ++ else ++ __ bc1t((int)0); ++ break; ++ default: ++ Unimplemented(); ++ } ++ __ delayed()->nop(); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_fpu_branch); ++ ins_short_branch(1); ++%} ++ ++// =================== End of branch instructions ========================== ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) %{ ++ match(CallRuntime ); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,runtime #@CallRuntimeDirect" %} ++ ins_encode( Java_To_Runtime( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_alignment(16); ++%} ++ ++ ++ ++//------------------------MemBar Instructions------------------------------- ++//Memory barrier flavors ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() ++%{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-release @ membar_release" %} ++ ++ ins_encode %{ ++ // Attention: DO NOT DELETE THIS GUY! 
++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct store_fence() %{ ++ match(StoreFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ store_fence" %} ++ ++ ins_encode %{ ++ __ sync(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() ++%{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ sync(); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ match(MemBarVolatile); ++ predicate(Matcher::post_store_load_barrier(n)); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} ++ ins_encode( ); ++ ins_pipe(empty); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ++ ins_cost(400); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ __ sync(); ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions-------------------------------------------------- ++instruct castX2P(mRegP dst, mRegL src) %{ ++ match(Set dst (CastX2P src)); ++ format %{ "castX2P $dst, $src @ castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_cost(10); ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct castP2X(mRegL dst, mRegP src ) %{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "mov $dst, $src\t #@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ move(dst, src); ++ %} ++ ins_pipe( ialu_regI_mov ); ++%} ++ ++instruct MoveF2I_reg_reg(mRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ mfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveI2F_reg_reg(regF dst, mRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ mtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_reg(mRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ dmfc1(dst, src); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, mRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ dmtc1(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Conditional Move--------------------------------------------------- ++// Conditional move ++instruct cmovI_cmpI_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ 
++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpP_reg_reg(mRegI dst, mRegI src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpN_reg_reg(mRegI dst, mRegI src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpU_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpF_reg_reg(mRegP dst, mRegP src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpN_reg_reg(mRegP dst, mRegP src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, 
dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpP_reg_reg(mRegN dst, mRegN src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpD_reg_reg(mRegP dst, mRegP src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovN_cmpN_reg_reg(mRegN dst, mRegN src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovI_cmpU_reg_reg(mRegI dst, mRegI src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovI_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovI_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpUL_reg_reg(mRegI dst, mRegI src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpUL_reg_reg\n" ++ 
"\tCMOV $dst,$src \t @cmovI_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovP_cmpL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpUL_reg_reg(mRegP dst, mRegP src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveP (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovP_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovP_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(mRegI dst, mRegI src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovI_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovP_cmpP_reg_reg(mRegP dst, mRegP src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovP_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_cmpI_reg_reg(mRegP dst, mRegP src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovP_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovP_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ 
%} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpP_reg_reg(mRegL dst, mRegL src, mRegP tmp1, mRegP tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpP_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpP_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpU_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovN_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovN_cmpUL_reg_reg(mRegN dst, mRegN src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveN (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovN_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovN_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_reg(mRegN dst, mRegN src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1,$tmp2\t @cmovN_cmpI_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovN_cmpI_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpU_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpU_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpU_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = 
$tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpF_reg_reg(mRegL dst, mRegL src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpF_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpF_reg_reg" ++ %} ++ ++ ins_encode %{ ++ FloatRegister reg_op1 = $tmp1$$FloatRegister; ++ FloatRegister reg_op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpI_reg_reg(mRegL dst, mRegL src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpI_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovL_cmpUL_reg_reg(mRegL dst, mRegL src, mRegL tmp1, mRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveL (Binary cop (CmpUL tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpUL_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpUL_reg_reg" ++ %} ++ ins_encode %{ ++ Register opr1 = as_Register($tmp1$$reg); ++ Register opr2 = as_Register($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(opr1, opr2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovL_cmpN_reg_reg(mRegL dst, mRegL src, mRegN tmp1, mRegN tmp2, cmpOpU cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMPU$cop $tmp1,$tmp2\t @cmovL_cmpN_reg_reg\n\t" ++ "CMOV $dst,$src\t @cmovL_cmpN_reg_reg" ++ %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_signed */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmovL_cmpD_reg_reg(mRegL dst, mRegL src, regD tmp1, regD tmp2, 
cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovL_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovL_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpD_reg_reg\n" ++ "\tCMOV $dst,$src \t @cmovD_cmpD_reg_reg" ++ %} ++ ins_encode %{ ++ FloatRegister reg_op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister reg_op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovF_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, mRegI tmp1, mRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpI_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpI_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, mRegP tmp1, mRegP tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ins_cost(200); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovD_cmpP_reg_reg\n" ++ "\tCMOV $dst, $src \t @cmovD_cmpP_reg_reg" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ ++ __ cmp_cmov(op1, op2, dst, src, (MacroAssembler::CMCompare) flag, false /* is_float */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//FIXME ++instruct cmovI_cmpF_reg_reg(mRegI dst, mRegI src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "CMP$cop $tmp1, $tmp2\t @cmovI_cmpF_reg_reg\n" ++ "\tCMOV 
$dst,$src \t @cmovI_cmpF_reg_reg" %}
++
++ ins_encode %{
++ FloatRegister reg_op1 = $tmp1$$FloatRegister;
++ FloatRegister reg_op2 = $tmp2$$FloatRegister;
++ Register dst = $dst$$Register;
++ Register src = $src$$Register;
++ int flag = $cop$$cmpcode;
++
++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */);
++ %}
++ ins_pipe( pipe_slow );
++%}
++
++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop ) %{
++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src)));
++ ins_cost(200);
++ format %{
++ "CMP$cop $tmp1, $tmp2\t @cmovF_cmpF_reg_reg\n"
++ "\tCMOV $dst,$src \t @cmovF_cmpF_reg_reg"
++ %}
++
++ ins_encode %{
++ FloatRegister reg_op1 = $tmp1$$FloatRegister;
++ FloatRegister reg_op2 = $tmp2$$FloatRegister;
++ FloatRegister dst = $dst$$FloatRegister;
++ FloatRegister src = $src$$FloatRegister;
++ int flag = $cop$$cmpcode;
++
++ __ cmp_cmov(reg_op1, reg_op2, dst, src, (MacroAssembler::CMCompare) flag, true /* is_float */);
++ %}
++ ins_pipe( pipe_slow );
++%}
++
++// Manifest a CmpL result in an integer register. Very painful.
++// This is the test to avoid.
++instruct cmpL3_reg_reg(mRegI dst, mRegL src1, mRegL src2) %{
++ match(Set dst (CmpL3 src1 src2));
++ ins_cost(1000);
++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %}
++ ins_encode %{
++ Register opr1 = as_Register($src1$$reg);
++ Register opr2 = as_Register($src2$$reg);
++ Register dst = as_Register($dst$$reg);
++
++ __ slt(AT, opr1, opr2);
++ __ slt(dst, opr2, opr1);
++ __ subu(dst, dst, AT);
++ %}
++ ins_pipe( pipe_slow );
++%}
++
++//
++// less_result = -1
++// greater_result = 1
++// equal_result = 0
++// nan_result = -1
++//
++instruct cmpF3_reg_reg(mRegI dst, regF src1, regF src2) %{
++ match(Set dst (CmpF3 src1 src2));
++ ins_cost(1000);
++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %}
++ ins_encode %{
++ FloatRegister src1 = as_FloatRegister($src1$$reg);
++ FloatRegister src2 = as_FloatRegister($src2$$reg);
++ Register dst = as_Register($dst$$reg);
++
++ __ ori(dst, R0, 1);
++ __ ori(AT, R0, 1);
++ __ c_olt_s(src2, src1);
++ __ movf(dst, R0);
++ __ c_ult_s(src1, src2);
++ __ movf(AT, R0);
++ __ subu(dst, dst, AT);
++ %}
++ ins_pipe( pipe_slow );
++%}
++
++instruct cmpD3_reg_reg(mRegI dst, regD src1, regD src2) %{
++ match(Set dst (CmpD3 src1 src2));
++ ins_cost(1000);
++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %}
++ ins_encode %{
++ FloatRegister src1 = as_FloatRegister($src1$$reg);
++ FloatRegister src2 = as_FloatRegister($src2$$reg);
++ Register dst = as_Register($dst$$reg);
++
++ __ ori(dst, R0, 1);
++ __ ori(AT, R0, 1);
++ __ c_olt_d(src2, src1);
++ __ movf(dst, R0);
++ __ c_ult_d(src1, src2);
++ __ movf(AT, R0);
++ __ subu(dst, dst, AT);
++ %}
++ ins_pipe( pipe_slow );
++%}
++
++instruct clear_array(mRegL cnt, mRegP base, Universe dummy) %{
++ match(Set dummy (ClearArray cnt base));
++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %}
++ ins_encode %{
++ //Assume cnt is the number of bytes in an array to be cleared,
++ //and base points to the starting address of the array.
++ Register base = $base$$Register; ++ Register num = $cnt$$Register; ++ Label Loop, done; ++ ++ __ beq(num, R0, done); ++ __ delayed()->daddu(AT, base, R0); ++ ++ __ move(T9, num); /* T9 = words */ ++ ++ __ bind(Loop); ++ __ sd(R0, AT, 0); ++ __ daddiu(T9, T9, -1); ++ __ bne(T9, R0, Loop); ++ __ delayed()->daddiu(AT, AT, wordSize); ++ ++ __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare char[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareLU(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareLU" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::LU); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct string_compareUL(a4_RegP str1, mA5RegI cnt1, a6_RegP str2, mA7RegI cnt2, no_Ax_mRegI result) %{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2); ++ ++ format %{ "String Compare byte[] $str1[len: $cnt1], $str2[len: $cnt2] -> $result @ string_compareUL" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ StrIntrinsicNode::UL); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// intrinsic optimization ++instruct string_equals(a4_RegP str1, a5_RegP str2, mA6RegI cnt, mA7RegI temp, no_Ax_mRegI result) %{ ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL temp); ++ ++ format %{ "String Equal $str1, $str2, len:$cnt tmp:$temp -> $result @ string_equals" %} ++ ins_encode %{ ++ __ arrays_equals($str1$$Register, $str2$$Register, ++ $cnt$$Register, $temp$$Register, $result$$Register, ++ false/* byte */); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++//----------Arithmetic Instructions------------------------------------------- ++//----------Addition 
Instructions--------------------------------------------- ++instruct addI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ addu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_Reg_imm(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "add $dst, $src1, $src2 #@addI_Reg_imm" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ if(Assembler::is_simm16(imm)) { ++ __ addiu32(dst, src1, imm); ++ } else { ++ __ move(AT, imm); ++ __ addu32(dst, src1, AT); ++ } ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg(mRegP dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_reg_convI2L(mRegP dst, mRegP src1, mRegI src2) %{ ++ match(Set dst (AddP src1 (ConvI2L src2))); ++ ++ format %{ "dadd $dst, $src1, $src2 #@addP_reg_reg_convI2L" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ daddu(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addP_reg_imm(mRegP dst, mRegP src1, immL16 src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "daddi $dst, $src1, $src2 #@addP_reg_imm" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ long src2 = $src2$$constant; ++ Register dst = $dst$$Register; ++ ++ __ daddiu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++// Add Long Register with Register ++instruct addL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_imm(mRegL dst, mRegL src1, immL16 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_imm(mRegL dst, mRegI src1, immL16 src2) ++%{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_imm " %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ int src2_imm = $src2$$constant; ++ ++ __ daddiu(dst_reg, src1_reg, src2_imm); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (AddL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_Reg\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = 
as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AddL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_RegI2L_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AddL src1 (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "ADD $dst, $src1, $src2 #@addL_Reg_RegI2L\t" %} ++ ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ daddu(dst_reg, src1_reg, src2_reg); ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//----------Abs Instructions------------------------------------------- ++ ++// Integer Absolute Instructions ++instruct absI_rReg(mRegI dst, mRegI src) ++%{ ++ match(Set dst (AbsI src)); ++ effect(TEMP dst); ++ format %{ "AbsI $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ sra(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ subu32(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Long Absolute Instructions ++instruct absL_rReg(mRegL dst, mRegL src) ++%{ ++ match(Set dst (AbsL src)); ++ effect(TEMP dst); ++ format %{ "AbsL $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dsra32(AT, src, 31); ++ __ xorr(dst, src, AT); ++ __ subu(dst, dst, AT); ++ %} ++ ++ ins_pipe(ialu_regL_regL); ++%} ++ ++//----------Subtraction Instructions------------------------------------------- ++// Integer Subtraction Instructions ++instruct subI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(100); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ subu32(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subI_Reg_immI_M32767_32768(mRegI dst, mRegI src1, immI_M32767_32768 src2) %{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subI_Reg_immI_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ addiu32(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negI_Reg(mRegI dst, immI_0 zero, mRegI src) %{ ++ match(Set dst (SubI zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negI_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu32(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct negL_Reg(mRegL dst, immL_0 zero, mRegL src) %{ ++ match(Set dst (SubL zero src)); ++ ins_cost(80); ++ ++ format %{ "neg $dst, $src #@negL_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subu(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct subL_Reg_immL_M32767_32768(mRegL dst, mRegL src1, immL_M32767_32768 src2) %{ ++ match(Set dst (SubL src1 src2)); ++ 
ins_cost(80); ++ ++ format %{ "sub $dst, $src1, $src2 #@subL_Reg_immL_M32767_32768" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ daddiu(dst, src1, -1 * $src2$$constant); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Subtract Long Register with Register. ++instruct subL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (SubL src1 src2)); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_Reg_RegI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (SubL src1 (ConvI2L src2))); ++ ins_cost(100); ++ format %{ "SubL $dst, $src1, $src2 @ subL_Reg_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_Reg(mRegL dst, mRegI src1, mRegL src2) %{ ++ match(Set dst (SubL (ConvI2L src1) src2)); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_Reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct subL_RegI2L_RegI2L(mRegL dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (SubL (ConvI2L src1) (ConvI2L src2))); ++ ins_cost(200); ++ format %{ "SubL $dst, $src1, $src2 @ subL_RegI2L_RegI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ __ subu(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Integer MOD with Register ++instruct modI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (ModI src1 src2)); ++ ins_cost(300); ++ format %{ "modi $dst, $src1, $src2 @ modI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ //if (UseLEXT1) { ++ if (0) { ++ // Experiments show that gsmod is slower than div+mfhi. ++ // So I just disable it here. 
++ __ gsmod(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ __ mfhi(dst); ++ } ++ %} ++ ++ //ins_pipe( ialu_mod ); ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (ModL src1 src2)); ++ format %{ "modL $dst, $src1, $src2 @modL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmod(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mfhi(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mul $dst, $src1, $src2 @ mulI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mul(dst, src1, src2); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct maddI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2, mRegI src3) %{ ++ match(Set dst (AddI (MulI src1 src2) src3)); ++ ++ ins_cost(999); ++ format %{ "madd $dst, $src1 * $src2 + $src3 #@maddI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register src3 = $src3$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mtlo(src3); ++ __ madd(src1, src2); ++ __ mflo(dst); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct divI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (DivI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "div $dst, $src1, $src2 @ divI_Reg_Reg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ // In MIPS, div does not cause exception. ++ // We must trap an exception manually. ++ __ teq(R0, src2, 0x7); ++ ++ if (UseLEXT1) { ++ __ gsdiv(dst, src1, src2); ++ } else { ++ __ div(src1, src2); ++ ++ __ nop(); ++ __ nop(); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( ialu_mod ); ++%} ++ ++instruct divF_Reg_Reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (DivF src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divF $dst, $src1, $src2 @ divF_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? */ ++ __ div_s(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divD_Reg_Reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (DivD src1 src2)); ++ ++ ins_cost(300); ++ format %{ "divD $dst, $src1, $src2 @ divD_Reg_Reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ /* Here do we need to trap an exception manually ? 
*/ ++ __ div_d(dst, src1, src2); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (MulL src1 src2)); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct mulL_reg_regI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (MulL src1 (ConvI2L src2))); ++ format %{ "mulL $dst, $src1, $src2 @mulL_reg_regI2L" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsdmult(dst, op1, op2); ++ } else { ++ __ dmult(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct divL_reg_reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (DivL src1 src2)); ++ format %{ "divL $dst, $src1, $src2 @divL_reg_reg" %} ++ ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ if (UseLEXT1) { ++ __ gsddiv(dst, op1, op2); ++ } else { ++ __ ddiv(op1, op2); ++ __ mflo(dst); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct addF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (AddF src1 src2)); ++ format %{ "AddF $dst, $src1, $src2 @addF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (SubF src1 src2)); ++ format %{ "SubF $dst, $src1, $src2 @subF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++instruct addD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (AddD src1 src2)); ++ format %{ "AddD $dst, $src1, $src2 @addD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ add_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct subD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (SubD src1 src2)); ++ format %{ "SubD $dst, $src1, $src2 @subD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sub_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negF_reg(regF dst, regF src) %{ ++ match(Set dst (NegF src)); ++ format %{ "negF $dst, $src @negF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct negD_reg(regD dst, regD src) %{ ++ match(Set dst (NegD src)); ++ format %{ "negD $dst, $src @negD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ 
FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ neg_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++instruct mulF_reg_reg(regF dst, regF src1, regF src2) %{ ++ match(Set dst (MulF src1 src2)); ++ format %{ "MULF $dst, $src1, $src2 @mulF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_s(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Mul two double precision floating point numbers ++instruct mulD_reg_reg(regD dst, regD src1, regD src2) %{ ++ match(Set dst (MulD src1 src2)); ++ format %{ "MULD $dst, $src1, $src2 @mulD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = $src1$$FloatRegister; ++ FloatRegister src2 = $src2$$FloatRegister; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mul_d(dst, src1, src2); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct absF_reg(regF dst, regF src) %{ ++ match(Set dst (AbsF src)); ++ ins_cost(100); ++ format %{ "absF $dst, $src @absF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// intrinsics for math_native. ++// AbsD SqrtD CosD SinD TanD LogD Log10D ++ ++instruct absD_reg(regD dst, regD src) %{ ++ match(Set dst (AbsD src)); ++ ins_cost(100); ++ format %{ "absD $dst, $src @absD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ abs_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtD_reg(regD dst, regD src) %{ ++ match(Set dst (SqrtD src)); ++ ins_cost(100); ++ format %{ "SqrtD $dst, $src @sqrtD_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct sqrtF_reg(regF dst, regF src) %{ ++ match(Set dst (ConvD2F (SqrtD (ConvF2D src)))); ++ ins_cost(100); ++ format %{ "SqrtF $dst, $src @sqrtF_reg" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ sqrt_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// src1 * src2 + src3 ++instruct maddF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary src1 src2))); ++ ++ format %{ "madd_s $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ madd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 + src3 ++instruct maddD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary src1 src2))); ++ ++ format %{ "madd_d $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ madd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary src1 src2))); ++ ++ format %{ "msub_s $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ msub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ 
as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// src1 * src2 - src3 ++instruct msubD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary src1 src2))); ++ ++ format %{ "msub_d $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ msub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddF_reg_reg(regF dst, regF src1, regF src2, regF src3, immF_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF (NegF src3) (Binary (NegF src1) src2))); ++ match(Set dst (FmaF (NegF src3) (Binary src1 (NegF src2)))); ++ ++ format %{ "nmadds $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmadd_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 - src3 ++instruct mnaddD_reg_reg(regD dst, regD src1, regD src2, regD src3, immD_0 zero) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD (NegD src3) (Binary (NegD src1) src2))); ++ match(Set dst (FmaD (NegD src3) (Binary src1 (NegD src2)))); ++ ++ format %{ "nmaddd $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmadd_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubF_reg_reg(regF dst, regF src1, regF src2, regF src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaF src3 (Binary (NegF src1) src2))); ++ match(Set dst (FmaF src3 (Binary src1 (NegF src2)))); ++ ++ format %{ "nmsubs $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmsub_s(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++ ++// -src1 * src2 + src3 ++instruct mnsubD_reg_reg(regD dst, regD src1, regD src2, regD src3) %{ ++ predicate(UseFMA); ++ match(Set dst (FmaD src3 (Binary (NegD src1) src2))); ++ match(Set dst (FmaD src3 (Binary src1 (NegD src2)))); ++ ++ format %{ "nmsubd $dst, $src3, $src2, $src1" %} ++ ++ ins_encode %{ ++ __ nmsub_d(as_FloatRegister($dst$$reg), as_FloatRegister($src3$$reg), ++ as_FloatRegister($src2$$reg), as_FloatRegister($src1$$reg)); ++ %} ++ ++ ins_pipe(fpu_regF_regF); ++%} ++//----------------------------------Logical Instructions---------------------- ++//__________________________________Integer Logical Instructions------------- ++ ++// And Instructions ++// And Register with Immediate ++instruct andI_Reg_immI(mRegI dst, mRegI src1, immI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_immI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ move(AT, val); ++ __ andr(dst, src, AT); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct 
andI_Reg_immI_nonneg_mask(mRegI dst, mRegI src1, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andI_Reg_immI_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_nonneg_mask(mRegL dst, mRegL src1, immL_nonneg_mask mask) %{ ++ match(Set dst (AndL src1 mask)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $mask #@andL_Reg_immL_nonneg_mask" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int size = Assembler::is_jlong_mask($mask$$constant); ++ ++ __ dext(dst, src, 0, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_imm_0_65535(mRegI dst, mRegI src1, immI_0_65535 src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorI_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorI_Reg_immI_M1(mRegI dst, mRegI src1, immI_M1 M1) %{ ++ match(Set dst (XorI src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorI_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL2I_Reg_immI_M1(mRegI dst, mRegL src1, immI_M1 M1) %{ ++ match(Set dst (XorI (ConvL2I src1) M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL2I_Reg_immI_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct xorL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xori $dst, $src1, $src2 #@xorL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ xori(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* ++instruct xorL_Reg_immL_M1(mRegL dst, mRegL src1, immL_M1 M1) %{ ++ match(Set dst (XorL src1 M1)); ++ predicate(UseLEXT3); ++ ins_cost(60); ++ ++ format %{ "xor $dst, $src1, $M1 #@xorL_Reg_immL_M1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ ++ __ gsorn(dst, R0, src); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct lbu_and_lmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI mask (LoadB mem))); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_lmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct lbu_and_rmask(mRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadB mem) mask)); ++ ins_cost(60); ++ ++ format %{ "lhu $dst, $mem #@lbu_and_rmask" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_BYTE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct andI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ 
format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_Reg_nReg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI src1 (XorI src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornI_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andnI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (AndI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct ornI_nReg_Reg(mRegI dst, mRegI src1, mRegI src2, immI_M1 M1) %{ ++ match(Set dst (OrI (XorI src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornI_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Long Register with Register ++instruct andL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (AndL src1 src2)); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_Reg_convI2L(mRegL dst, mRegL src1, mRegI src2) %{ ++ match(Set dst (AndL src1 (ConvI2L src2))); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg_convI2L\n\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ andr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_65535(mRegL dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL2I_Reg_imm_0_65535(mRegI dst, mRegL src1, immL_0_65535 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_65535" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ andi(dst, src, val); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* 
++instruct andnL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src1, $src2 #@andnL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_Reg_nReg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL src1 (XorL src2 M1))); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src1, $src2 #@ornL_Reg_nReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct andnL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (AndL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "andn $dst, $src2, $src1 #@andnL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsandn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* ++instruct ornL_nReg_Reg(mRegL dst, mRegL src1, mRegL src2, immL_M1 M1) %{ ++ match(Set dst (OrL (XorL src1 M1) src2)); ++ predicate(UseLEXT3); ++ ++ format %{ "orn $dst, $src2, $src1 #@ornL_nReg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ gsorn(dst, src2, src1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++instruct andL_Reg_immL_M8(mRegL dst, immL_M8 M8) %{ ++ match(Set dst (AndL dst M8)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M8 #@andL_Reg_immL_M8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 3); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M5(mRegL dst, immL_M5 M5) %{ ++ match(Set dst (AndL dst M5)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M5 #@andL_Reg_immL_M5" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 2, 1); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M7(mRegL dst, immL_M7 M7) %{ ++ match(Set dst (AndL dst M7)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M7 #@andL_Reg_immL_M7" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 1, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M4(mRegL dst, immL_M4 M4) %{ ++ match(Set dst (AndL dst M4)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M4 #@andL_Reg_immL_M4" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 0, 2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct andL_Reg_immL_M121(mRegL dst, immL_M121 M121) %{ ++ match(Set dst (AndL dst M121)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $dst, $M121 #@andL_Reg_immL_M121" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ dins(dst, R0, 3, 4); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Long Register with Register ++instruct orL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (OrL src1 src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct 
orL_Reg_P2XReg(mRegL dst, mRegP src1, mRegL src2) %{ ++ match(Set dst (OrL (CastP2X src1) src2)); ++ format %{ "OR $dst, $src1, $src2 @ orL_Reg_P2XReg\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++ ++ __ orr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Long Register with Register ++instruct xorL_Reg_Reg(mRegL dst, mRegL src1, mRegL src2) %{ ++ match(Set dst (XorL src1 src2)); ++ format %{ "XOR $dst, $src1, $src2 @ xorL_Reg_Reg\t" %} ++ ins_encode %{ ++ Register dst_reg = as_Register($dst$$reg); ++ Register src1_reg = as_Register($src1$$reg); ++ Register src2_reg = as_Register($src2$$reg); ++ ++ __ xorr(dst_reg, src1_reg, src2_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salL2I_Reg_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salL2I_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct salI_Reg_imm_and_M65536(mRegI dst, mRegI src, immI_16 shift, immI_M65536 mask) %{ ++ match(Set dst (AndI (LShiftI src shift) mask)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_imm_and_M65536" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sll(dst, src, 16); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct land7_2_s(mRegI dst, mRegL src, immL_7 seven, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI (ConvL2I (AndL src seven)) sixteen) sixteen)); ++ ++ format %{ "andi $dst, $src, 7\t# @land7_2_s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ andi(dst, src, 7); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Shift Left by 16, followed by Arithmetic Shift Right by 16. ++// This idiom is used by the compiler for the i2s bytecode. ++instruct i2s(mRegI dst, mRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "i2s $dst, $src\t# @i2s" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seh(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Shift Left by 24, followed by Arithmetic Shift Right by 24. ++// This idiom is used by the compiler for the i2b bytecode. 
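++// For example, Java source such as `byte b = (byte) i;` (compiled to the i2b bytecode)
++// typically reaches the matcher as (i << 24) >> 24; this rule folds the two shifts into a
++// single seb, which sign-extends the low-order byte directly.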
++instruct i2b(mRegI dst, mRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "i2b $dst, $src\t# @i2b" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ seb(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++instruct salI_RegL2I_imm(mRegI dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftI (ConvL2I src) shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_RegL2I_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ ++ __ sll(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Shift Left by 8-bit immediate ++instruct salI_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "SHL $dst, $src, $shift #@salI_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shamt = $shift$$Register; ++ __ sllv(dst, src, shamt); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++// Shift Left Long ++instruct salL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct salL_RegI2L_imm(mRegL dst, mRegI src, immI8 shift) %{ ++ match(Set dst (LShiftL (ConvI2L src) shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_RegI2L_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ if (__ is_simm(shamt, 5)) ++ __ dsll(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsll(dst_reg, src_reg, sa); ++ } else { ++ __ dsll32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Left Long ++instruct salL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(100); ++ format %{ "salL $dst, $src, $shift @ salL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsllv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long ++instruct sarL_Reg_imm(mRegL dst, mRegL src, immI8 shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_imm" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = ($shift$$constant & 0x3f); ++ if (__ is_simm(shamt, 5)) ++ __ dsra(dst_reg, src_reg, shamt); ++ else { ++ int sa = Assembler::low(shamt, 6); ++ if (sa < 32) { ++ __ dsra(dst_reg, src_reg, sa); ++ } else { ++ __ dsra32(dst_reg, src_reg, sa - 32); ++ } ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct sarL2I_Reg_immI_32_63(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (RShiftL src shift))); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL2I_Reg_immI_32_63" %} 
++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsra32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long arithmetically ++instruct sarL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(100); ++ format %{ "sarL $dst, $src, $shift @ sarL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrav(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Shift Right Long logically ++instruct slrL_Reg_Reg(mRegL dst, mRegL src, mRegI shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(100); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_Reg" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ ++ __ dsrlv(dst_reg, src_reg, $shift$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_0_31_and_max_int(mRegI dst, mRegL src, immI_0_31 shift, immI_MaxI max_int) %{ ++ match(Set dst (AndI (ConvL2I (URShiftL src shift)) max_int)); ++ ins_cost(80); ++ format %{ "dext $dst, $src, $shift, 31 @ slrL_Reg_immI_0_31_and_max_int" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dext(dst_reg, src_reg, shamt, 31); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_0_31(mRegL dst, mRegP src, immI_0_31 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_0_31" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl(dst_reg, src_reg, shamt); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_Reg_immI_convL2I(mRegI dst, mRegL src, immI_32_63 shift) %{ ++ match(Set dst (ConvL2I (URShiftL src shift))); ++ predicate(n->in(1)->in(2)->get_int() > 32); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_Reg_immI_convL2I" %} ++ ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct slrL_P2XReg_immI_32_63(mRegL dst, mRegP src, immI_32_63 shift) %{ ++ match(Set dst (URShiftL (CastP2X src) shift)); ++ ins_cost(80); ++ format %{ "slrL $dst, $src, $shift @ slrL_P2XReg_immI_32_63" %} ++ 
ins_encode %{ ++ Register src_reg = as_Register($src$$reg); ++ Register dst_reg = as_Register($dst$$reg); ++ int shamt = $shift$$constant; ++ ++ __ dsrl32(dst_reg, src_reg, shamt - 32); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (XorI src1 src2)); ++ ++ format %{ "XOR $dst, $src1, $src2 #@xorI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ xorr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Or Instructions ++instruct orI_Reg_imm(mRegI dst, mRegI src1, immI_0_32767 src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_imm" %} ++ ins_encode %{ ++ __ ori($dst$$Register, $src1$$Register, $src2$$constant); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++// Or Register with Register ++instruct orI_Reg_Reg(mRegI dst, mRegI src1, mRegI src2) %{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rotI_shr_logical_Reg(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift, immI_1 one) %{ ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI (AndI src one) lshift))); ++ predicate(32 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()))); ++ ++ format %{ "rotr $dst, $src, 1 ...\n\t" ++ "srl $dst, $dst, ($rshift-1) @ rotI_shr_logical_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int rshift = $rshift$$constant; ++ ++ __ rotr(dst, src, 1); ++ if (rshift - 1) { ++ __ srl(dst, dst, rshift - 1); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct orI_Reg_castP2X(mRegL dst, mRegL src1, mRegP src2) %{ ++ match(Set dst (OrI src1 (CastP2X src2))); ++ ++ format %{ "OR $dst, $src1, $src2 #@orI_Reg_castP2X" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ orr(dst, src1, src2); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right by 8-bit immediate ++instruct shr_logical_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (URShiftI src shift)); ++ //effect(KILL cr); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ ++ __ srl(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_logical_Reg_imm_nonneg_mask(mRegI dst, mRegI src, immI_0_31 shift, immI_nonneg_mask mask) %{ ++ match(Set dst (AndI (URShiftI src shift) mask)); ++ ++ format %{ "ext $dst, $src, $shift, one-bits($mask) #@shr_logical_Reg_imm_nonneg_mask" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int pos = $shift$$constant; ++ int size = Assembler::is_int_mask($mask$$constant); ++ ++ __ ext(dst, src, pos, size); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolI_Reg_immI_0_31(mRegI dst, immI_0_31 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, 
$dst, $rshift #@rolI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, dst, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_0_31(mRegL dst, mRegL src, immI_32_63 lshift, immI_0_31 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rolL_Reg_immI_32_63(mRegL dst, mRegL src, immI_0_31 lshift, immI_32_63 rshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (LShiftL src lshift) (URShiftL src rshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rolL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorI_Reg_immI_0_31(mRegI dst, mRegI src, immI_0_31 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI src rshift) (LShiftI src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorI_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ rotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_0_31(mRegL dst, mRegL src, immI_0_31 rshift, immI_32_63 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_0_31" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr(dst, src, sa); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct rorL_Reg_immI_32_63(mRegL dst, mRegL src, immI_32_63 rshift, immI_0_31 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++ ++ ins_cost(100); ++ format %{ "rotr $dst, $src, $rshift #@rorL_Reg_immI_32_63" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int sa = $rshift$$constant; ++ ++ __ drotr32(dst, src, sa - 32); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// Logical Shift Right ++instruct shr_logical_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "SRL $dst, $src, $shift #@shr_logical_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srlv(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct shr_arith_Reg_imm(mRegI dst, mRegI src, immI8 shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_imm" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shift = $shift$$constant; ++ __ 
sra(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct shr_arith_Reg_Reg(mRegI dst, mRegI src, mRegI shift) %{ ++ match(Set dst (RShiftI src shift)); ++ // effect(KILL cr); ++ ++ format %{ "SRA $dst, $src, $shift #@shr_arith_Reg_Reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ __ srav(dst, src, shift); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++//----------Convert Int to Boolean--------------------------------------------- ++ ++instruct convI2B(mRegI dst, mRegI src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convI2B $dst, $src @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct convI2L_reg( mRegL dst, mRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "SLL $dst, $src @ convI2L_reg\t" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ sll(dst, src, 0); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convL2I_reg( mRegI dst, mRegL src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "MOV $dst, $src @ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2I2L_reg( mRegL dst, mRegL src ) %{ ++ match(Set dst (ConvI2L (ConvL2I src))); ++ ++ format %{ "sll $dst, $src, 0 @ convL2I2L_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ sll(dst, src, 0); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2D_reg( regD dst, mRegL src ) %{ ++ match(Set dst (ConvL2D src)); ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(src, dst); ++ __ cvt_d_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_fast( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(150); ++ format %{ "convD2L $dst, $src @ convD2L_reg_fast" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label Done; ++ ++ __ trunc_l_d(F30, src); ++ // max_long: 0x7fffffffffffffff ++ // __ set64(AT, 0x7fffffffffffffff); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_slow( mRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(250); ++ format %{ "convD2L $dst, $src @ convD2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ Label L; ++ ++ __ c_un_d(src, src); //NaN? 
++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2I_reg_fast( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(150); ++ format %{ "convf2i $dst, $src @ convF2I_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_w_s(F30, fval); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++ ++instruct convF2I_reg_slow( mRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(250); ++ format %{ "convf2i $dst, $src @ convF2I_reg_slow" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? ++ __ bc1t(L); ++ __ delayed(); ++ __ move(dreg, R0); ++ ++ __ trunc_w_s(F30, fval); ++ ++ /* Call SharedRuntime::f2i() to do a valid conversion */ ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dreg, F30); ++ ++ __ mov_s(F12, fval); ++ ++ // This bug was found when running ezDS's control-panel. ++ // J 982 C2 javax.swing.text.BoxView.layoutMajorAxis(II[I[I)V (283 bytes) @ 0x000000555c46aa74 ++ // ++ // An integer array index has been assigned to V0, and then changed from 1 to Integer.MAX_VALUE. ++ // V0 is corrupted during call_VM_leaf(), and should be preserved. ++ // ++ __ push(fval); ++ if(dreg != V0) { ++ __ push(V0); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); ++ if(dreg != V0) { ++ __ move(dreg, V0); ++ __ pop(V0); ++ } ++ __ pop(fval); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_fast( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(150); ++ format %{ "convf2l $dst, $src @ convF2L_reg_fast" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ trunc_l_s(F30, fval); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(dreg, F30); ++ __ c_un_s(fval, fval); //NaN? ++ __ movt(dreg, R0); ++ ++ __ bne(AT, dreg, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, fval); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(dreg, T9, AT); ++ ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_slow( mRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(250); ++ format %{ "convf2l $dst, $src @ convF2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister fval = $src$$FloatRegister; ++ Label L; ++ ++ __ c_un_s(fval, fval); //NaN? 
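++ // c.un.s sets the FP condition flag when fval is unordered (NaN); in that case the
++ // following bc1t branches out and the move in its delay slot zeroes dst, so a NaN
++ // input converts to 0 as Java requires.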
++ __ bc1t(L); ++ __ delayed(); ++ __ move(dst, R0); ++ ++ __ trunc_l_s(F30, fval); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->dmfc1(dst, F30); ++ ++ __ mov_s(F12, fval); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convL2F_reg( regF dst, mRegL src ) %{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ Label L; ++ ++ __ dmtc1(src, dst); ++ __ cvt_s_l(dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg( regF dst, mRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(src, dst); ++ __ cvt_s_w(dst, dst); ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct cmpLTMask_immI_0( mRegI dst, mRegI p, immI_0 zero ) %{ ++ match(Set dst (CmpLTMask p zero)); ++ ins_cost(100); ++ ++ format %{ "sra $dst, $p, 31 @ cmpLTMask_immI_0" %} ++ ins_encode %{ ++ Register src = $p$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sra(dst, src, 31); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct cmpLTMask( mRegI dst, mRegI p, mRegI q ) %{ ++ match(Set dst (CmpLTMask p q)); ++ ins_cost(400); ++ ++ format %{ "cmpLTMask $dst, $p, $q @ cmpLTMask" %} ++ ins_encode %{ ++ Register p = $p$$Register; ++ Register q = $q$$Register; ++ Register dst = $dst$$Register; ++ ++ __ slt(dst, p, q); ++ __ subu(dst, R0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convP2B(mRegI dst, mRegP src) %{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "convP2B $dst, $src @ convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if (dst != src) { ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, src); ++ } else { ++ __ move(AT, src); ++ __ daddiu(dst, R0, 1); ++ __ movz(dst, R0, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++instruct convI2D_reg_reg(regD dst, mRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ mtc1(src, dst); ++ __ cvt_d_w(dst, dst); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convF2D_reg_reg(regD dst, regF src) %{ ++ match(Set dst (ConvF2D src)); ++ format %{ "convF2D $dst, $src\t# @convF2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_d_s(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct convD2F_reg_reg(regF dst, regD src) %{ ++ match(Set dst (ConvD2F src)); ++ format %{ "convD2F $dst, $src\t# @convD2F_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ FloatRegister src = $src$$FloatRegister; ++ ++ __ cvt_s_d(dst, src); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++ ++// Convert a double to an int. If the double is a NAN, stuff a zero in instead. 
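++// (This matches Java's narrowing rules: NaN converts to 0 and out-of-range values saturate,
++// e.g. (int) Double.NaN == 0 and (int) 1e30 == Integer.MAX_VALUE. The fast rule below only
++// takes its fix-up path when the truncation result is 0x7fffffff, the value trunc.w.d
++// produces for NaN and out-of-range inputs.)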
++instruct convD2I_reg_reg_fast( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(150); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_fast" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ ++ Label Done; ++ ++ __ trunc_w_d(F30, src); ++ // max_int: 2147483647 ++ __ move(AT, 0x7fffffff); ++ __ mfc1(dst, F30); ++ ++ __ bne(dst, AT, Done); ++ __ delayed()->mtc1(R0, F30); ++ ++ __ cvt_d_w(F30, F30); ++ __ c_ult_d(src, F30); ++ __ bc1f(Done); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(src, src); //NaN? ++ __ subu32(dst, T9, AT); ++ __ movt(dst, R0); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2I_reg_reg_slow( mRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(250); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_slow" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ Label L; ++ ++ __ trunc_w_d(F30, src); ++ __ cfc1(AT, 31); ++ __ li(T9, 0x10000); ++ __ andr(AT, AT, T9); ++ __ beq(AT, R0, L); ++ __ delayed()->mfc1(dst, F30); ++ ++ __ mov_d(F12, src); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); ++ __ move(dst, V0); ++ __ bind(L); ++ ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ encode_heap_oop(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeHeapOop_not_null(mRegN dst, mRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeHeapOop_not_null" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop $dst,$src @ decodeHeapOop" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeHeapOop_not_null(mRegP dst, mRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src @ decodeHeapOop_not_null" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct encodeKlass_not_null(mRegN dst, mRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++ format %{ "encode_heap_oop_not_null $dst,$src @ encodeKlass_not_null" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct decodeKlass_not_null(mRegP dst, mRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++ format %{ "decode_heap_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = 
$src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//FIXME ++instruct tlsLoadP(mRegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ ++ ins_cost(0); ++ format %{ " get_thread in $dst #@tlsLoadP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++#ifdef OPT_THREAD ++ __ move(dst, TREG); ++#else ++ __ get_thread(dst); ++#endif ++ %} ++ ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct checkCastPP( mRegP dst ) %{ ++ match(Set dst (CheckCastPP dst)); ++ ++ format %{ "#checkcastPP of $dst (empty encoding) #@chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(mRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "# castPP of $dst" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII( mRegI dst ) %{ ++ match(Set dst (CastII dst)); ++ format %{ "#castII of $dst empty encoding" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. ++instruct Ret() %{ ++ match(Return); ++ format %{ "RET #@Ret" %} ++ ++ ins_encode %{ ++ __ jr(RA); ++ __ delayed()->nop(); ++ %} ++ ++ ins_pipe( pipe_jump ); ++%} ++ ++/* ++// For Loongson CPUs, jr seems too slow, so this rule shouldn't be imported. ++instruct jumpXtnd(mRegL switch_val) %{ ++ match(Jump switch_val); ++ ++ ins_cost(350); ++ ++ format %{ "load T9 <-- [$constanttablebase, $switch_val, $constantoffset] @ jumpXtnd\n\t" ++ "jr T9\n\t" ++ "nop" %} ++ ins_encode %{ ++ Register table_base = $constanttablebase; ++ int con_offset = $constantoffset; ++ Register switch_reg = $switch_val$$Register; ++ ++ if (UseLEXT1) { ++ if (Assembler::is_simm(con_offset, 8)) { ++ __ gsldx(T9, table_base, switch_reg, con_offset); ++ } else if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ gsldx(T9, AT, T9, 0); ++ } ++ } else { ++ if (Assembler::is_simm16(con_offset)) { ++ __ daddu(T9, table_base, switch_reg); ++ __ ld(T9, T9, con_offset); ++ } else { ++ __ move(T9, con_offset); ++ __ daddu(AT, table_base, switch_reg); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ } ++ } ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ %} ++ ins_pipe(pipe_jump); ++%} ++*/ ++ ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++// TailJump is used in only one place, the rethrow_Java stub (fancy_jump=2). ++// ex_oop (Exception Oop) is needed in %o0 at the jump. As there would be a ++// "restore" before this instruction (in Epilogue), we need to materialize it ++// in %i0. 
++//FIXME ++instruct tailjmpInd(mRegP jump_target,mRegP ex_oop) %{ ++ match( TailJump jump_target ex_oop ); ++ ins_cost(200); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop #@tailjmpInd" %} ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ ++ // V0, V1 are indicated in: ++ // [stubGenerator_mips.cpp] generate_forward_exception() ++ // [runtime_mips.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register oop = $ex_oop$$Register; ++ Register exception_oop = V0; ++ Register exception_pc = V1; ++ ++ __ move(exception_pc, RA); ++ __ move(exception_oop, oop); ++ ++ __ jr(target); ++ __ delayed()->nop(); ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL,static #@CallStaticJavaDirect " %} ++ ins_encode( Java_Static_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallDynamicJavaDirect(method meth) %{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{"MOV IC_Klass, #Universe::non_oop_word()\n\t" ++ "CallDynamic @ CallDynamicJavaDirect" %} ++ ins_encode( Java_Dynamic_Call( meth ) ); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++instruct CallLeafNoFPDirect(method meth) %{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF_NOFP,runtime " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Prefetch instructions for allocation. 
++ ++instruct prefetchAllocNTA( memory mem ) %{ ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ format %{ "pref $mem\t# Prefetch allocation @ prefetchAllocNTA" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_BYTE); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) %{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "CALL_LEAF,runtime #@CallLeafDirect " %} ++ ins_encode(Java_To_Runtime(meth)); ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++ ins_alignment(16); ++%} ++ ++// Load Char (16bit unsigned) ++instruct loadUS(mRegI dst, memory mem) %{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadC" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct loadUS_convI2L(mRegL dst, memory mem) %{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "loadUS $dst,$mem @ loadUS_convI2L" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Store Char (16bit unsigned) ++instruct storeC(memory mem, mRegI src) %{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); ++ format %{ "storeC $src, $mem @ storeC" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_CHAR); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeC_0(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); ++ format %{ "storeC $zero, $mem @ storeC_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_SHORT); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct loadConF_immF_0(regF dst, immF_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConF_immF_0\n"%} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ ++ __ mtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConF(regF dst, immF src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "lwc1 $dst, $constantoffset[$constanttablebase] # load FLOAT $src from table @ loadConF" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ lwc1($dst$$FloatRegister, $constanttablebase, con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gslwxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ lwc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++ ++instruct loadConD_immD_0(regD dst, immD_0 zero) %{ ++ match(Set dst zero); ++ ins_cost(100); ++ ++ format %{ "mov $dst, zero @ loadConD_immD_0"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ dmtc1(R0, dst); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++instruct loadConD(regD dst, immD src) %{ ++ match(Set dst src); ++ ins_cost(125); ++ ++ format %{ "ldc1 $dst, $constantoffset[$constanttablebase] # load DOUBLE $src from table @ loadConD" %} ++ ins_encode %{ ++ int con_offset = $constantoffset($src); ++ ++ if (Assembler::is_simm16(con_offset)) { ++ __ ldc1($dst$$FloatRegister, $constanttablebase, 
con_offset); ++ } else { ++ __ set64(AT, con_offset); ++ if (UseLEXT1) { ++ __ gsldxc1($dst$$FloatRegister, $constanttablebase, AT, 0); ++ } else { ++ __ daddu(AT, $constanttablebase, AT); ++ __ ldc1($dst$$FloatRegister, AT, 0); ++ } ++ } ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store register Float value (it is faster than store from FPU register) ++instruct storeF_reg( memory mem, regF src) %{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeF_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_FLOAT); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeF_immF_0( memory mem, immF_0 zero) %{ ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeF_immF_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_INT); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Load Double ++instruct loadD(regD dst, memory mem) %{ ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(150); ++ format %{ "loadD $dst, $mem #@loadD" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++// Load Double - UNaligned ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ format %{ "loadD_unaligned $dst, $mem #@loadD_unaligned" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++instruct storeD_reg( memory mem, regD src) %{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store $mem, $src\t# store float @ storeD_reg" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct storeD_immD_0( memory mem, immD_0 zero) %{ ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(40); ++ format %{ "store $mem, zero\t# store float @ storeD_immD_0" %} ++ ins_encode %{ ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_LONG); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct loadSSI(mRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lw $dst, $src\t# int stk @ loadSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSI) !"); ++ __ lw($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSI(stackSlotI dst, mRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sw $dst, $src\t# int stk @ storeSSI" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSI) !"); ++ __ sw($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSL(mRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# long stk @ loadSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSL) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSL(stackSlotL dst, mRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); 
++ format %{ "sd $dst, $src\t# long stk @ storeSSL" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSL) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSP(mRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ld $dst, $src\t# ptr stk @ loadSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSP) !"); ++ __ ld($dst$$Register, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSP(stackSlotP dst, mRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sd $dst, $src\t# ptr stk @ storeSSP" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSP) !"); ++ __ sd($src$$Register, SP, $dst$$disp); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "lwc1 $dst, $src\t# float stk @ loadSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSF) !"); ++ __ lwc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "swc1 $dst, $src\t# float stk @ storeSSF" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSF) !"); ++ __ swc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++// Use the same format since predicate() can not be used here. ++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ldc1 $dst, $src\t# double stk @ loadSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($src$$disp), "disp too long (loadSSD) !"); ++ __ ldc1($dst$$FloatRegister, SP, $src$$disp); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "sdc1 $dst, $src\t# double stk @ storeSSD" %} ++ ins_encode %{ ++ guarantee( Assembler::is_simm16($dst$$disp), "disp too long (storeSSD) !"); ++ __ sdc1($src$$FloatRegister, SP, $dst$$disp); ++ %} ++ ins_pipe(fpu_storeF); ++%} ++ ++instruct cmpFastLock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTLOCK $cr <-- $object, $box, $tmp, $scr #@ cmpFastLock" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(FlagsReg cr, mRegP object, mRegP box, mRegI tmp, mRegI scr) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, TEMP scr); ++ ins_cost(300); ++ format %{ "FASTUNLOCK $cr <-- $object, $box, $tmp #@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $cr$$Register, $tmp$$Register, $scr$$Register); ++ %} ++ ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++// Store CMS card-mark Immediate 0 ++instruct storeImmCM(memory mem, immI_0 zero) %{ ++ match(Set mem (StoreCM mem zero)); ++ ++ ins_cost(150); ++ format %{ "MEMBAR\n\t" ++ "sb $mem, zero\t! 
CMS card-mark imm0" %} ++ ins_encode %{ ++ __ sync(); ++ __ loadstore_enc(R0, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_BYTE); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++// Die now ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ // Here we should emit illtrap ! ++ ++ __ stop("in ShoudNotReachHere"); ++ ++ %} ++ ins_pipe( pipe_jump ); ++%} ++ ++instruct leaP8Narrow(mRegP dst, indOffset8Narrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# ptr off8narrow @ leaP8Narrow" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ ++ __ daddiu(dst, base, disp); ++ %} ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPPosIdxScaleOff8(mRegP dst, basePosIndexScaleOffset8 mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ PosIdxScaleOff8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ if (scale == 0) { ++ __ daddu(AT, base, index); ++ __ daddiu(dst, AT, disp); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(AT, base, AT); ++ __ daddiu(dst, AT, disp); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++instruct leaPIdxScale(mRegP dst, indIndexScale mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "leaq $dst, $mem\t# @ leaPIdxScale" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register base = as_Register($mem$$base); ++ Register index = as_Register($mem$$index); ++ int scale = $mem$$scale; ++ ++ if (scale == 0) { ++ __ daddu(dst, base, index); ++ } else { ++ __ dsll(AT, index, scale); ++ __ daddu(dst, base, AT); ++ } ++ %} ++ ++ ins_pipe( ialu_regI_imm16 ); ++%} ++ ++ ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary superklass ++// array for an instance of the superklass. Set a hidden internal cache on a ++// hit (cache is checked with exposed code in gen_subtype_check()). Return ++// NZ for a miss or zero for a hit. The encoding ALSO sets flags. ++instruct partialSubtypeCheck( mRegP result, no_T8_mRegP sub, no_T8_mRegP super, mT8RegI tmp ) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=$tmp " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super, tmp) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. 
++ ++instruct storePConditional(memory heap_top_ptr, mRegP oldval, mRegP newval, FlagsReg cr) %{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "move AT, $newval\n\t" ++ "sc_d $heap_top_ptr, AT\t# (ptr) @storePConditional \n\t" ++ "move $cr, AT\n" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Address addr(as_Register($heap_top_ptr$$base), $heap_top_ptr$$disp); ++ ++ int index = $heap_top_ptr$$index; ++ int scale = $heap_top_ptr$$scale; ++ int disp = $heap_top_ptr$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storePConditional: index != 0"); ++ } else { ++ __ move(AT, newval); ++ __ scd(AT, addr); ++ __ move($cr$$Register, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// AT flag is set on success, reset otherwise. ++instruct storeIConditional(memory mem, mRegI oldval, mRegI newval, FlagsReg cr) %{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg32(addr, oldval, newval, cr, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(cr, AT); ++ } ++ } ++%} ++ ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeLConditional(memory mem, mRegL oldval, mRegL newval, FlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ ++ format %{ "cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode%{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ Address addr(as_Register($mem$$base), $mem$$disp); ++ ++ int index = $mem$$index; ++ int scale = $mem$$scale; ++ int disp = $mem$$disp; ++ ++ guarantee(Assembler::is_simm16(disp), ""); ++ ++ if (index != 0) { ++ __ stop("in storeIConditional: index != 0"); ++ } else { ++ if (cr != addr.base() && cr != oldval && cr != newval) { ++ __ cmpxchg(addr, oldval, newval, cr, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(cr, AT); ++ } ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Implement LoadPLocked. Must be ordered against changes of the memory location ++// by storePConditional. 
++instruct loadPLocked(mRegP dst, memory mem) %{ ++ match(Set dst (LoadPLocked mem)); ++ ins_cost(MEMORY_REF_COST); ++ ++ format %{ "lld $dst, $mem #@loadPLocked\n\t" %} ++ size(12); ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_LINKED_LONG); ++ %} ++ ins_pipe( ialu_loadI ); ++%} ++ ++ ++instruct compareAndSwapI(mRegI res, mRegP mem_ptr, mRegI oldval, mRegI newval) %{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapI" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, true, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, true, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapL(mRegI res, mRegP mem_ptr, mRegL oldval, mRegL newval) %{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapL" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapP(mRegI res, mRegP mem_ptr, mRegP oldval, mRegP newval) %{ ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapP" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg(addr, oldval, newval, res, false, true); ++ } else { ++ __ cmpxchg(addr, oldval, newval, AT, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++instruct compareAndSwapN(mRegI res, mRegP mem_ptr, mRegN oldval, mRegN newval) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ format %{ "CMPXCHG $newval, [$mem_ptr], $oldval @ compareAndSwapN" %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++ Address addr($mem_ptr$$Register, 0); ++ ++ if (res != addr.base() && res != oldval && res != newval) { ++ __ cmpxchg32(addr, oldval, newval, res, false, false, true); ++ } else { ++ __ cmpxchg32(addr, oldval, newval, AT, false, false, true); ++ __ move(res, AT); ++ } ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++//----------Max and Min-------------------------------------------------------- ++// Min Instructions ++//// ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. 
++// // Conditional move for min ++//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVlt $op2,$op1\t! min" %} ++// opcode(0x4C,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++//// Min Register with Register (P6 version) ++//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MinI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_lt(op2,op1,cr); ++// %} ++//%} ++ ++// Min Register with Register (generic version) ++instruct minI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(80); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, src, dst); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Max Register with Register ++// *** Min and Max using the conditional move are slower than the ++// *** branch version on a Pentium III. ++// // Conditional move for max ++//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{ ++// effect( USE_DEF op2, USE op1, USE cr ); ++// format %{ "CMOVgt $op2,$op1\t! max" %} ++// opcode(0x4F,0x0F); ++// ins_encode( OpcS, OpcP, RegReg( op2, op1 ) ); ++// ins_pipe( pipe_cmov_reg ); ++//%} ++// ++// // Max Register with Register (P6 version) ++//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{ ++// predicate(VM_Version::supports_cmov() ); ++// match(Set op2 (MaxI op1 op2)); ++// ins_cost(200); ++// expand %{ ++// eFlagsReg cr; ++// compI_eReg(cr,op1,op2); ++// cmovI_reg_gt(op2,op1,cr); ++// %} ++//%} ++ ++// Max Register with Register (generic version) ++instruct maxI_Reg_Reg(mRegI dst, mRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slt(AT, dst, src); ++ __ movn(dst, src, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(mRegI dst, immI_0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slt(AT, dst, R0); ++ __ movn(dst, R0, AT); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct zerox_long_reg_reg(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL src mask)); ++ ++ format %{ "movl $dst, $src\t# zero-extend long @ zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct combine_i2l(mRegL dst, mRegI src1, immL_MaxUI mask, mRegI src2, immI_32 shift32) ++%{ ++ match(Set dst (OrL (AndL (ConvI2L src1) mask) (LShiftL (ConvI2L src2) shift32))); ++ ++ format %{ "combine_i2l $dst, $src2(H), $src1(L) @ combine_i2l" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ if (src1 == dst) { ++ __ dinsu(dst, src2, 32, 32); ++ } else if (src2 == dst) { ++ __ dsll32(dst, dst, 0); ++ __ dins(dst, src1, 0, 32); ++ } else { ++ __ dext(dst, src1, 0, 32); ++ __ dinsu(dst, src2, 32, 32); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(mRegL 
dst, mRegI src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(mRegL dst, mRegL src, immL_MaxUI mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "movl $dst, $src\t# i2l zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ dext(dst, src, 0, 32); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Match loading integer and casting it to unsigned int in long register. ++// LoadI + ConvI2L + AndL 0xffffffff. ++instruct loadUI2L_rmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_rmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++instruct loadUI2L_lmask(mRegL dst, memory mem, immL_MaxUI mask) %{ ++ match(Set dst (AndL mask (ConvI2L (LoadI mem)))); ++ ++ format %{ "lwu $dst, $mem \t// zero-extend to long @ loadUI2L_lmask" %} ++ ins_encode %{ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ loadstore_enc($dst$$Register, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_U_INT); ++ %} ++ ins_pipe(ialu_loadI); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instruction ++ ++instruct safePoint_poll() %{ ++ predicate(SafepointMechanism::uses_global_page_poll()); ++ match(SafePoint); ++ ++ ins_cost(105); ++ format %{ "poll for GC @ safePoint_poll" %} ++ ++ ins_encode %{ ++ __ block_comment("Safepoint:"); ++ __ set64(T9, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_type); ++ __ lw(AT, T9, 0); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct safePoint_poll_tls(mRegP poll) %{ ++ match(SafePoint poll); ++ predicate(SafepointMechanism::uses_thread_local_poll()); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "lw AT, [$poll]\t" ++ "Safepoint @ [$poll] : poll for GC" %} ++ size(4); ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ address pre_pc = __ pc(); ++ __ lw(AT, poll_reg, 0); ++ assert(nativeInstruction_at(pre_pc)->is_safepoint_poll(), "must emit lw AT, [$poll]"); ++ %} ++ ++ ins_pipe( ialu_storeI ); ++%} ++ ++//----------Arithmetic Conversion Instructions--------------------------------- ++ ++instruct roundFloat_nop(regF dst) ++%{ ++ match(Set dst (RoundFloat dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct roundDouble_nop(regD dst) ++%{ ++ match(Set dst (RoundDouble dst)); ++ ++ ins_cost(0); ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "clz $dst, 
$src\t# count leading zeros (int)" %} ++ ins_encode %{ ++ __ clz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountLeadingZerosInstructionMIPS64); ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "dclz $dst, $src\t# count leading zeros (long)" %} ++ ins_encode %{ ++ __ dclz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosI(mRegI dst, mRegI src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "ctz $dst, $src\t# count trailing zeros (int)" %} ++ ins_encode %{ ++ // ctz and dctz is gs instructions. ++ __ ctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(mRegI dst, mRegL src) %{ ++ predicate(UseCountTrailingZerosInstructionMIPS64); ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "dcto $dst, $src\t# count trailing zeros (long)" %} ++ ins_encode %{ ++ __ dctz($dst$$Register, $src$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++// ====================VECTOR INSTRUCTIONS===================================== ++ ++// Load vectors (8 bytes long) ++instruct loadV8(vecD dst, memory mem) %{ ++ predicate(n->as_LoadVector()->memory_size() == 8); ++ match(Set dst (LoadVector mem)); ++ ins_cost(125); ++ format %{ "load $dst, $mem\t! load vector (8 bytes)" %} ++ ins_encode %{ ++ __ loadstore_enc($dst$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::LOAD_DOUBLE); ++ %} ++ ins_pipe( fpu_loadF ); ++%} ++ ++// Store vectors (8 bytes long) ++instruct storeV8(memory mem, vecD src) %{ ++ predicate(n->as_StoreVector()->memory_size() == 8); ++ match(Set mem (StoreVector mem src)); ++ ins_cost(145); ++ format %{ "store $mem, $src\t! store vector (8 bytes)" %} ++ ins_encode %{ ++ __ loadstore_enc($src$$FloatRegister, $mem$$base, $mem$$index, $mem$$scale, $mem$$disp, MacroAssembler::STORE_DOUBLE); ++ %} ++ ins_pipe( fpu_storeF ); ++%} ++ ++instruct Repl8B_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3); ++ match(Set dst (ReplicateB src)); ++ ins_cost(100); ++ format %{ "replv_ob AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ replv_ob(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB src)); ++ ins_cost(140); ++ format %{ "move AT, $src\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate8B" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateB con)); ++ ins_cost(110); ++ format %{ "repl_ob AT, [$con]\n\t" ++ "dmtc1 AT, $dst,0x00\t! 
replicate8B($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ repl_ob(AT, val); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB con)); ++ ins_cost(150); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 8, 8\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst,0x00\t! replicate8B($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 8, 8); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB zero)); ++ ins_cost(90); ++ format %{ "dmtc1 R0, $dst\t! replicate8B zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl8B_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 8); ++ match(Set dst (ReplicateB M1)); ++ ins_cost(80); ++ format %{ "dmtc1 -1, $dst\t! replicate8B -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_DSP(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateS src)); ++ ins_cost(100); ++ format %{ "replv_qh AT, $src\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ replv_qh(AT, $src$$Register); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS src)); ++ ins_cost(120); ++ format %{ "move AT, $src \n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S" %} ++ ins_encode %{ ++ __ move(AT, $src$$Register); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm_DSP(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4 && UseLEXT3 && VM_Version::supports_dsp()); ++ match(Set dst (ReplicateS con)); ++ ins_cost(100); ++ format %{ "repl_qh AT, [$con]\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ if ( Assembler::is_simm(val, 10)) { ++ //repl_qh supports 10 bits immediate ++ __ repl_qh(AT, val); ++ } else { ++ __ li32(AT, val); ++ __ replv_qh(AT, AT); ++ } ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_imm(vecD dst, immI con) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS con)); ++ ins_cost(110); ++ format %{ "move AT, [$con]\n\t" ++ "dins AT, AT, 16, 16\n\t" ++ "dinsu AT, AT, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate4S($con)" %} ++ ins_encode %{ ++ __ move(AT, $con$$constant); ++ __ dins(AT, AT, 16, 16); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS zero)); ++ format %{ "dmtc1 R0, $dst\t! 
replicate4S zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++instruct Repl4S_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 4); ++ match(Set dst (ReplicateS M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate4S -1" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar to be vector ++instruct Repl2I(vecD dst, mRegI src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI src)); ++ format %{ "dins AT, $src, 0, 32\n\t" ++ "dinsu AT, $src, 32, 32\n\t" ++ "dmtc1 AT, $dst\t! replicate2I" %} ++ ins_encode %{ ++ __ dins(AT, $src$$Register, 0, 32); ++ __ dinsu(AT, $src$$Register, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar immediate to be vector by loading from const table. ++instruct Repl2I_imm(vecD dst, immI con, mA7RegI tmp) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI con)); ++ effect(KILL tmp); ++ format %{ "li32 AT, [$con], 32\n\t" ++ "dinsu AT, AT\n\t" ++ "dmtc1 AT, $dst\t! replicate2I($con)" %} ++ ins_encode %{ ++ int val = $con$$constant; ++ __ li32(AT, val); ++ __ dinsu(AT, AT, 32, 32); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar zero to be vector ++instruct Repl2I_zero(vecD dst, immI_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2I zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate integer (4 byte) scalar -1 to be vector ++instruct Repl2I_M1(vecD dst, immI_M1 M1) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateI M1)); ++ format %{ "dmtc1 -1, $dst\t! replicate2I -1, use AT" %} ++ ins_encode %{ ++ __ nor(AT, R0, R0); ++ __ dmtc1(AT, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++// Replicate float (4 byte) scalar to be vector ++instruct Repl2F(vecD dst, regF src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF src)); ++ format %{ "cvt.ps $dst, $src, $src\t! replicate2F" %} ++ ins_encode %{ ++ __ cvt_ps_s($dst$$FloatRegister, $src$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Replicate float (4 byte) scalar zero to be vector ++instruct Repl2F_zero(vecD dst, immF_0 zero) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (ReplicateF zero)); ++ format %{ "dmtc1 R0, $dst\t! replicate2F zero" %} ++ ins_encode %{ ++ __ dmtc1(R0, $dst$$FloatRegister); ++ %} ++ ins_pipe( pipe_mtc1 ); ++%} ++ ++ ++// ====================VECTOR ARITHMETIC======================================= ++ ++// --------------------------------- ADD -------------------------------------- ++ ++// Floats vector add ++// kernel does not have emulation of PS instructions yet, so PS instructions is disabled. ++instruct vadd2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF dst src)); ++ format %{ "add.ps $dst,$src\t! 
add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct vadd2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (AddVF src1 src2)); ++ format %{ "add.ps $dst,$src1,$src2\t! add packed2F" %} ++ ins_encode %{ ++ __ add_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- SUB -------------------------------------- ++ ++// Floats vector sub ++instruct vsub2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (SubVF dst src)); ++ format %{ "sub.ps $dst,$src\t! sub packed2F" %} ++ ins_encode %{ ++ __ sub_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- MUL -------------------------------------- ++ ++// Floats vector mul ++instruct vmul2F(vecD dst, vecD src) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF dst src)); ++ format %{ "mul.ps $dst, $src\t! mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $dst$$FloatRegister, $src$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++instruct vmul2F3(vecD dst, vecD src1, vecD src2) %{ ++ predicate(n->as_Vector()->length() == 2); ++ match(Set dst (MulVF src1 src2)); ++ format %{ "mul.ps $dst, $src1, $src2\t! mul packed2F" %} ++ ins_encode %{ ++ __ mul_ps($dst$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// --------------------------------- DIV -------------------------------------- ++// MIPS do not have div.ps ++ ++// --------------------------------- MADD -------------------------------------- ++// Floats vector madd ++//instruct vmadd2F(vecD dst, vecD src1, vecD src2, vecD src3) %{ ++// predicate(n->as_Vector()->length() == 2); ++// match(Set dst (AddVF (MulVF src1 src2) src3)); ++// ins_cost(50); ++// format %{ "madd.ps $dst, $src3, $src1, $src2\t! madd packed2F" %} ++// ins_encode %{ ++// __ madd_ps($dst$$FloatRegister, $src3$$FloatRegister, $src1$$FloatRegister, $src2$$FloatRegister); ++// %} ++// ins_pipe( fpu_regF_regF ); ++//%} ++ ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceeding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. 
++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == EAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(eRegI dst, eRegI src) %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_eReg(eRegI dst, immI_1 src, eFlagsReg cr) %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_eReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. ++// ++// peephole %{ ++// peepmatch ( incI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( decI_eReg movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addI_eReg_imm movI ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++// peephole %{ ++// peepmatch ( addP_eReg_imm movP ); ++// peepconstraint ( 0.dst == 1.dst ); ++// peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, eRegI src) %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(eRegI dst, memory mem) %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++//peephole %{ ++// peepmatch ( loadI storeI ); ++// peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem ); ++// peepreplace ( storeI( 1.mem 1.mem 1.src ) ); ++//%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++ +diff --git a/src/hotspot/cpu/mips/nativeInst_mips.cpp b/src/hotspot/cpu/mips/nativeInst_mips.cpp +new file mode 100644 +index 0000000000..514298bd8b +--- /dev/null ++++ b/src/hotspot/cpu/mips/nativeInst_mips.cpp +@@ -0,0 +1,1820 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "compiler/disassembler.hpp" ++#include "code/compiledIC.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/ostream.hpp" ++ ++#include ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++void NativeInstruction::wrote(int offset) { ++ ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long*)addr = i; ++ ICache::invalidate_range(addr, 8); ++} ++ ++static int illegal_instruction_bits = 0; ++ ++int NativeInstruction::illegal_instruction() { ++ if (illegal_instruction_bits == 0) { ++ ResourceMark rm; ++ char buf[40]; ++ CodeBuffer cbuf((address)&buf[0], 20); ++ MacroAssembler* a = new MacroAssembler(&cbuf); ++ address ia = a->pc(); ++ a->brk(11); ++ int bits = *(int*)ia; ++ illegal_instruction_bits = bits; ++ } ++ return illegal_instruction_bits; ++} ++ ++bool NativeInstruction::is_int_branch() { ++ switch(Assembler::opcode(insn_word())) { ++ case Assembler::beq_op: ++ case Assembler::beql_op: ++ case Assembler::bgtz_op: ++ case Assembler::bgtzl_op: ++ case Assembler::blez_op: ++ case Assembler::blezl_op: ++ case Assembler::bne_op: ++ case Assembler::bnel_op: ++ return true; ++ case Assembler::regimm_op: ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bgez_op: ++ case Assembler::bgezal_op: ++ case Assembler::bgezall_op: ++ case Assembler::bgezl_op: ++ case Assembler::bltz_op: ++ case Assembler::bltzal_op: ++ case Assembler::bltzall_op: ++ case Assembler::bltzl_op: ++ return true; ++ } ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_float_branch() { ++ if (!is_op(Assembler::cop1_op) || ++ !is_rs((Register)Assembler::bc1f_op)) return false; ++ ++ switch(Assembler::rt(insn_word())) { ++ case Assembler::bcf_op: ++ case Assembler::bcfl_op: ++ case Assembler::bct_op: ++ case Assembler::bctl_op: ++ return true; ++ } ++ ++ return false; ++} ++ ++ ++void NativeCall::verify() { ++ // make sure code pattern is actually a call instruction ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return; ++ } ++ ++ // jal targe ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return; ++ } 
++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ // FIXME: why add jr_op here? ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return; ++ } ++ ++ if (nativeInstruction_at(addr_at(0))->is_trampoline_call()) ++ return; ++ ++ fatal("not a call"); ++} ++ ++address NativeCall::target_addr_for_insn() const { ++ // jal target ++ // nop ++ if ( is_op(int_at(0), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && 
++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ld_op) ) { ++ ++ address dest = (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ return dest + Assembler::simm16((intptr_t)int_at(12) & 0xffff); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ 
(intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop <-- optional ++ //nop <-- optional ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ tty->print_cr("not a call: addr = " INTPTR_FORMAT , p2i(addr_at(0))); ++ tty->print_cr("======= Start decoding at addr = " INTPTR_FORMAT " =======", p2i(addr_at(0))); ++ Disassembler::decode(addr_at(0) - 2 * 4, addr_at(0) + 8 * 4, tty); ++ tty->print_cr("======= End of decoding ======="); ++ fatal("not a call"); ++ return NULL; // unreachable ++} ++ ++// Extract call destination from a NativeCall. The call might use a trampoline stub. ++address NativeCall::destination() const { ++ address addr = (address)this; ++ address destination = target_addr_for_insn(); ++ // Do we use a trampoline stub for this call? ++ // Trampoline stubs are located behind the main code. ++ if (destination > addr) { ++ // Filter out recursive method invocation (call to verified/unverified entry point). ++ CodeBlob* cb = CodeCache::find_blob_unsafe(addr); // Else we get assertion if nmethod is zombie. ++ assert(cb && cb->is_nmethod(), "sanity"); ++ nmethod *nm = (nmethod *)cb; ++ NativeInstruction* ni = nativeInstruction_at(addr); ++ if (nm->stub_contains(destination) && ni->is_trampoline_call()) { ++ // Yes we do, so get the destination from the trampoline stub. ++ const address trampoline_stub_addr = destination; ++ destination = nativeCallTrampolineStub_at(trampoline_stub_addr)->destination(); ++ } ++ } ++ return destination; ++} ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// ++// Add parameter assert_lock to switch off assertion ++// during code generation, where no patching lock is needed. ++void NativeCall::set_destination_mt_safe(address dest, bool assert_lock) { ++ assert(!assert_lock || ++ (Patching_lock->is_locked() || SafepointSynchronize::is_at_safepoint()), ++ "concurrent code patching"); ++ ++ ResourceMark rm; ++ address addr_call = addr_at(0); ++ assert(NativeCall::is_call_at(addr_call), "unexpected code at call site"); ++ // Patch the constant in the call's trampoline stub. 
++ if (MacroAssembler::reachable_from_cache()) { ++ set_destination(dest); ++ } else { ++ address trampoline_stub_addr = nativeCall_at(addr_call)->target_addr_for_insn(); ++ assert (get_trampoline() != NULL && trampoline_stub_addr == get_trampoline(), "we need a trampoline"); ++ nativeCallTrampolineStub_at(trampoline_stub_addr)->set_destination(dest); ++ } ++} ++ ++address NativeCall::get_trampoline() { ++ address call_addr = addr_at(0); ++ ++ CodeBlob *code = CodeCache::find_blob(call_addr); ++ assert(code != NULL, "Could not find the containing code blob"); ++ ++ if (code->is_nmethod()) { ++ return trampoline_stub_Relocation::get_trampoline_for(call_addr, (nmethod*)code); ++ } ++ return NULL; ++} ++ ++// manual implementation of GSSQ ++// ++// 00000001200009c0 : ++// 1200009c0: 0085202d daddu a0, a0, a1 ++// 1200009c4: e8860027 gssq a2, a3, 0(a0) ++// 1200009c8: 03e00008 jr ra ++// 1200009cc: 00000000 nop ++// ++typedef void (* atomic_store128_ptr)(long *addr, int offset, long low64, long hi64); ++ ++static int *buf; ++ ++static atomic_store128_ptr get_atomic_store128_func() { ++ assert(UseLEXT1, "UseLEXT1 must be true"); ++ static atomic_store128_ptr p = NULL; ++ if (p != NULL) ++ return p; ++ ++ buf = (int *)mmap(NULL, 1024, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, ++ -1, 0); ++ buf[0] = 0x0085202d; ++ buf[1] = (0x3a << 26) | (4 << 21) | (6 << 16) | 0x27; /* gssq $a2, $a3, 0($a0) */ ++ buf[2] = 0x03e00008; ++ buf[3] = 0; ++ ++ asm("sync"); ++ p = (atomic_store128_ptr)buf; ++ return p; ++} ++ ++void NativeCall::patch_on_jal_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(0, jal_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint jal_inst = (Assembler::jal_op << 26) | dest; ++ set_int_at(16, jal_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jal(address dst) { ++ patch_on_jal_gs(dst); ++} ++ ++void NativeCall::patch_on_trampoline(address dest) { ++ assert(nativeInstruction_at(addr_at(0))->is_trampoline_call(), "unexpected code at call site"); ++ jlong dst = (jlong) dest; ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ld dst, dst, imm16 ++ if ((dst> 0) && Assembler::is_simm16(dst >> 32)) { ++ dst += (dst & 0x8000) << 1; ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low(dst >> 32) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(dst >> 16) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low(dst) & 0xffff)); ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeCall::patch_on_jalr_gs(address dst) { ++ patch_set48_gs(dst); ++} ++ ++void NativeCall::patch_on_jalr(address dst) { ++ patch_set48(dst); ++} ++ ++void NativeCall::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ int count = 0; ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if 
(value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[2] = {0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ //set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ //set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ //set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 2) { ++ //nop(); ++ //set_int_at(count << 2, 0); ++ insts[count] = 0; ++ count++; ++ } ++ ++ long inst = insts[1]; ++ inst = inst << 32; ++ inst = inst + insts[0]; ++ ++ set_long_at(0, inst); ++} ++ ++void NativeCall::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int 
hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ //ori(d, R0, julong(value) >> 16); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ //lui(d, value >> 32); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ //ori(d, d, split_low(value >> 16)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ //dsll(d, d, 16); ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ //nop(); ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeCall::patch_set32(address dest) { ++ patch_set32_gs(dest); ++} ++ ++void NativeCall::set_destination(address dest) { ++ OrderAccess::fence(); ++ ++ // li64 ++ if (is_special_op(int_at(16), Assembler::dsll_op)) { ++ int first_word = int_at(0); ++ set_int_at(0, 0x1000ffff); /* .1: b .1 */ ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)dest) & 0xffff)); ++ set_int_at(0, (first_word & 0xffff0000) | (Assembler::split_low((intptr_t)dest >> 48) & 0xffff)); ++ ICache::invalidate_range(addr_at(0), 24); ++ } else if (is_op(int_at(16), Assembler::jal_op)) { ++ if (UseLEXT1) { ++ patch_on_jal_gs(dest); ++ } else { ++ patch_on_jal(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::jal_op)) { ++ patch_on_jal_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jalr_op)) { ++ if (UseLEXT1) { ++ patch_on_jalr_gs(dest); ++ } else { ++ patch_on_jalr(dest); ++ } ++ } else if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ guarantee(!os::is_MP() || (((long)addr_at(0) % 8) == 0), "destination must be aligned by 8"); ++ if (UseLEXT1) { ++ patch_set32_gs(dest); ++ } else { ++ patch_set32(dest); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++ } else { ++ fatal("not a call"); ++ } ++} ++ ++void NativeCall::print() { ++ 
tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ NativeCall *call = nativeCall_at(code_pos); ++ CodeBuffer cb(call->addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ __ li48(T9, (long)entry); ++ __ jalr (); ++ __ delayed()->nop(); ++#undef __ ++ ++ ICache::invalidate_range(call->addr_at(0), instruction_size); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++//------------------------------------------------------------------- ++ ++void NativeMovConstReg::verify() { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ return; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ return; ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++} ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++intptr_t NativeMovConstReg::data() const { ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), 
++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a mov reg, imm64/imm48"); ++ return 0; // unreachable ++} ++ ++void NativeMovConstReg::patch_set48(intptr_t x) { ++ jlong value = (jlong) x; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ //daddiu(d, R0, value); ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ //lui(d, split_low(value >> 
16)); ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ //ori(d, d, split_low(value)); ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++} ++ ++void NativeMovConstReg::set_data(intptr_t x, intptr_t o) { ++ // li64 or li48 ++ if ((!nativeInstruction_at(addr_at(12))->is_nop()) && is_special_op(int_at(16), Assembler::dsll_op) && is_op(long_at(20), Assembler::ori_op)) { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 48) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 32) & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (Assembler::split_low((intptr_t)x >> 16) & 0xffff)); ++ set_int_at(20, (int_at(20) & 0xffff0000) | (Assembler::split_low((intptr_t)x) & 0xffff)); ++ } else { ++ patch_set48(x); ++ } ++ ++ ICache::invalidate_range(addr_at(0), 24); ++ ++ // Find and replace the oop/metadata corresponding to this ++ // instruction in oops section. ++ CodeBlob* blob = CodeCache::find_blob_unsafe(instruction_address()); ++ nmethod* nm = blob->as_nmethod_or_null(); ++ if (nm != NULL) { ++ o = o ? 
o : x; ++ RelocIterator iter(nm, instruction_address(), next_instruction_address()); ++ while (iter.next()) { ++ if (iter.type() == relocInfo::oop_type) { ++ oop* oop_addr = iter.oop_reloc()->oop_addr(); ++ *oop_addr = cast_to_oop(o); ++ break; ++ } else if (iter.type() == relocInfo::metadata_type) { ++ Metadata** metadata_addr = iter.metadata_reloc()->metadata_addr(); ++ *metadata_addr = (Metadata*)o; ++ break; ++ } ++ } ++ } ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::offset() const{ ++ if (is_immediate()) ++ return (short)(int_at(instruction_offset)&0xffff); ++ else ++ return Assembler::merge(int_at(hiword_offset)&0xffff, long_at(instruction_offset)&0xffff); ++} ++ ++void NativeMovRegMem::set_offset(int x) { ++ if (is_immediate()) { ++ assert(Assembler::is_simm16(x), "just check"); ++ set_int_at(0, (int_at(0)&0xffff0000) | (x&0xffff) ); ++ if (is_64ldst()) { ++ assert(Assembler::is_simm16(x+4), "just check"); ++ set_int_at(4, (int_at(4)&0xffff0000) | ((x+4)&0xffff) ); ++ } ++ } else { ++ set_int_at(0, (int_at(0) & 0xffff0000) | (Assembler::split_high(x) & 0xffff)); ++ set_int_at(4, (int_at(4) & 0xffff0000) | (Assembler::split_low(x) & 0xffff)); ++ } ++ ICache::invalidate_range(addr_at(0), 8); ++} ++ ++void NativeMovRegMem::verify() { ++ int offset = 0; ++ ++ if ( Assembler::opcode(int_at(0)) == Assembler::lui_op ) { ++ ++ if ( Assembler::opcode(int_at(4)) != Assembler::ori_op ) { ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++ ++ offset += 12; ++ } ++ ++ switch(Assembler::opcode(int_at(offset))) { ++ case Assembler::lb_op: ++ case Assembler::lbu_op: ++ case Assembler::lh_op: ++ case Assembler::lhu_op: ++ case Assembler::lw_op: ++ case Assembler::lwu_op: ++ case Assembler::ld_op: ++ case Assembler::lwc1_op: ++ case Assembler::ldc1_op: ++ case Assembler::sb_op: ++ case Assembler::sh_op: ++ case Assembler::sw_op: ++ case Assembler::sd_op: ++ case Assembler::swc1_op: ++ case Assembler::sdc1_op: ++ break; ++ default: ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++} ++ ++ ++void NativeMovRegMem::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, [reg + %x]", p2i(instruction_address()), offset()); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return uint_at(0) == NativeIllegalInstruction::instruction_code; ++} ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ *(juint*)code_pos = instruction_code; ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::verify() { ++ assert(((NativeInstruction *)this)->is_jump() || ++ ((NativeInstruction *)this)->is_cond_jump(), "not a general jump instruction"); ++} ++ ++void NativeJump::patch_set48_gs(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ ++ if (rt_reg == 0) rt_reg = 25 << 16; // r25 is T9 ++ ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ int insts[4] = {0, 0, 0, 0}; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ insts[count] = (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware 
zero-extends to upper 32 ++ insts[count] = (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ insts[count] = (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16); ++ count += 1; ++ insts[count] = (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6); ++ count += 1; ++ insts[count] = (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ insts[count] = 0; ++ count++; ++ } ++ ++ guarantee(((long)addr_at(0) % (BytesPerWord * 2)) == 0, "must be aligned"); ++ atomic_store128_ptr func = get_atomic_store128_func(); ++ (*func)((long *)addr_at(0), 0, *(long *)&insts[0], *(long *)&insts[2]); ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_set48(address dest) { ++ jlong value = (jlong) dest; ++ int rt_reg = (int_at(0) & (0x1f << 16)); ++ int rs_reg = rt_reg << 5; ++ int rd_reg = rt_reg >> 5; ++ ++ int hi = (int)(value >> 32); ++ int lo = (int)(value & ~0); ++ ++ int count = 0; ++ ++ if (value == lo) { // 32-bit integer ++ if (Assembler::is_simm16(value)) { ++ set_int_at(count << 2, (Assembler::daddiu_op << 26) | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } ++ } else if (hi == 0) { // hardware zero-extends to upper 32 ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rt_reg | Assembler::split_low(julong(value) >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ if (Assembler::split_low(value)) { ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } ++ } else if ((value> 0) && Assembler::is_simm16(value >> 32)) { ++ set_int_at(count << 2, (Assembler::lui_op << 26) | rt_reg | Assembler::split_low(value >> 32)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value >> 16)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::dsll_op) | rt_reg | rd_reg | (16 << 6)); ++ count += 1; ++ set_int_at(count << 2, (Assembler::ori_op << 26) | rs_reg | rt_reg | Assembler::split_low(value)); ++ count += 1; ++ } else { ++ tty->print_cr("dest = 0x%lx", value); ++ guarantee(false, "Not supported yet !"); ++ } ++ ++ while (count < 4) { ++ set_int_at(count << 2, 0); ++ count++; ++ } ++ ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_j_only(address dst) { ++ long dest = ((long)dst - (((long)addr_at(4)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(0, j_inst); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ 
++ ++void NativeJump::patch_on_j_gs(address dst) { ++ long dest = ((long)dst - (((long)addr_at(20)) & 0xfffffffff0000000))>>2; ++ if ((dest >= 0) && (dest < (1<<26))) { ++ jint j_inst = (Assembler::j_op << 26) | dest; ++ set_int_at(16, j_inst); ++ ICache::invalidate_range(addr_at(16), 4); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void NativeJump::patch_on_j(address dst) { ++ patch_on_j_gs(dst); ++} ++ ++void NativeJump::patch_on_jr_gs(address dst) { ++ patch_set48_gs(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++void NativeJump::patch_on_jr(address dst) { ++ patch_set48(dst); ++ ICache::invalidate_range(addr_at(0), 16); ++} ++ ++ ++void NativeJump::set_jump_destination(address dest) { ++ OrderAccess::fence(); ++ ++ if (is_short()) { ++ assert(Assembler::is_simm16(dest-addr_at(4)), "change this code"); ++ set_int_at(0, (int_at(0) & 0xffff0000) | (dest - addr_at(4)) & 0xffff ); ++ ICache::invalidate_range(addr_at(0), 4); ++ } else if (is_b_far()) { ++ int offset = dest - addr_at(12); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (offset >> 16)); ++ set_int_at(16, (int_at(16) & 0xffff0000) | (offset & 0xffff)); ++ } else { ++ if (is_op(int_at(16), Assembler::j_op)) { ++ if (UseLEXT1) { ++ patch_on_j_gs(dest); ++ } else { ++ patch_on_j(dest); ++ } ++ } else if (is_op(int_at(0), Assembler::j_op)) { ++ patch_on_j_only(dest); ++ } else if (is_special_op(int_at(16), Assembler::jr_op)) { ++ if (UseLEXT1) { ++ //guarantee(!os::is_MP() || (((long)addr_at(0) % 16) == 0), "destination must be aligned for GSSD"); ++ //patch_on_jr_gs(dest); ++ patch_on_jr(dest); ++ } else { ++ patch_on_jr(dest); ++ } ++ } else { ++ fatal("not a jump"); ++ } ++ } ++} ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ CodeBuffer cb(code_pos, instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. ++ if (Assembler::is_simm16((entry - code_pos - 4) / 4)) { ++ __ b(entry); ++ __ delayed()->nop(); ++ } else { ++ // Attention: We have to use a relative jump here since PC reloc-operation isn't allowed here. ++ int offset = entry - code_pos; ++ ++ Label L; ++ __ bgezal(R0, L); ++ __ delayed()->lui(T9, (offset - 8) >> 16); ++ __ bind(L); ++ __ ori(T9, T9, (offset - 8) & 0xffff); ++ __ daddu(T9, T9, RA); ++ __ jr(T9); ++ __ delayed()->nop(); ++ } ++ ++#undef __ ++ ++ ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++bool NativeJump::is_b_far() { ++// ++// 0x000000556809f198: daddu at, ra, zero ++// 0x000000556809f19c: [4110001]bgezal zero, 0x000000556809f1a4 ++// ++// 0x000000556809f1a0: nop ++// 0x000000556809f1a4: lui t9, 0xfffffffd ++// 0x000000556809f1a8: ori t9, t9, 0x14dc ++// 0x000000556809f1ac: daddu t9, t9, ra ++// 0x000000556809f1b0: daddu ra, at, zero ++// 0x000000556809f1b4: jr t9 ++// 0x000000556809f1b8: nop ++// ;; ImplicitNullCheckStub slow case ++// 0x000000556809f1bc: lui t9, 0x55 ++// ++ return is_op(int_at(12), Assembler::lui_op); ++} ++ ++address NativeJump::jump_destination() { ++ if ( is_short() ) { ++ return addr_at(4) + Assembler::imm_off(int_at(instruction_offset)) * 4; ++ } ++ // Assembler::merge() is not correct in MIPS_64! 
++ // ++ // Example: ++ // hi16 = 0xfffd, ++ // lo16 = f7a4, ++ // ++ // offset=0xfffdf7a4 (Right) ++ // Assembler::merge = 0xfffcf7a4 (Wrong) ++ // ++ if ( is_b_far() ) { ++ int hi16 = int_at(12)&0xffff; ++ int low16 = int_at(16)&0xffff; ++ address target = addr_at(12) + (hi16 << 16) + low16; ++ return target; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_op(int_at(16), Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop()) { ++ int instr_index = int_at(16) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(20)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // j target ++ // nop ++ if ( is_op(int_at(0), Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop()) { ++ int instr_index = int_at(0) & 0x3ffffff; ++ intptr_t target_high = ((intptr_t)addr_at(4)) & 0xfffffffff0000000; ++ intptr_t target = target_high | (instr_index << 2); ++ return (address)target; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(20) & 0xffff), ++ (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff)); ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), Assembler::ori_op) ) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(12) & 0xffff), ++ (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(int_at(8) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop()) { ++ ++ return (address)Assembler::merge( (intptr_t)(0), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if 
( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)(int_at(4) & 0xffff), ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() ) { ++ ++ int sign = int_at(0) & 0x8000; ++ if (sign == 0) { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)0, ++ (intptr_t)0); ++ } else { ++ return (address)Assembler::merge( (intptr_t)0, ++ (intptr_t)(int_at(0) & 0xffff), ++ (intptr_t)(0xffff), ++ (intptr_t)(0xffff)); ++ } ++ } ++ ++ fatal("not a jump"); ++ return NULL; // unreachable ++} ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ NativeGeneralJump* h_jump = nativeGeneralJump_at (instr_addr); ++ assert((int)instruction_size == (int)NativeCall::instruction_size, ++ "note::Runtime1::patch_code uses NativeCall::instruction_size"); ++ ++ // ensure 100% atomicity ++ guarantee(!os::is_MP() || (((long)instr_addr % BytesPerWord) == 0), "destination must be aligned for SD"); ++ ++ int *p = (int *)instr_addr; ++ int jr_word = p[4]; ++ ++ p[4] = 0x1000fffb; /* .1: --; --; --; --; b .1; nop */ ++ memcpy(instr_addr, code_buffer, NativeCall::instruction_size - 8); ++ *(long *)(instr_addr + 16) = *(long *)(code_buffer + 16); ++} ++ ++// Must ensure atomicity ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ assert(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ assert(nativeInstruction_at(verified_entry + BytesPerInstWord)->is_nop(), "mips64 cannot replace non-nop with jump"); ++ ++ if (MacroAssembler::reachable_from_cache(dest)) { ++ CodeBuffer cb(verified_entry, 1 * BytesPerInstWord); ++ MacroAssembler masm(&cb); ++ masm.j(dest); ++ } else { ++ // We use an illegal instruction for marking a method as ++ // not_entrant or zombie ++ NativeIllegalInstruction::insert(verified_entry); ++ } ++ ++ ICache::invalidate_range(verified_entry, 1 * BytesPerInstWord); ++} ++ ++bool NativeInstruction::is_jump() ++{ ++ if ((int_at(0) & NativeGeneralJump::b_mask) == NativeGeneralJump::beq_opcode) ++ return true; ++ if (is_op(int_at(4), Assembler::lui_op)) // simplified b_far ++ return true; ++ if (is_op(int_at(12), Assembler::lui_op)) // original b_far ++ return true; ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // j target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ if ( 
nativeInstruction_at(addr_at(0))->is_op(Assembler::j_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // lui rd, imm(63...48); ++ // ori rd, rd, imm(47...32); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(31...16); ++ // dsll rd, rd, 16; ++ // ori rd, rd, imm(15...0); ++ // jr rd ++ // nop ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if (is_op(int_at(0), Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jr_op)) { ++ return true; ++ } ++ ++ return false; ++} ++ ++bool NativeInstruction::is_dtrace_trap() { ++ //return (*(int32_t*)this & 0xff) == 0xcc; ++ Unimplemented(); ++ return false; ++} ++ ++bool NativeInstruction::is_safepoint_poll() { ++ // ++ // 390 li T2, 0x0000000000400000 #@loadConP ++ // 394 sw [SP + #12], V1 # spill 9 ++ // 398 Safepoint @ [T2] : poll for GC @ safePoint_poll # spec.benchmarks.compress.Decompressor::decompress @ bci:224 L[0]=A6 L[1]=_ L[2]=sp + #28 L[3]=_ L[4]=V1 ++ // ++ // 0x000000ffe5815130: lui t2, 0x40 ++ // 0x000000ffe5815134: sw v1, 0xc(sp) ; OopMap{a6=Oop off=920} ++ // ;*goto ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ // 0x000000ffe5815138: lw at, 0x0(t2) ;*goto <--- PC ++ // ; - spec.benchmarks.compress.Decompressor::decompress@224 (line 584) ++ // ++ ++ // Since there may be some spill instructions between the safePoint_poll and loadConP, ++ // we check the safepoint 
instruction like the this. ++ return is_op(Assembler::lw_op) && is_rt(AT); ++} +diff --git a/src/hotspot/cpu/mips/nativeInst_mips.hpp b/src/hotspot/cpu/mips/nativeInst_mips.hpp +new file mode 100644 +index 0000000000..fb4f99c9c6 +--- /dev/null ++++ b/src/hotspot/cpu/mips/nativeInst_mips.hpp +@@ -0,0 +1,734 @@ ++/* ++ * Copyright (c) 1997, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++#define CPU_MIPS_VM_NATIVEINST_MIPS_HPP ++ ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepointMechanism.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeJump ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativeReturn ++// - - NativeReturnX (return with argument) ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction { ++ friend class Relocation; ++ ++ public: ++ enum mips_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = 4, ++ sync_instruction_code = 0xf ++ }; ++ ++ bool is_nop() { return long_at(0) == nop_instruction_code; } ++ bool is_sync() { return long_at(0) == sync_instruction_code; } ++ bool is_dtrace_trap(); ++ inline bool is_call(); ++ inline bool is_illegal(); ++ inline bool is_return(); ++ bool is_jump(); ++ inline bool is_cond_jump(); ++ bool is_safepoint_poll(); ++ ++ //mips has no instruction to generate a illegal instrucion exception ++ //we define ours: break 11 ++ static int illegal_instruction(); ++ ++ bool is_int_branch(); ++ bool is_float_branch(); ++ ++ inline bool is_trampoline_call(); ++ ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
++ bool is_sigill_zombie_not_entrant(); ++ ++ protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(BytesPerInstWord); } ++ address prev_instruction_address() const { return addr_at(-BytesPerInstWord); } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ juint uint_at(int offset) const { return *(juint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ int long_at(int offset) const { return *(jint*)addr_at(offset); } ++ ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ void set_long_at(int offset, long i); ++ ++ int insn_word() const { return long_at(0); } ++ static bool is_op (int insn, Assembler::ops op) { return Assembler::opcode(insn) == (int)op; } ++ bool is_op (Assembler::ops op) const { return is_op(insn_word(), op); } ++ bool is_rs (int insn, Register rs) const { return Assembler::rs(insn) == (int)rs->encoding(); } ++ bool is_rs (Register rs) const { return is_rs(insn_word(), rs); } ++ bool is_rt (int insn, Register rt) const { return Assembler::rt(insn) == (int)rt->encoding(); } ++ bool is_rt (Register rt) const { return is_rt(insn_word(), rt); } ++ ++ static bool is_special_op (int insn, Assembler::special_ops op) { ++ return is_op(insn, Assembler::special_op) && Assembler::special(insn)==(int)op; ++ } ++ bool is_special_op (Assembler::special_ops op) const { return is_special_op(insn_word(), op); } ++ ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++inline NativeCall* nativeCall_at(address address); ++// The NativeCall is an abstraction for accessing/manipulating native call imm32/imm64 ++// instructions (used to manipulate inline caches, primitive & dll calls, etc.). ++// MIPS has no call instruction with imm32/imm64. 
Usually, a call was done like this: ++// 32 bits: ++// lui rt, imm16 ++// addiu rt, rt, imm16 ++// jalr rt ++// nop ++// ++// 64 bits: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++ ++// we just consider the above for instruction as one call instruction ++class NativeCall: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 6 * BytesPerInstWord, ++ return_address_offset_short = 4 * BytesPerInstWord, ++ return_address_offset_long = 6 * BytesPerInstWord, ++ displacement_offset = 0 ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ ++ address next_instruction_address() const { ++ if (is_special_op(int_at(8), Assembler::jalr_op)) { ++ return addr_at(return_address_offset_short); ++ } else { ++ return addr_at(return_address_offset_long); ++ } ++ } ++ ++ address return_address() const { ++ return next_instruction_address(); ++ } ++ ++ address target_addr_for_insn() const; ++ address destination() const; ++ void set_destination(address dest); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jalr_gs(address dest); ++ void patch_on_jalr(address dest); ++ ++ void patch_on_jal_gs(address dest); ++ void patch_on_jal(address dest); ++ ++ void patch_on_trampoline(address dest); ++ ++ void patch_on_jal_only(address dest); ++ ++ void patch_set32_gs(address dest); ++ void patch_set32(address dest); ++ ++ void verify_alignment() { } ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - return_address_offset_short) | is_call_at(return_address - return_address_offset_long); ++ } ++ ++ static bool is_call_to(address instr, address target) { ++ return nativeInstruction_at(instr)->is_call() && ++nativeCall_at(instr)->destination() == target; ++ } ++ ++ // MT-safe patching of a call instruction. ++ static void insert(address code_pos, address entry); ++ ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ // Similar to replace_mt_safe, but just changes the destination. The ++ // important thing is that free-running threads are able to execute ++ // this call instruction at all times. If the call is an immediate jal ++ // instruction we can simply rely on atomicity of 32-bit writes to ++ // make sure other threads will see no intermediate states. ++ ++ // We cannot rely on locks here, since the free-running threads must run at ++ // full speed. ++ // ++ // Used in the runtime linkage of calls; see class CompiledIC. ++ ++ // The parameter assert_lock disables the assertion during code generation. 
++ void set_destination_mt_safe(address dest, bool assert_lock = true); ++ ++ address get_trampoline(); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = NULL; ++ if (NativeCall::is_call_at(return_address - NativeCall::return_address_offset_long)) { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_long); ++ } else { ++ call = (NativeCall*)(return_address - NativeCall::return_address_offset_short); ++ } ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++class NativeMovConstReg: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ instruction_size = 4 * BytesPerInstWord, ++ next_instruction_offset = 4 * BytesPerInstWord, ++ }; ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data() const; ++ void set_data(intptr_t x, intptr_t o = 0); ++ ++ void patch_set48(intptr_t x); ++ ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++// An interface for accessing/manipulating native moves of the form: ++// lui AT, split_high(offset) ++// addiu AT, split_low(offset) ++// addu reg, reg, AT ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, 0 ++// [lw/sw/lwc1/swc1 dest, reg, 4] ++// or ++// lb/lbu/sb/lh/lhu/sh/lw/sw/lwc1/swc1 dest, reg, offset ++// [lw/sw/lwc1/swc1 dest, reg, offset+4] ++// ++// Warning: These routines must be able to handle any instruction sequences ++// that are generated as a result of the load/store byte,word,long ++// macros. ++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ hiword_offset = 4, ++ ldst_offset = 12, ++ immediate_size = 4, ++ ldst_size = 16 ++ }; ++ ++ //offset is less than 16 bits. 
++ bool is_immediate() const { return !is_op(long_at(instruction_offset), Assembler::lui_op); } ++ bool is_64ldst() const { ++ if (is_immediate()) { ++ return (Assembler::opcode(long_at(hiword_offset)) == Assembler::opcode(long_at(instruction_offset))) && ++ (Assembler::imm_off(long_at(hiword_offset)) == Assembler::imm_off(long_at(instruction_offset)) + wordSize); ++ } else { ++ return (Assembler::opcode(long_at(ldst_offset+hiword_offset)) == Assembler::opcode(long_at(ldst_offset))) && ++ (Assembler::imm_off(long_at(ldst_offset+hiword_offset)) == Assembler::imm_off(long_at(ldst_offset)) + wordSize); ++ } ++ } ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { ++ return addr_at( (is_immediate()? immediate_size : ldst_size) + (is_64ldst()? 4 : 0)); ++ } ++ ++ int offset() const; ++ ++ void set_offset(int x); ++ ++ void add_offset_in_bytes(int add_offset) { set_offset ( ( offset() + add_offset ) ); } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++class NativeMovRegMemPatching: public NativeMovRegMem { ++ private: ++ friend NativeMovRegMemPatching* nativeMovRegMemPatching_at (address address) { ++ NativeMovRegMemPatching* test = (NativeMovRegMemPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++ ++// Handles all kinds of jump on Loongson. Long/far, conditional/unconditional ++// 32 bits: ++// far jump: ++// lui reg, split_high(addr) ++// addiu reg, split_low(addr) ++// jr reg ++// nop ++// or ++// beq ZERO, ZERO, offset ++// nop ++// ++ ++//64 bits: ++// far jump: ++// lui rd, imm(63...48); ++// ori rd, rd, imm(47...32); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(31...16); ++// dsll rd, rd, 16; ++// ori rd, rd, imm(15...0); ++// jalr rd ++// nop ++// ++class NativeJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_offset = 0, ++ beq_opcode = 0x10000000,//000100|00000|00000|offset ++ b_mask = 0xffff0000, ++ short_size = 8, ++ instruction_size = 6 * BytesPerInstWord ++ }; ++ ++ bool is_short() const { return (long_at(instruction_offset) & b_mask) == beq_opcode; } ++ bool is_b_far(); ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address jump_destination(); ++ ++ void patch_set48_gs(address dest); ++ void patch_set48(address dest); ++ ++ void patch_on_jr_gs(address dest); ++ void patch_on_jr(address dest); ++ ++ void patch_on_j_gs(address dest); ++ void patch_on_j(address dest); ++ ++ void patch_on_j_only(address dest); ++ ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry) { Unimplemented(); } ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry) {} ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++ ++ void verify(); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = 
(NativeJump*)(address - NativeJump::instruction_offset); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGeneralJump: public NativeJump { ++ public: ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum mips_specific_constants { ++ instruction_code = 0x42000029, // mips reserved instruction ++ instruction_size = 4, ++ instruction_offset = 0, ++ next_instruction_offset = 4 ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++// return instruction that does not pop values of the stack ++// jr RA ++// delay slot ++class NativeReturn: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 8, ++ instruction_offset = 0, ++ next_instruction_offset = 8 ++ }; ++}; ++ ++ ++ ++ ++class NativeCondJump; ++inline NativeCondJump* nativeCondJump_at(address address); ++class NativeCondJump: public NativeInstruction { ++ public: ++ enum mips_specific_constants { ++ instruction_size = 16, ++ instruction_offset = 12, ++ next_instruction_offset = 20 ++ }; ++ ++ ++ int insn_word() const { return long_at(instruction_offset); } ++ address instruction_address() const { return addr_at(0); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ ++ // Creation ++ inline friend NativeCondJump* nativeCondJump_at(address address); ++ ++ address jump_destination() const { ++ return ::nativeCondJump_at(addr_at(12))->jump_destination(); ++ } ++ ++ void set_jump_destination(address dest) { ++ ::nativeCondJump_at(addr_at(12))->set_jump_destination(dest); ++ } ++ ++}; ++ ++inline NativeCondJump* nativeCondJump_at(address address) { ++ NativeCondJump* jump = (NativeCondJump*)(address); ++ return jump; ++} ++ ++ ++ ++inline bool NativeInstruction::is_illegal() { return insn_word() == illegal_instruction(); } ++ ++inline bool NativeInstruction::is_call() { ++ // jal target ++ // nop ++ if ( nativeInstruction_at(addr_at(0))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() ) { ++ return true; ++ } ++ ++ // nop ++ // nop ++ // nop ++ // nop ++ // jal target ++ // nop ++ if ( is_nop() && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ nativeInstruction_at(addr_at(16))->is_op(Assembler::jal_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ // li64 ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::dsll_op) && ++ is_op(int_at(20), Assembler::ori_op) && ++ is_special_op(int_at(24), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op (int_at(12), 
Assembler::ori_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //ori dst, dst, imm16 ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ is_op (int_at(8), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //ori dst, R0, imm16 ++ //dsll dst, dst, 16 ++ //nop ++ //nop ++ if ( is_op(Assembler::ori_op) && ++ is_special_op(int_at(4), Assembler::dsll_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ //nop ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ nativeInstruction_at(addr_at(8))->is_nop() && ++ nativeInstruction_at(addr_at(12))->is_nop() && ++ is_special_op(int_at(16), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ ++ //daddiu dst, R0, imm16 ++ //nop ++ if ( is_op(Assembler::daddiu_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //ori dst, dst, imm16 ++ if ( is_op(Assembler::lui_op) && ++ is_op (int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ //lui dst, imm16 ++ //nop ++ if ( is_op(Assembler::lui_op) && ++ nativeInstruction_at(addr_at(4))->is_nop() && ++ is_special_op(int_at(8), Assembler::jalr_op) ) { ++ return true; ++ } ++ ++ if(is_trampoline_call()) ++ return true; ++ ++ return false; ++ ++} ++ ++inline bool NativeInstruction::is_return() { return is_special_op(Assembler::jr_op) && is_rs(RA);} ++ ++inline bool NativeInstruction::is_cond_jump() { return is_int_branch() || is_float_branch(); } ++ ++// Call trampoline stubs. 
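[Editor's note] The `NativeCallTrampolineStub` defined just below keeps the real call target as a raw 8-byte word at the start of the stub; `destination()` and `set_destination()` simply read and overwrite that word. A self-contained model of that slot, with assumed names and layout purely for illustration (the real stub is code plus a data word emitted into the code cache, not a C++ struct):

```cpp
#include <cstdint>
#include <cstdio>

// Illustrative stand-in for the stub's data word.
struct TrampolineSlot {
  intptr_t target;                                  // 8-byte destination word
};

static void* read_target(const TrampolineSlot* s)        { return reinterpret_cast<void*>(s->target); }
static void  write_target(TrampolineSlot* s, void* dest) { s->target = reinterpret_cast<intptr_t>(dest); }

int main() {
  TrampolineSlot slot{0};
  write_target(&slot, reinterpret_cast<void*>(0x120003000L));
  std::printf("resolved target: %p\n", read_target(&slot));
  return 0;
}
```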
++class NativeCallTrampolineStub : public NativeInstruction { ++ public: ++ ++ enum mips_specific_constants { ++ instruction_size = 2 * BytesPerInstWord, ++ instruction_offset = 0, ++ next_instruction_offset = 2 * BytesPerInstWord ++ }; ++ ++ address destination() const { ++ return (address)ptr_at(0); ++ } ++ ++ void set_destination(address new_destination) { ++ set_ptr_at(0, (intptr_t)new_destination); ++ } ++}; ++ ++inline bool NativeInstruction::is_trampoline_call() { ++ // lui dst, imm16 ++ // ori dst, dst, imm16 ++ // dsll dst, dst, 16 ++ // ld target, dst, imm16 ++ // jalr target ++ // nop ++ if ( is_op(Assembler::lui_op) && ++ is_op(int_at(4), Assembler::ori_op) && ++ is_special_op(int_at(8), Assembler::dsll_op) && ++ is_op(int_at(12), Assembler::ld_op) && ++ is_special_op(int_at(16), Assembler::jalr_op) && ++ nativeInstruction_at(addr_at(20))->is_nop() ) { ++ return true; ++ } ++ ++ return false; ++} ++ ++inline NativeCallTrampolineStub* nativeCallTrampolineStub_at(address addr) { ++ return (NativeCallTrampolineStub*)addr; ++} ++#endif // CPU_MIPS_VM_NATIVEINST_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/registerMap_mips.hpp b/src/hotspot/cpu/mips/registerMap_mips.hpp +new file mode 100644 +index 0000000000..7f800eb107 +--- /dev/null ++++ b/src/hotspot/cpu/mips/registerMap_mips.hpp +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++#define CPU_MIPS_VM_REGISTERMAP_MIPS_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++#ifndef CORE ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. 
++ address pd_location(VMReg reg) const {return NULL;} ++#endif ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_MIPS_VM_REGISTERMAP_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/register_definitions_mips.cpp b/src/hotspot/cpu/mips/register_definitions_mips.cpp +new file mode 100644 +index 0000000000..4af2531834 +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_definitions_mips.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2002, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/register.hpp" ++#include "register_mips.hpp" ++#ifdef TARGET_ARCH_MODEL_mips_32 ++# include "interp_masm_mips_32.hpp" ++#endif ++#ifdef TARGET_ARCH_MODEL_mips_64 ++# include "interp_masm_mips_64.hpp" ++#endif ++ ++REGISTER_DEFINITION(Register, noreg); ++REGISTER_DEFINITION(Register, i0); ++REGISTER_DEFINITION(Register, i1); ++REGISTER_DEFINITION(Register, i2); ++REGISTER_DEFINITION(Register, i3); ++REGISTER_DEFINITION(Register, i4); ++REGISTER_DEFINITION(Register, i5); ++REGISTER_DEFINITION(Register, i6); ++REGISTER_DEFINITION(Register, i7); ++REGISTER_DEFINITION(Register, i8); ++REGISTER_DEFINITION(Register, i9); ++REGISTER_DEFINITION(Register, i10); ++REGISTER_DEFINITION(Register, i11); ++REGISTER_DEFINITION(Register, i12); ++REGISTER_DEFINITION(Register, i13); ++REGISTER_DEFINITION(Register, i14); ++REGISTER_DEFINITION(Register, i15); ++REGISTER_DEFINITION(Register, i16); ++REGISTER_DEFINITION(Register, i17); ++REGISTER_DEFINITION(Register, i18); ++REGISTER_DEFINITION(Register, i19); ++REGISTER_DEFINITION(Register, i20); ++REGISTER_DEFINITION(Register, i21); ++REGISTER_DEFINITION(Register, i22); ++REGISTER_DEFINITION(Register, i23); ++REGISTER_DEFINITION(Register, i24); ++REGISTER_DEFINITION(Register, i25); ++REGISTER_DEFINITION(Register, i26); ++REGISTER_DEFINITION(Register, i27); ++REGISTER_DEFINITION(Register, i28); ++REGISTER_DEFINITION(Register, i29); ++REGISTER_DEFINITION(Register, i30); ++REGISTER_DEFINITION(Register, i31); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); 
++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); +diff --git a/src/hotspot/cpu/mips/register_mips.cpp b/src/hotspot/cpu/mips/register_mips.cpp +new file mode 100644 +index 0000000000..4a9b22bfef +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_mips.cpp +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_mips.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; ++const int ConcreteRegisterImpl::max_fpr = ConcreteRegisterImpl::max_gpr + ++ 2 * FloatRegisterImpl::number_of_registers; ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "zero", "at", "v0", "v1", "a0", "a1", "a2", "a3", ++ "a4", "a5", "a6", "a7", "t0", "t1", "t2", "t3", ++ "s0", "s1", "s2", "s3", "s4", "s5", "s6", "s7", ++ "t8", "t9", "k0", "k1", "gp", "sp", "fp", "ra" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31", ++ }; ++ return is_valid() ? 
names[encoding()] : "fnoreg"; ++} ++ +diff --git a/src/hotspot/cpu/mips/register_mips.hpp b/src/hotspot/cpu/mips/register_mips.hpp +new file mode 100644 +index 0000000000..ea216fbcb9 +--- /dev/null ++++ b/src/hotspot/cpu/mips/register_mips.hpp +@@ -0,0 +1,341 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_REGISTER_MIPS_HPP ++#define CPU_MIPS_VM_REGISTER_MIPS_HPP ++ ++#include "asm/register.hpp" ++#include "utilities/formatBuffer.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32 ++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register (%d)", (int)(intptr_t)this ); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++}; ++ ++ ++// The integer registers of the MIPS32 architecture ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, i0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, i1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, i2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, i3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, i4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, i5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, i6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, i7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, i8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, i9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, i10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, i11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, i12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, i13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, i14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, i15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, i16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, i17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, i18, 
(18)); ++CONSTANT_REGISTER_DECLARATION(Register, i19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, i20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, i21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, i22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, i23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, i24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, i25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, i26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, i27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, i28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, i29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, i30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, i31, (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define NOREG ((Register)(noreg_RegisterEnumValue)) ++ ++#define I0 ((Register)(i0_RegisterEnumValue)) ++#define I1 ((Register)(i1_RegisterEnumValue)) ++#define I2 ((Register)(i2_RegisterEnumValue)) ++#define I3 ((Register)(i3_RegisterEnumValue)) ++#define I4 ((Register)(i4_RegisterEnumValue)) ++#define I5 ((Register)(i5_RegisterEnumValue)) ++#define I6 ((Register)(i6_RegisterEnumValue)) ++#define I7 ((Register)(i7_RegisterEnumValue)) ++#define I8 ((Register)(i8_RegisterEnumValue)) ++#define I9 ((Register)(i9_RegisterEnumValue)) ++#define I10 ((Register)(i10_RegisterEnumValue)) ++#define I11 ((Register)(i11_RegisterEnumValue)) ++#define I12 ((Register)(i12_RegisterEnumValue)) ++#define I13 ((Register)(i13_RegisterEnumValue)) ++#define I14 ((Register)(i14_RegisterEnumValue)) ++#define I15 ((Register)(i15_RegisterEnumValue)) ++#define I16 ((Register)(i16_RegisterEnumValue)) ++#define I17 ((Register)(i17_RegisterEnumValue)) ++#define I18 ((Register)(i18_RegisterEnumValue)) ++#define I19 ((Register)(i19_RegisterEnumValue)) ++#define I20 ((Register)(i20_RegisterEnumValue)) ++#define I21 ((Register)(i21_RegisterEnumValue)) ++#define I22 ((Register)(i22_RegisterEnumValue)) ++#define I23 ((Register)(i23_RegisterEnumValue)) ++#define I24 ((Register)(i24_RegisterEnumValue)) ++#define I25 ((Register)(i25_RegisterEnumValue)) ++#define I26 ((Register)(i26_RegisterEnumValue)) ++#define I27 ((Register)(i27_RegisterEnumValue)) ++#define I28 ((Register)(i28_RegisterEnumValue)) ++#define I29 ((Register)(i29_RegisterEnumValue)) ++#define I30 ((Register)(i30_RegisterEnumValue)) ++#define I31 ((Register)(i31_RegisterEnumValue)) ++ ++#define R0 ((Register)(i0_RegisterEnumValue)) ++#define AT ((Register)(i1_RegisterEnumValue)) ++#define V0 ((Register)(i2_RegisterEnumValue)) ++#define V1 ((Register)(i3_RegisterEnumValue)) ++#define A0 ((Register)(i4_RegisterEnumValue)) ++#define A1 ((Register)(i5_RegisterEnumValue)) ++#define A2 ((Register)(i6_RegisterEnumValue)) ++#define A3 ((Register)(i7_RegisterEnumValue)) ++#define A4 ((Register)(i8_RegisterEnumValue)) ++#define A5 ((Register)(i9_RegisterEnumValue)) ++#define A6 ((Register)(i10_RegisterEnumValue)) ++#define A7 ((Register)(i11_RegisterEnumValue)) ++#define RT0 ((Register)(i12_RegisterEnumValue)) ++#define RT1 ((Register)(i13_RegisterEnumValue)) ++#define RT2 ((Register)(i14_RegisterEnumValue)) ++#define RT3 ((Register)(i15_RegisterEnumValue)) ++#define S0 ((Register)(i16_RegisterEnumValue)) ++#define S1 ((Register)(i17_RegisterEnumValue)) ++#define S2 ((Register)(i18_RegisterEnumValue)) ++#define S3 ((Register)(i19_RegisterEnumValue)) ++#define S4 ((Register)(i20_RegisterEnumValue)) ++#define S5 ((Register)(i21_RegisterEnumValue)) ++#define S6 ((Register)(i22_RegisterEnumValue)) ++#define S7 
((Register)(i23_RegisterEnumValue)) ++#define RT8 ((Register)(i24_RegisterEnumValue)) ++#define RT9 ((Register)(i25_RegisterEnumValue)) ++#define K0 ((Register)(i26_RegisterEnumValue)) ++#define K1 ((Register)(i27_RegisterEnumValue)) ++#define GP ((Register)(i28_RegisterEnumValue)) ++#define SP ((Register)(i29_RegisterEnumValue)) ++#define FP ((Register)(i30_RegisterEnumValue)) ++#define S8 ((Register)(i30_RegisterEnumValue)) ++#define RA ((Register)(i31_RegisterEnumValue)) ++ ++#define c_rarg0 RT0 ++#define c_rarg1 RT1 ++#define Rmethod S3 ++#define Rsender S4 ++#define Rnext S1 ++ ++/* ++#define RT0 T0 ++#define RT1 T1 ++#define RT2 T2 ++#define RT3 T3 ++#define RT4 T8 ++#define RT5 T9 ++*/ ++ ++ ++//for interpreter frame ++// bytecode pointer register ++#define BCP S0 ++// local variable pointer register ++#define LVP S7 ++// temperary callee saved register, we use this register to save the register maybe blowed cross call_VM ++// be sure to save and restore its value in call_stub ++#define TSR S2 ++ ++#define OPT_THREAD 1 ++ ++#define TREG S6 ++ ++#define S5_heapbase S5 ++ ++#define mh_SP_save SP ++ ++#define FSR V0 ++#define SSR V1 ++#define FSF F0 ++#define SSF F1 ++#define FTF F14 ++#define STF F15 ++ ++#define AFT F30 ++ ++#define RECEIVER T0 ++#define IC_Klass T1 ++ ++#define SHIFT_count T3 ++ ++#endif // DONT_USE_REGISTER_DEFINES ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ float_arg_base = 12, ++ number_of_registers = 32 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); 
++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++#ifndef DONT_USE_REGISTER_DEFINES ++#define FNOREG ((FloatRegister)(fnoreg_FloatRegisterEnumValue)) ++#define F0 ((FloatRegister)( f0_FloatRegisterEnumValue)) ++#define F1 ((FloatRegister)( f1_FloatRegisterEnumValue)) ++#define F2 ((FloatRegister)( f2_FloatRegisterEnumValue)) ++#define F3 ((FloatRegister)( f3_FloatRegisterEnumValue)) ++#define F4 ((FloatRegister)( f4_FloatRegisterEnumValue)) ++#define F5 ((FloatRegister)( f5_FloatRegisterEnumValue)) ++#define F6 ((FloatRegister)( f6_FloatRegisterEnumValue)) ++#define F7 ((FloatRegister)( f7_FloatRegisterEnumValue)) ++#define F8 ((FloatRegister)( f8_FloatRegisterEnumValue)) ++#define F9 ((FloatRegister)( f9_FloatRegisterEnumValue)) ++#define F10 ((FloatRegister)( f10_FloatRegisterEnumValue)) ++#define F11 ((FloatRegister)( f11_FloatRegisterEnumValue)) ++#define F12 ((FloatRegister)( f12_FloatRegisterEnumValue)) ++#define F13 ((FloatRegister)( f13_FloatRegisterEnumValue)) ++#define F14 ((FloatRegister)( f14_FloatRegisterEnumValue)) ++#define F15 ((FloatRegister)( f15_FloatRegisterEnumValue)) ++#define F16 ((FloatRegister)( f16_FloatRegisterEnumValue)) ++#define F17 ((FloatRegister)( f17_FloatRegisterEnumValue)) ++#define F18 ((FloatRegister)( f18_FloatRegisterEnumValue)) ++#define F19 ((FloatRegister)( f19_FloatRegisterEnumValue)) ++#define F20 ((FloatRegister)( f20_FloatRegisterEnumValue)) ++#define F21 ((FloatRegister)( f21_FloatRegisterEnumValue)) ++#define F22 ((FloatRegister)( f22_FloatRegisterEnumValue)) ++#define F23 ((FloatRegister)( f23_FloatRegisterEnumValue)) ++#define F24 ((FloatRegister)( f24_FloatRegisterEnumValue)) ++#define F25 ((FloatRegister)( f25_FloatRegisterEnumValue)) ++#define F26 ((FloatRegister)( f26_FloatRegisterEnumValue)) ++#define F27 ((FloatRegister)( f27_FloatRegisterEnumValue)) ++#define F28 ((FloatRegister)( f28_FloatRegisterEnumValue)) ++#define F29 ((FloatRegister)( f29_FloatRegisterEnumValue)) ++#define F30 ((FloatRegister)( f30_FloatRegisterEnumValue)) ++#define F31 ((FloatRegister)( f31_FloatRegisterEnumValue)) ++#endif // DONT_USE_REGISTER_DEFINES ++ ++ ++const int MIPS_ARGS_IN_REGS_NUM = 4; ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. 
++ number_of_registers = (RegisterImpl::number_of_registers + FloatRegisterImpl::number_of_registers) * 2 ++ }; ++ ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++#endif //CPU_MIPS_VM_REGISTER_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/relocInfo_mips.cpp b/src/hotspot/cpu/mips/relocInfo_mips.cpp +new file mode 100644 +index 0000000000..ff8028032b +--- /dev/null ++++ b/src/hotspot/cpu/mips/relocInfo_mips.cpp +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (c) 1998, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "code/relocInfo.hpp" ++#include "compiler/disassembler.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/compressedOops.inline.hpp" ++#include "oops/oop.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/safepoint.hpp" ++ ++ ++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) { ++ x += o; ++ typedef Assembler::WhichOperand WhichOperand; ++ WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop ++ assert(which == Assembler::disp32_operand || ++ which == Assembler::narrow_oop_operand || ++ which == Assembler::imm_operand, "format unpacks ok"); ++ if (which == Assembler::imm_operand) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(x)); ++ } ++ } else if (which == Assembler::narrow_oop_operand) { ++ // both compressed oops and compressed classes look the same ++ if (Universe::heap()->is_in_reserved((oop)x)) { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)CompressedOops::encode((oop)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode(oop(x))), (intptr_t)(x)); ++ } ++ } else { ++ if (verify_only) { ++ assert(nativeMovConstReg_at(addr())->data() == (long)Klass::encode_klass((Klass*)x), "instructions must match"); ++ } else { ++ nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)), (intptr_t)(x)); ++ } ++ } ++ } else { ++ // Note: Use runtime_call_type relocations for call32_operand. 
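[Editor's note] The narrow_oop_operand branch above re-encodes the patched value with `CompressedOops::encode` (or `Klass::encode_klass`). As a rough sketch only — base and shift are assumed here, and the real parameters depend on heap placement and are owned by `CompressedOops` — compression is base-plus-shift arithmetic:

```cpp
#include <cassert>
#include <cstdint>

// Hypothetical zero-effort model of narrow-oop compression: subtract an
// assumed heap base and shift out the alignment bits, then reverse it.
static uint32_t  encode(uintptr_t oop, uintptr_t base, int shift) { return (uint32_t)((oop - base) >> shift); }
static uintptr_t decode(uint32_t narrow, uintptr_t base, int shift) { return base + ((uintptr_t)narrow << shift); }

int main() {
  const uintptr_t base  = 0x100000000ULL;   // assumed heap base
  const int       shift = 3;                // assumed 8-byte object alignment
  uintptr_t oop = base + 0x7890;            // an aligned offset into the heap
  assert(decode(encode(oop, base, shift), base, shift) == oop);
  return 0;
}
```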
++ assert(0, "call32_operand not supported in MIPS64"); ++ } ++} ++ ++ ++//NOTICE HERE, this relocate is not need for MIPS, since MIPS USE abosolutly target, ++//Maybe We should FORGET CALL RELOCATION ++address Relocation::pd_call_destination(address orig_addr) { ++ intptr_t adj = 0; ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ if (!ni->is_trampoline_call()) { ++ return nativeCall_at(addr())->target_addr_for_insn(); ++ } else { ++ address trampoline = nativeCall_at(addr())->get_trampoline(); ++ if (trampoline) { ++ return nativeCallTrampolineStub_at(trampoline)->destination(); ++ } else { ++ return (address) -1; ++ } ++ } ++ } else if (ni->is_jump()) { ++ return nativeGeneralJump_at(addr())->jump_destination() + adj; ++ } else if (ni->is_cond_jump()) { ++ return nativeCondJump_at(addr())->jump_destination() +adj; ++ } else { ++ tty->print_cr("\nError!\ncall destination: " INTPTR_FORMAT, p2i(addr())); ++ Disassembler::decode(addr() - 10 * 4, addr() + 10 * 4, tty); ++ ShouldNotReachHere(); ++ return NULL; ++ } ++} ++ ++ ++void Relocation::pd_set_call_destination(address x) { ++ NativeInstruction* ni = nativeInstruction_at(addr()); ++ if (ni->is_call()) { ++ NativeCall* call = nativeCall_at(addr()); ++ if (!ni->is_trampoline_call()) { ++ call->set_destination(x); ++ } else { ++ address trampoline_stub_addr = call->get_trampoline(); ++ if (trampoline_stub_addr != NULL) { ++ address orig = call->target_addr_for_insn(); ++ if (orig != trampoline_stub_addr) { ++ call->patch_on_trampoline(trampoline_stub_addr); ++ } ++ call->set_destination_mt_safe(x, false); ++ } ++ } ++ } else if (ni->is_jump()) ++ nativeGeneralJump_at(addr())->set_jump_destination(x); ++ else if (ni->is_cond_jump()) ++ nativeCondJump_at(addr())->set_jump_destination(x); ++ else ++ { ShouldNotReachHere(); } ++ ++ // Unresolved jumps are recognized by a destination of -1 ++ // However 64bit can't actually produce such an address ++ // and encodes a jump to self but jump_destination will ++ // return a -1 as the signal. We must not relocate this ++ // jmp or the ic code will not see it as unresolved. ++} ++ ++ ++address* Relocation::pd_address_in_code() { ++ return (address*)addr(); ++} ++ ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++ ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++/* ++void poll_return_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++*/ ++ ++void internal_pc_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++ address target =0; ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ target = new_addr_for((address)ni->data(), src, dest); ++ ni->set_data((intptr_t)target); ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/src/hotspot/cpu/mips/relocInfo_mips.hpp b/src/hotspot/cpu/mips/relocInfo_mips.hpp +new file mode 100644 +index 0000000000..1e1e170fd8 +--- /dev/null ++++ b/src/hotspot/cpu/mips/relocInfo_mips.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++#define CPU_MIPS_VM_RELOCINFO_MIPS_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since MIPS instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. narrow_oop_operand ++ format_width = 2 ++ }; ++ ++ public: ++ ++ static bool mustIterateImmediateOopsInCode() { return false; } ++ ++#endif // CPU_MIPS_VM_RELOCINFO_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/runtime_mips_64.cpp b/src/hotspot/cpu/mips/runtime_mips_64.cpp +new file mode 100644 +index 0000000000..2a0488cd01 +--- /dev/null ++++ b/src/hotspot/cpu/mips/runtime_mips_64.cpp +@@ -0,0 +1,198 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "code/vmreg.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_mips.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// V1: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? ++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_mips.cpp] generate_forward_exception() ++// |- V0, V1 are created ++// |- T9 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T9 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++void OptoRuntime::generate_exception_blob() { ++ // Capture info about frame layout ++ enum layout { ++ fp_off, ++ return_off, // slot for return address ++ framesize ++ }; ++ ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer("exception_blob", 5120, 5120); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ ++ address start = __ pc(); ++ ++ __ daddiu(SP, SP, -1 * framesize * wordSize); // Prolog! ++ ++ // this frame will be treated as the original caller method. ++ // So, the return pc should be filled with the original exception pc. ++ // ref: X86's implementation ++ __ sd(V1, SP, return_off *wordSize); // return address ++ __ sd(FP, SP, fp_off *wordSize); ++ ++ // Save callee saved registers. None for UseSSE=0, ++ // floats-only for UseSSE=1, and doubles for UseSSE=2. ++ ++ __ daddiu(FP, SP, fp_off * wordSize); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ sd(V0, Address(thread, JavaThread::exception_oop_offset())); ++ __ sd(V1, Address(thread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. 
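[Editor's note] Just below, the blob re-aligns SP by AND-ing it with the negated alignment before making the runtime call. A standalone check of that rounding identity, assuming the 16-byte ABI alignment used elsewhere in the port:

```cpp
#include <cassert>
#include <cstdint>

int main() {
  const int64_t StackAlignmentInBytes = 16;   // assumed ABI alignment
  int64_t sp = 0x7ffffff1238LL;               // deliberately misaligned
  sp &= -StackAlignmentInBytes;               // the move(AT, -align); andr(SP, SP, AT) trick
  assert(sp == 0x7ffffff1230LL && sp % 16 == 0);
  return 0;
}
```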
++ __ set_last_Java_frame(thread, NOREG, NOREG, NULL); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ ++ { ++ long save_pc = (long)__ pc() + 48; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ move(A0, thread); ++ __ patchable_set48(T9, (long)OptoRuntime::handle_exception_C); ++ __ jalr(T9); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(thread, true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! ++ ++ // V0: exception handler ++ ++ // We have a handler in V0, (could be deopt blob) ++ __ move(T9, V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // Get the exception ++ __ ld(A0, Address(thread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ld(A1, Address(thread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ sd(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++ __ sd(R0, Address(thread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ sd(R0, Address(thread, JavaThread::exception_oop_offset())); ++ ++ // Fix seg fault when running: ++ // Eclipse + Plugin + Debug As ++ // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++ // ++ __ move(V0, A0); ++ __ move(V1, A1); ++ ++ // V0: exception oop ++ // T9: exception handler ++ // A1: exception pc ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ // make sure all code is generated ++ masm->flush(); ++ ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++} +diff --git a/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp +new file mode 100644 +index 0000000000..4a9791d4cb +--- /dev/null ++++ b/src/hotspot/cpu/mips/sharedRuntime_mips_64.cpp +@@ -0,0 +1,3879 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nativeInst.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/klass.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_mips.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++#include ++ ++#define __ masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class RegisterSaver { ++ enum { FPU_regs_live = 32 }; ++ // Capture info about frame layout ++ enum layout { ++#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, ++ DEF_LAYOUT_OFFS(for_16_bytes_aligned) ++ DEF_LAYOUT_OFFS(fpr0) ++ DEF_LAYOUT_OFFS(fpr1) ++ DEF_LAYOUT_OFFS(fpr2) ++ DEF_LAYOUT_OFFS(fpr3) ++ DEF_LAYOUT_OFFS(fpr4) ++ DEF_LAYOUT_OFFS(fpr5) ++ DEF_LAYOUT_OFFS(fpr6) ++ DEF_LAYOUT_OFFS(fpr7) ++ DEF_LAYOUT_OFFS(fpr8) ++ DEF_LAYOUT_OFFS(fpr9) ++ DEF_LAYOUT_OFFS(fpr10) ++ DEF_LAYOUT_OFFS(fpr11) ++ DEF_LAYOUT_OFFS(fpr12) ++ DEF_LAYOUT_OFFS(fpr13) ++ DEF_LAYOUT_OFFS(fpr14) ++ DEF_LAYOUT_OFFS(fpr15) ++ DEF_LAYOUT_OFFS(fpr16) ++ DEF_LAYOUT_OFFS(fpr17) ++ DEF_LAYOUT_OFFS(fpr18) ++ DEF_LAYOUT_OFFS(fpr19) ++ DEF_LAYOUT_OFFS(fpr20) ++ DEF_LAYOUT_OFFS(fpr21) ++ DEF_LAYOUT_OFFS(fpr22) ++ DEF_LAYOUT_OFFS(fpr23) ++ DEF_LAYOUT_OFFS(fpr24) ++ DEF_LAYOUT_OFFS(fpr25) ++ DEF_LAYOUT_OFFS(fpr26) ++ DEF_LAYOUT_OFFS(fpr27) ++ DEF_LAYOUT_OFFS(fpr28) ++ DEF_LAYOUT_OFFS(fpr29) ++ DEF_LAYOUT_OFFS(fpr30) ++ DEF_LAYOUT_OFFS(fpr31) ++ ++ DEF_LAYOUT_OFFS(v0) ++ DEF_LAYOUT_OFFS(v1) ++ DEF_LAYOUT_OFFS(a0) ++ DEF_LAYOUT_OFFS(a1) ++ DEF_LAYOUT_OFFS(a2) ++ DEF_LAYOUT_OFFS(a3) ++ DEF_LAYOUT_OFFS(a4) ++ DEF_LAYOUT_OFFS(a5) ++ DEF_LAYOUT_OFFS(a6) ++ DEF_LAYOUT_OFFS(a7) ++ DEF_LAYOUT_OFFS(t0) ++ DEF_LAYOUT_OFFS(t1) ++ DEF_LAYOUT_OFFS(t2) ++ DEF_LAYOUT_OFFS(t3) ++ DEF_LAYOUT_OFFS(s0) ++ DEF_LAYOUT_OFFS(s1) ++ DEF_LAYOUT_OFFS(s2) ++ DEF_LAYOUT_OFFS(s3) ++ DEF_LAYOUT_OFFS(s4) ++ DEF_LAYOUT_OFFS(s5) ++ DEF_LAYOUT_OFFS(s6) ++ DEF_LAYOUT_OFFS(s7) ++ DEF_LAYOUT_OFFS(t8) ++ DEF_LAYOUT_OFFS(t9) ++ ++ DEF_LAYOUT_OFFS(gp) ++ DEF_LAYOUT_OFFS(fp) ++ DEF_LAYOUT_OFFS(return) ++ reg_save_size ++ }; ++ ++ public: ++ ++ static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors =false ); ++ static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); ++ static int raOffset(void) { return return_off / 2; } ++ //Rmethod ++ static int methodOffset(void) { return s3_off / 2; } ++ ++ static int v0Offset(void) { return v0_off / 2; } ++ static int v1Offset(void) { return v1_off / 2; } ++ ++ static int fpResultOffset(void) { return fpr0_off / 2; } ++ ++ // During deoptimization only the result register need to be restored ++ // all the other values have already been extracted. 
++ static void restore_result_registers(MacroAssembler* masm); ++}; ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors ) { ++ ++ // Always make the frame size 16-byte aligned ++ int frame_size_in_bytes = round_to(additional_frame_words*wordSize + ++ reg_save_size*BytesPerInt, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; ++ ++ // save registers ++ ++ __ daddiu(SP, SP, - reg_save_size * jintSize); ++ ++ __ sdc1(F0, SP, fpr0_off * jintSize); __ sdc1(F1, SP, fpr1_off * jintSize); ++ __ sdc1(F2, SP, fpr2_off * jintSize); __ sdc1(F3, SP, fpr3_off * jintSize); ++ __ sdc1(F4, SP, fpr4_off * jintSize); __ sdc1(F5, SP, fpr5_off * jintSize); ++ __ sdc1(F6, SP, fpr6_off * jintSize); __ sdc1(F7, SP, fpr7_off * jintSize); ++ __ sdc1(F8, SP, fpr8_off * jintSize); __ sdc1(F9, SP, fpr9_off * jintSize); ++ __ sdc1(F10, SP, fpr10_off * jintSize); __ sdc1(F11, SP, fpr11_off * jintSize); ++ __ sdc1(F12, SP, fpr12_off * jintSize); __ sdc1(F13, SP, fpr13_off * jintSize); ++ __ sdc1(F14, SP, fpr14_off * jintSize); __ sdc1(F15, SP, fpr15_off * jintSize); ++ __ sdc1(F16, SP, fpr16_off * jintSize); __ sdc1(F17, SP, fpr17_off * jintSize); ++ __ sdc1(F18, SP, fpr18_off * jintSize); __ sdc1(F19, SP, fpr19_off * jintSize); ++ __ sdc1(F20, SP, fpr20_off * jintSize); __ sdc1(F21, SP, fpr21_off * jintSize); ++ __ sdc1(F22, SP, fpr22_off * jintSize); __ sdc1(F23, SP, fpr23_off * jintSize); ++ __ sdc1(F24, SP, fpr24_off * jintSize); __ sdc1(F25, SP, fpr25_off * jintSize); ++ __ sdc1(F26, SP, fpr26_off * jintSize); __ sdc1(F27, SP, fpr27_off * jintSize); ++ __ sdc1(F28, SP, fpr28_off * jintSize); __ sdc1(F29, SP, fpr29_off * jintSize); ++ __ sdc1(F30, SP, fpr30_off * jintSize); __ sdc1(F31, SP, fpr31_off * jintSize); ++ __ sd(V0, SP, v0_off * jintSize); __ sd(V1, SP, v1_off * jintSize); ++ __ sd(A0, SP, a0_off * jintSize); __ sd(A1, SP, a1_off * jintSize); ++ __ sd(A2, SP, a2_off * jintSize); __ sd(A3, SP, a3_off * jintSize); ++ __ sd(A4, SP, a4_off * jintSize); __ sd(A5, SP, a5_off * jintSize); ++ __ sd(A6, SP, a6_off * jintSize); __ sd(A7, SP, a7_off * jintSize); ++ __ sd(T0, SP, t0_off * jintSize); ++ __ sd(T1, SP, t1_off * jintSize); ++ __ sd(T2, SP, t2_off * jintSize); ++ __ sd(T3, SP, t3_off * jintSize); ++ __ sd(S0, SP, s0_off * jintSize); ++ __ sd(S1, SP, s1_off * jintSize); ++ __ sd(S2, SP, s2_off * jintSize); ++ __ sd(S3, SP, s3_off * jintSize); ++ __ sd(S4, SP, s4_off * jintSize); ++ __ sd(S5, SP, s5_off * jintSize); ++ __ sd(S6, SP, s6_off * jintSize); ++ __ sd(S7, SP, s7_off * jintSize); ++ ++ __ sd(T8, SP, t8_off * jintSize); ++ __ sd(T9, SP, t9_off * jintSize); ++ ++ __ sd(GP, SP, gp_off * jintSize); ++ __ sd(FP, SP, fp_off * jintSize); ++ __ sd(RA, SP, return_off * jintSize); ++ __ daddiu(FP, SP, fp_off * jintSize); ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ //OopMap* map = new OopMap( frame_words, 0 ); ++ OopMap* map = new OopMap( frame_size_in_slots, 0 ); ++ ++ ++//#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_words) ++#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) ++ 
map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( v1_off), V1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a6_off), A6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a7_off), A7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t0_off), T0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t1_off), T1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t2_off), T2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t3_off), T3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s0_off), S0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s1_off), S1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s2_off), S2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s3_off), S3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s4_off), S4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s5_off), S5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s6_off), S6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s7_off), S7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t8_off), T8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t9_off), T9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fp_off), FP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg()); ++ ++ map->set_callee_saved(STACK_OFFSET( fpr0_off), F0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr1_off), F1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr2_off), F2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr3_off), F3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr4_off), F4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr5_off), F5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr6_off), F6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr7_off), F7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr8_off), F8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr9_off), F9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr10_off), F10->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr11_off), F11->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr12_off), F12->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr13_off), F13->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr14_off), F14->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr15_off), F15->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr16_off), F16->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr17_off), F17->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr18_off), F18->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr19_off), F19->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr20_off), F20->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr21_off), F21->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr22_off), F22->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr23_off), F23->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr24_off), F24->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr25_off), F25->as_VMReg()); ++ 
map->set_callee_saved(STACK_OFFSET( fpr26_off), F26->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr27_off), F27->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr28_off), F28->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr29_off), F29->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr30_off), F30->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr31_off), F31->as_VMReg()); ++ ++#undef STACK_OFFSET ++ return map; ++} ++ ++ ++// Pop the current frame and restore all the registers that we ++// saved. ++void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) { ++ __ ldc1(F0, SP, fpr0_off * jintSize); __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ ldc1(F2, SP, fpr2_off * jintSize); __ ldc1(F3, SP, fpr3_off * jintSize); ++ __ ldc1(F4, SP, fpr4_off * jintSize); __ ldc1(F5, SP, fpr5_off * jintSize); ++ __ ldc1(F6, SP, fpr6_off * jintSize); __ ldc1(F7, SP, fpr7_off * jintSize); ++ __ ldc1(F8, SP, fpr8_off * jintSize); __ ldc1(F9, SP, fpr9_off * jintSize); ++ __ ldc1(F10, SP, fpr10_off * jintSize); __ ldc1(F11, SP, fpr11_off * jintSize); ++ __ ldc1(F12, SP, fpr12_off * jintSize); __ ldc1(F13, SP, fpr13_off * jintSize); ++ __ ldc1(F14, SP, fpr14_off * jintSize); __ ldc1(F15, SP, fpr15_off * jintSize); ++ __ ldc1(F16, SP, fpr16_off * jintSize); __ ldc1(F17, SP, fpr17_off * jintSize); ++ __ ldc1(F18, SP, fpr18_off * jintSize); __ ldc1(F19, SP, fpr19_off * jintSize); ++ __ ldc1(F20, SP, fpr20_off * jintSize); __ ldc1(F21, SP, fpr21_off * jintSize); ++ __ ldc1(F22, SP, fpr22_off * jintSize); __ ldc1(F23, SP, fpr23_off * jintSize); ++ __ ldc1(F24, SP, fpr24_off * jintSize); __ ldc1(F25, SP, fpr25_off * jintSize); ++ __ ldc1(F26, SP, fpr26_off * jintSize); __ ldc1(F27, SP, fpr27_off * jintSize); ++ __ ldc1(F28, SP, fpr28_off * jintSize); __ ldc1(F29, SP, fpr29_off * jintSize); ++ __ ldc1(F30, SP, fpr30_off * jintSize); __ ldc1(F31, SP, fpr31_off * jintSize); ++ ++ __ ld(V0, SP, v0_off * jintSize); __ ld(V1, SP, v1_off * jintSize); ++ __ ld(A0, SP, a0_off * jintSize); __ ld(A1, SP, a1_off * jintSize); ++ __ ld(A2, SP, a2_off * jintSize); __ ld(A3, SP, a3_off * jintSize); ++ __ ld(A4, SP, a4_off * jintSize); __ ld(A5, SP, a5_off * jintSize); ++ __ ld(A6, SP, a6_off * jintSize); __ ld(A7, SP, a7_off * jintSize); ++ __ ld(T0, SP, t0_off * jintSize); ++ __ ld(T1, SP, t1_off * jintSize); ++ __ ld(T2, SP, t2_off * jintSize); ++ __ ld(T3, SP, t3_off * jintSize); ++ __ ld(S0, SP, s0_off * jintSize); ++ __ ld(S1, SP, s1_off * jintSize); ++ __ ld(S2, SP, s2_off * jintSize); ++ __ ld(S3, SP, s3_off * jintSize); ++ __ ld(S4, SP, s4_off * jintSize); ++ __ ld(S5, SP, s5_off * jintSize); ++ __ ld(S6, SP, s6_off * jintSize); ++ __ ld(S7, SP, s7_off * jintSize); ++ ++ __ ld(T8, SP, t8_off * jintSize); ++ __ ld(T9, SP, t9_off * jintSize); ++ ++ __ ld(GP, SP, gp_off * jintSize); ++ __ ld(FP, SP, fp_off * jintSize); ++ __ ld(RA, SP, return_off * jintSize); ++ ++ __ addiu(SP, SP, reg_save_size * jintSize); ++} ++ ++// Pop the current frame and restore the registers that might be holding ++// a result. ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) { ++ ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restore to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. 
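[Editor's note] The frame-size bookkeeping at the top of `save_live_registers()` above is just unit conversion: bytes rounded up to 16, then re-expressed as 32-bit slots and as machine words. A quick standalone check — `wordSize`, `BytesPerInt`, and a `reg_save_size` of 120 slots are assumed values for illustration, not taken from the patch:

```cpp
#include <cstdio>

// Round x up to the next multiple of a power-of-two alignment.
static int round_to(int x, int align) { return (x + align - 1) & -align; }

int main() {
  const int wordSize = 8, BytesPerInt = 4;    // assumed LP64 sizes
  const int additional_frame_words = 0;
  const int reg_save_size = 120;              // assumed jint slot count
  int bytes = round_to(additional_frame_words * wordSize + reg_save_size * BytesPerInt, 16);
  std::printf("bytes=%d slots=%d words=%d\n", bytes, bytes / BytesPerInt, bytes / wordSize);
  return 0;
}
```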
++ ++ __ ld(V0, SP, v0_off * jintSize); ++ __ ld(V1, SP, v1_off * jintSize); ++ __ ldc1(F0, SP, fpr0_off * jintSize); ++ __ ldc1(F1, SP, fpr1_off * jintSize); ++ __ addiu(SP, SP, return_off * jintSize); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 16; ++} ++ ++size_t SharedRuntime::trampoline_size() { ++ return 32; ++} ++ ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) { ++ // trampoline is not in CodeCache ++ __ set64(T9, (long)destination); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Since we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++ ++static int reg2offset_in(VMReg r) { ++ // Account for saved fp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 2 * VMRegImpl::slots_per_word) * VMRegImpl::stack_slot_size; // + 2 * VMRegImpl::stack_slot_size); ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than SharedInfo::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). ++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 32-bit ++// integer registers. ++ ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. ++ ++ ++// --------------------------------------------------------------------------- ++// The compiled Java calling convention. ++// Pass first five oop/int args in registers T0, A0 - A3. ++// Pass float/double/long args in stack. ++// Doubles have precedence, so if you pass a mix of floats and doubles ++// the doubles will grab the registers before the floats will. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ ++ // Create the mapping between argument positions and registers. 
++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ T0, A0, A1, A2, A3, A4, A5, A6 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. ++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ // Schedule the branch target address early. ++ // Call into the VM to patch the caller, then jump to compiled callee ++ // V0 isn't live so capture return address while we easily can ++ __ move(V0, RA); ++ ++ __ pushad(); ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ ++ __ move(A0, Rmethod); ++ __ move(A1, V0); ++ // we should preserve the return address ++ __ move(TSR, SP); ++ __ move(AT, -(StackAlignmentInBytes)); // align the stack ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite), ++ relocInfo::runtime_call_type); ++ ++ __ delayed()->nop(); ++ __ move(SP, TSR); ++ __ popad(); ++ __ bind(L); ++} ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) { ++ ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ // However we will run interpreted if we come thru here. The next pass ++ // thru the call site will run compiled. 
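A hedged, standalone sketch of the register-assignment policy implemented by the loop above: int-like and reference arguments take registers from the T0/A0-A6 list until it is exhausted, float/double arguments take F12-F19, and everything else falls to 4-byte stack slots consumed two at a time. The BasicType enum and the register-name strings below are simplified stand-ins, not the real VMRegPair machinery:

#include <cstdio>
#include <string>
#include <vector>

enum BasicType { T_INT, T_LONG, T_FLOAT, T_DOUBLE, T_OBJECT, T_VOID };

int main() {
  const char* int_regs[]   = { "T0", "A0", "A1", "A2", "A3", "A4", "A5", "A6" };
  const char* float_regs[] = { "F12", "F13", "F14", "F15", "F16", "F17", "F18", "F19" };
  const unsigned n_int = 8, n_float = 8;   // stand-ins for Argument::n_*_register_parameters

  // Example signature: (long, double, int, Object); longs/doubles carry a T_VOID half.
  std::vector<BasicType> sig = { T_LONG, T_VOID, T_DOUBLE, T_VOID, T_INT, T_OBJECT };

  unsigned iargs = 0, fargs = 0, stk = 0;  // stk counts 4-byte slots, bumped by 2
  for (size_t i = 0; i < sig.size(); i++) {
    std::string where;
    switch (sig[i]) {
      case T_VOID:
        where = "(second half, no slot)";
        break;
      case T_FLOAT:
      case T_DOUBLE:
        if (fargs < n_float) { where = float_regs[fargs++]; }
        else { where = "stack slot " + std::to_string(stk); stk += 2; }
        break;
      default:
        if (iargs < n_int) { where = int_regs[iargs++]; }
        else { where = "stack slot " + std::to_string(stk); stk += 2; }
        break;
    }
    std::printf("arg %zu -> %s\n", i, where.c_str());
  }
  return 0;
}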
If we ran compiled here then ++ // we can (theoretically) do endless i2c->c2i->i2c transitions during ++ // deopt/uncommon trap cycles. If we always go interpreted here then ++ // we can have at most one and don't need to play any tricks to keep ++ // from endlessly growing the stack. ++ // ++ // Actually if we detected that we had an i2c->c2i transition here we ++ // ought to be able to reset the world back to the state of the interpreted ++ // call and not bother building another interpreter arg area. We don't ++ // do that at this point. ++ ++ patch_callers_callsite(masm); ++ __ bind(skip_fixup); ++ ++#ifdef COMPILER2 ++ __ empty_FPU_stack(); ++#endif ++ //this is for native ? ++ // Since all args are passed on the stack, total_args_passed * interpreter_ ++ // stack_element_size is the ++ // space we need. ++ int extraspace = total_args_passed * Interpreter::stackElementSize; ++ ++ // stack is aligned, keep it that way ++ extraspace = round_to(extraspace, 2*wordSize); ++ ++ // Get return address ++ __ move(V0, RA); ++ // set senderSP value ++ //refer to interpreter_mips.cpp:generate_asm_entry ++ __ move(Rsender, SP); ++ __ addiu(SP, SP, -extraspace); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // st_off points to lowest address on stack. ++ int st_off = ((total_args_passed - 1) - i) * Interpreter::stackElementSize; ++ // Say 4 args: ++ // i st_off ++ // 0 12 T_LONG ++ // 1 8 T_VOID ++ // 2 4 T_OBJECT ++ // 3 0 T_BOOL ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use fpu stack top ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ } else { ++ ++ ++ int next_off = st_off - Interpreter::stackElementSize; ++ __ ld_ptr(AT, SP, ld_off); ++ __ st_ptr(AT, SP, st_off); ++ ++ // Ref to is_Register condition ++ if(sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ st_ptr(AT, SP, st_off - 8); ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ __ sd(r, SP, st_off); ++ } else { ++ //FIXME, mips will not enter here ++ // long/double in gpr ++ __ sd(r, SP, st_off); ++ // In [java/util/zip/ZipFile.java] ++ // ++ // private static native long open(String name, int mode, long lastModified); ++ // private static native int getTotal(long jzfile); ++ // ++ // We need to transfer T_LONG parameters from a compiled method to a native method.
++ // It's a complex process: ++ // ++ // Caller -> lir_static_call -> gen_resolve_stub ++ // -> -- resolve_static_call_C ++ // `- gen_c2i_adapter() [*] ++ // | ++ // `- AdapterHandlerLibrary::get_create_apapter_index ++ // -> generate_native_entry ++ // -> InterpreterRuntime::SignatureHandlerGenerator::pass_long [**] ++ // ++ // In [**], T_Long parameter is stored in stack as: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // However, the sequence is reversed here: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // So I stored another 8 bytes in the T_VOID slot. It then can be accessed from generate_native_entry(). ++ // ++ if (sig_bt[i] == T_LONG) ++ __ sd(r, SP, st_off - 8); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ swc1(fr, SP, st_off); ++ else { ++ __ sdc1(fr, SP, st_off); ++ __ sdc1(fr, SP, st_off - 8); // T_DOUBLE needs two slots ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ld_ptr(AT, Rmethod, in_bytes(Method::interpreter_entry_offset()) ); ++ // And repush original return address ++ __ move(RA, V0); ++ __ jr (AT); ++ __ delayed()->nop(); ++} ++ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) { ++ ++ // Generate an I2C adapter: adjust the I-frame to make space for the C-frame ++ // layout. Lesp was saved by the calling I-frame and will be restored on ++ // return. Meanwhile, outgoing arg space is all owned by the callee ++ // C-frame, so we can mangle it at will. After adjusting the frame size, ++ // hoist register arguments and repack other args according to the compiled ++ // code convention. Finally, end in a jump to the compiled code. The entry ++ // point address is the start of the buffer. ++ ++ // We will only enter here from an interpreted frame and never from after ++ // passing thru a c2i. Azul allowed this but we do not. If we lose the ++ // race and use a c2i we will remain interpreted for the race loser(s). ++ // This removes all sorts of headaches on the mips side and also eliminates ++ // the possibility of having c2i -> i2c -> c2i -> ... endless transitions. ++ ++ ++ __ move(T9, SP); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ // int comp_args_on_stack = VMRegImpl::reg2stack(max_arg); ++ // Convert 4-byte stack slots to words. 
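The sentence just above refers to the arithmetic that follows: compiled-code arguments are described in 4-byte VMReg slots, so the i2c adapter converts them to machine words and then rounds up to an even word count for stack alignment. A worked sketch of that conversion, assuming a 64-bit word (wordSize == 8, LogBytesPerWord == 3) and a local stand-in for HotSpot's round_to:

#include <cstdio>

constexpr int wordSize = 8;          // 64-bit target (assumption)
constexpr int LogBytesPerWord = 3;

// Stand-in for HotSpot's round_to: round x up to a multiple of m (m a power of two).
int round_to(int x, int m) { return (x + m - 1) & ~(m - 1); }

int main() {
  for (int comp_args_on_stack : {0, 1, 3, 5}) {
    // Same two steps as gen_i2c_adapter: slots*4 bytes rounded to a word,
    // converted to words, then rounded to an even word count.
    int words = round_to(comp_args_on_stack * 4, wordSize) >> LogBytesPerWord;
    words = round_to(words, 2);
    std::printf("%d compiled stack slots -> reserve %d words (%d bytes)\n",
                comp_args_on_stack, words, words * wordSize);
  }
  return 0;
}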
++ comp_words_on_stack = round_to(comp_args_on_stack*4, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = round_to(comp_words_on_stack, 2); ++ __ daddiu(SP, SP, -comp_words_on_stack * wordSize); ++ } ++ ++ // Align the outgoing SP ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ // push the return address on the stack (note that pushing, rather ++ // than storing it, yields the correct frame alignment for the callee) ++ // Put saved SP in another register ++ const Register saved_sp = V0; ++ __ move(saved_sp, T9); ++ ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ld(T9, Rmethod, in_bytes(Method::from_compiled_offset())); ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ //assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed -1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset (+ wordSize to ++ // account for return address ) ++ // NOTICE HERE!!!! I sub a wordSize here ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size; ++ //+ wordSize; ++ ++ if (!r_2->is_valid()) { ++ __ ld(AT, saved_sp, ld_off); ++ __ sd(AT, SP, st_off); ++ } else { ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ // st_off is LSW (i.e. reg.first()) ++ ++ // [./org/eclipse/swt/graphics/GC.java] ++ // void drawImageXRender(Image srcImage, int srcX, int srcY, int srcWidth, int srcHeight, ++ // int destX, int destY, int destWidth, int destHeight, ++ // boolean simple, ++ // int imgWidth, int imgHeight, ++ // long maskPixmap, <-- Pass T_LONG in stack ++ // int maskType); ++ // Before this modification, Eclipse displays icons with solid black background. ++ // ++ __ ld(AT, saved_sp, ld_off); ++ if (sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) ++ __ ld(AT, saved_sp, ld_off - 8); ++ __ sd(AT, SP, st_off); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ if (r_2->is_valid()) { ++ // Remember r_1 is low address (and LSB on mips) ++ // So r_2 gets loaded from high address regardless of the platform ++ assert(r_2->as_Register() == r_1->as_Register(), ""); ++ __ ld(r, saved_sp, ld_off); ++ ++ // ++ // For T_LONG type, the real layout is as below: ++ // ++ // (high) ++ // | | ++ // ----------- ++ // | 8 bytes | ++ // | (void) | ++ // ----------- ++ // | 8 bytes | ++ // | (long) | ++ // ----------- ++ // | | ++ // (low) ++ // ++ // We should load the low-8 bytes. 
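The diagram above explains the ld(r, saved_sp, ld_off - 8) that follows: a Java long occupies two interpreter stack elements, the value sits in the lower-addressed element and the element above it is the unused T_VOID half, so the adapter reads 8 bytes below the nominal offset. A self-contained illustration with plain memory standing in for the interpreter frame:

#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  // Two 8-byte interpreter stack elements for one T_LONG: the value is written
  // to the low element; the element above it is the unused T_VOID half.
  uint8_t frame[16] = {0};
  int64_t value = 0x1122334455667788LL;
  int ld_off = 8;                                            // nominal slot of the argument pair

  std::memcpy(frame + (ld_off - 8), &value, sizeof value);   // long lives in the low element

  int64_t loaded;
  std::memcpy(&loaded, frame + (ld_off - 8), sizeof loaded); // mirrors ld(r, saved_sp, ld_off - 8)
  std::printf("read 0x%llx from ld_off - 8\n", (unsigned long long)loaded);
  return 0;
}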
++ // ++ if (sig_bt[i] == T_LONG) ++ __ ld(r, saved_sp, ld_off - 8); ++ } else { ++ __ lw(r, saved_sp, ld_off); ++ } ++ } else if (r_1->is_FloatRegister()) { // Float Register ++ assert(sig_bt[i] == T_FLOAT || sig_bt[i] == T_DOUBLE, "Must be a float register"); ++ ++ FloatRegister fr = r_1->as_FloatRegister(); ++ if (sig_bt[i] == T_FLOAT) ++ __ lwc1(fr, saved_sp, ld_off); ++ else { ++ __ ldc1(fr, saved_sp, ld_off); ++ __ ldc1(fr, saved_sp, ld_off - 8); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ sd(Rmethod, thread, in_bytes(JavaThread::callee_target_offset())); ++ ++ // move methodOop to V0 in case we end up in a c2i adapter. ++ // the c2i adapters expect methodOop in V0 (c2) because c2's ++ // resolve stubs return the result (the method) in V0. ++ // I'd love to fix this. ++ __ move(V0, Rmethod); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) { ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know G5 holds the methodOop. The ++ // args start out packed in the compiled layout. They need to be unpacked ++ // into the interpreter layout. This will almost always require some stack ++ // space. We grow the current (compiled) stack, then repack the args. We ++ // finally end in a jump to the generic interpreter entry point. On exit ++ // from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relies solely on SP and not FP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ { ++ Register holder = T1; ++ Register receiver = T0; ++ Register temp = T8; ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ Label missed; ++ ++ //add for compressedoops ++ __ load_klass(temp, receiver); ++ ++ __ ld_ptr(AT, holder, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(Rmethod, holder, CompiledICHolder::holder_metadata_offset()); ++ __ bne(AT, temp, missed); ++ __ delayed()->nop(); ++ // Method might have been compiled since the call site was patched to ++ // interpreted; if that is the case treat it as a miss so we can get ++ // the call site corrected.
++ __ ld_ptr(AT, Rmethod, in_bytes(Method::code_offset())); ++ __ beq(AT, R0, skip_fixup); ++ __ delayed()->nop(); ++ __ bind(missed); ++ ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ ++ address c2i_entry = __ pc(); ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) { ++ assert(regs2 == NULL, "not needed on MIPS"); ++ // Return the number of VMReg stack_slots needed for the args. ++ // This value does not include an abi space (like register window ++ // save area). ++ ++ // We return the amount of VMReg stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. Since we always ++ // have space for storing at least 6 registers to memory we start with that. ++ // See int_stk_helper for a further discussion. ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. ++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ A0, A1, A2, A3, A4, A5, A6, A7 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ F12, F13, F14, F15, F16, F17, F18, F19 ++ }; ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++// Example: ++// n java.lang.UNIXProcess::forkAndExec ++// private native int forkAndExec(byte[] prog, ++// byte[] argBlock, int argc, ++// byte[] envBlock, int envc, ++// byte[] dir, ++// boolean redirectErrorStream, ++// FileDescriptor stdin_fd, ++// FileDescriptor stdout_fd, ++// FileDescriptor stderr_fd) ++// JNIEXPORT jint JNICALL ++// Java_java_lang_UNIXProcess_forkAndExec(JNIEnv *env, ++// jobject process, ++// jbyteArray prog, ++// jbyteArray argBlock, jint argc, ++// jbyteArray envBlock, jint envc, ++// jbyteArray dir, ++// jboolean redirectErrorStream, ++// jobject stdin_fd, ++// jobject stdout_fd, ++// jobject stderr_fd) ++// ++// ::c_calling_convention ++// 0: // env <-- a0 ++// 1: L // klass/obj <-- t0 => a1 ++// 2: [ // prog[] <-- a0 => a2 ++// 3: [ // argBlock[] <-- a1 => a3 ++// 4: I // argc <-- a2 => a4 ++// 5: [ // envBlock[] <-- a3 => a5 ++// 6: I // envc <-- a4 => a5 ++// 7: [ // dir[] <-- a5 => a7 ++// 8: Z // redirectErrorStream <-- a6 => sp[0] ++// 9: L // stdin fp[16] => sp[8] ++// 10: L // stdout fp[24] => sp[16] ++// 11: L // stderr fp[32] => sp[24] ++// ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (args < Argument::n_register_parameters) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < 
Argument::n_float_register_parameters) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert(sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return round_to(stk_args, 2); ++} ++ ++// --------------------------------------------------------------------------- ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ swc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ sdc1(FSF, FP, -wordSize ); ++ break; ++ case T_VOID: break; ++ case T_LONG: ++ __ sd(V0, FP, -wordSize); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ sd(V0, FP, -wordSize); ++ break; ++ default: { ++ __ sw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) { ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ lwc1(FSF, FP, -wordSize); ++ break; ++ case T_DOUBLE: ++ __ ldc1(FSF, FP, -wordSize ); ++ break; ++ case T_LONG: ++ __ ld(V0, FP, -wordSize); ++ break; ++ case T_VOID: break; ++ case T_OBJECT: ++ case T_ARRAY: ++ __ ld(V0, FP, -wordSize); ++ break; ++ default: { ++ __ lw(V0, FP, -wordSize); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ push(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) { ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ pop(args[i].first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A simple move of integer like type ++static void simple_move32(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ // stack to reg ++ __ lw(dst.first()->as_Register(), FP, reg2offset_in(src.first())); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ sd(src.first()->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ if (dst.first() != src.first()){ ++ __ move(dst.first()->as_Register(), src.first()->as_Register()); // fujie error:dst.first() ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset) { ++ ++ // must pass a handle. 
First figure out the location we use as a handle ++ ++ //FIXME, for mips, dst can be register ++ if (src.first()->is_stack()) { ++ // Oop is already on the stack as an argument ++ Register rHandle = V0; ++ Label nil; ++ __ xorr(rHandle, rHandle, rHandle); ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ beq(AT, R0, nil); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(FP, reg2offset_in(src.first()))); ++ __ bind(nil); ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move( (dst.first())->as_Register(), rHandle); ++ //if dst is register ++ //FIXME, do mips need out preserve stack slots? ++ int offset_in_older_frame = src.first()->reg2stack() ++ + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame ++ + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ } else { ++ // Oop is in a register; we must store it to the space we reserve ++ // on the stack for oop_handles ++ const Register rOop = src.first()->as_Register(); ++ assert( (rOop->encoding() >= A0->encoding()) && (rOop->encoding() <= T0->encoding()),"wrong register"); ++ const Register rHandle = V0; ++ //Important: refer to java_calling_convention ++ int oop_slot = (rOop->encoding() - A0->encoding()) * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ Label skip; ++ __ sd( rOop , SP, offset ); ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ __ xorr( rHandle, rHandle, rHandle); ++ __ beq(rOop, R0, skip); ++ __ delayed()->nop(); ++ __ lea(rHandle, Address(SP, offset)); ++ __ bind(skip); ++ // Store the handle parameter ++ if(dst.first()->is_stack())__ sd( rHandle, SP, reg2offset_out(dst.first())); ++ else __ move((dst.first())->as_Register(), rHandle); ++ //if dst is register ++ ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ lw(AT, FP, reg2offset_in(src.first())); ++ __ sw(AT, SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ lwc1(dst.first()->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } else { ++ // reg to stack ++ if(dst.first()->is_stack()) ++ __ swc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ else ++ __ mov_s(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibility for a long_move VMRegPair is: ++ // 1: two stack slots (possibly unaligned) ++ // as neither the java nor C calling convention will use registers ++ // for longs.
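object_move above applies the usual JNI rule that native code never sees a raw oop: the oop is spilled to a known stack slot and the argument becomes the address of that slot, except that a NULL oop is passed through as a NULL handle rather than as a pointer to a NULL slot. A minimal sketch of that null filtering, with plain pointers standing in for oops and stack slots:

#include <cstdio>

struct Object {};   // stand-in for an oop

// Spill the oop to the reserved slot and hand out the slot's address,
// but let a NULL oop become a NULL handle (the beq ... lea pattern above).
Object** box_as_handle(Object* oop, Object** slot) {
  *slot = oop;
  return (oop == nullptr) ? nullptr : slot;
}

int main() {
  Object obj;
  Object* slots[2] = { nullptr, nullptr };
  std::printf("non-null oop -> handle %p\n", (void*)box_as_handle(&obj, &slots[0]));
  std::printf("null oop     -> handle %p\n", (void*)box_as_handle(nullptr, &slots[1]));
  return 0;
}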
++ ++ if (src.first()->is_stack()) { ++ assert(src.second()->is_stack() && dst.second()->is_stack(), "must be all stack"); ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ld( (dst.first())->as_Register() , FP, reg2offset_in(src.first())); ++ } ++ } else { ++ if( dst.first()->is_stack()){ ++ __ sd( (src.first())->as_Register(), SP, reg2offset_out(dst.first())); ++ } else { ++ __ move( (dst.first())->as_Register() , (src.first())->as_Register()); ++ } ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst) { ++ ++ // The only legal possibilities for a double_move VMRegPair are: ++ // The painful thing here is that like long_move a VMRegPair might be ++ ++ // Because of the calling convention we know that src is either ++ // 1: a single physical register (xmm registers only) ++ // 2: two stack slots (possibly unaligned) ++ // dst can only be a pair of stack slots. ++ ++ ++ if (src.first()->is_stack()) { ++ // source is all stack ++ if( dst.first()->is_stack()){ ++ __ ld(AT, FP, reg2offset_in(src.first())); ++ __ sd(AT, SP, reg2offset_out(dst.first())); ++ } else { ++ __ ldc1( (dst.first())->as_FloatRegister(), FP, reg2offset_in(src.first())); ++ } ++ ++ } else { ++ // reg to stack ++ // No worries about stack alignment ++ if( dst.first()->is_stack()){ ++ __ sdc1(src.first()->as_FloatRegister(), SP, reg2offset_out(dst.first())); ++ } ++ else ++ __ mov_d( dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ ++ } ++} ++ ++static void verify_oop_args(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ Register temp_reg = T9; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ld(temp_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size + wordSize)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ methodHandle method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) { ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = S3; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", iid); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. 
++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ld(member_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. ++ fatal("receiver always in a register"); ++ receiver_reg = SSR; // known to be free at this point ++ __ ld(receiver_reg, Address(SP, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++nmethod *SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) { ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ bool is_critical_native = true; ++ address native_func = critical_entry; ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // Native nmethod wrappers never take possesion of the oop arguments. ++ // So the caller will gc the arguments. The only thing we need an ++ // oopMap for is if the call is static ++ // ++ // An OopMap for lock (and class if static), and one for the VM call itself ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ Thread* THREAD = Thread::current(); ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(CHECK_NULL); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ // ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // registers. We must create space for them here that is disjoint from ++ // the windowed save area because we have no control over when we might ++ // flush the window again and overwrite values that gc has since modified. ++ // (The live window race) ++ // ++ // We always just allocate 6 word for storing down these object. This allow ++ // us to simply record the base and use the Ireg number to decide which ++ // slot to use. (Note that the reg number is the inbound number not the ++ // outbound number). ++ // We must shuffle args to match the native convention, and include var-args space. ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 9 * VMRegImpl::slots_per_word; // 9 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
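The bookkeeping above widens the Java signature into the C one: every regular wrapper gains a hidden leading JNIEnv*, a static method also gains the class mirror, and a critical native instead turns each T_ARRAY argument into a (length, element pointer) pair. A small sketch of that counting, with the two flags as plain booleans (illustrative only, not the HotSpot types):

#include <cstdio>
#include <vector>

enum BasicType { T_INT, T_OBJECT, T_ARRAY };

int count_c_args(const std::vector<BasicType>& in_sig, bool is_static, bool is_critical_native) {
  int total_c_args = static_cast<int>(in_sig.size());
  if (!is_critical_native) {
    total_c_args += 1;                         // hidden JNIEnv*
    if (is_static) total_c_args += 1;          // hidden class mirror
  } else {
    for (BasicType bt : in_sig)
      if (bt == T_ARRAY) total_c_args += 1;    // array becomes (int length, elem*)
  }
  return total_c_args;
}

int main() {
  std::vector<BasicType> sig = { T_ARRAY, T_INT, T_OBJECT };
  std::printf("regular static native: %d C args\n", count_c_args(sig, true, false));
  std::printf("critical native      : %d C args\n", count_c_args(sig, false, true));
  return 0;
}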
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = round_to(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place to save return value or as a temporary for any gpr -> fpr moves ++ // + 2 for return address (which we own) and saved fp ++ stack_slots += 2 + 9 * VMRegImpl::slots_per_word; // (T0, A0, A1, A2, A3, A4, A5, A6, A7) ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | vararg area | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ ++ ++ // First thing make an ic check to see if we should even be here ++ address ic_miss = SharedRuntime::get_ic_miss_stub(); ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except fp. fp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. 
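The frame picture above is produced by straightforward slot accounting before the final rounding to the stack alignment. A worked sketch of that accumulation; slots_per_word, stack_slot_size and StackAlignmentInSlots below are assumed illustrative values (4-byte slots, 8-byte words, 16-byte alignment), not taken from the port's headers:

#include <cstdio>

constexpr int slots_per_word = 2;         // assumption: 4-byte slots, 8-byte words
constexpr int stack_slot_size = 4;
constexpr int StackAlignmentInSlots = 4;  // assumption: 16-byte stack alignment

int round_to(int x, int m) { return (x + m - 1) & ~(m - 1); }

int main() {
  int out_arg_slots = 10;                                // pretend c_calling_convention returned this
  bool is_static = true, is_synchronized = true;

  int stack_slots = out_arg_slots;                       // abi out-preserve area assumed 0 here
  int oop_handle_offset = stack_slots;
  stack_slots += 9 * slots_per_word;                     // oop handle area (T0, A0..A7)
  if (is_static)       stack_slots += slots_per_word;    // handlized class mirror
  if (is_synchronized) stack_slots += slots_per_word;    // lock box
  stack_slots += 2 + 9 * slots_per_word;                 // result temp plus saved RA/FP area

  stack_slots = round_to(stack_slots, StackAlignmentInSlots);
  std::printf("oop handles at slot %d, frame = %d slots = %d bytes\n",
              oop_handle_offset, stack_slots, stack_slots * stack_slot_size);
  return 0;
}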
++ ++ //refer to register_mips.hpp:IC_Klass ++ const Register ic_reg = T1; ++ const Register receiver = T0; ++ ++ Label hit; ++ Label exception_pending; ++ ++ __ verify_oop(receiver); ++ //add for compressedoops ++ __ load_klass(T9, receiver); ++ __ beq(T9, ic_reg, hit); ++ __ delayed()->nop(); ++ __ jmp(ic_miss, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ // Generate stack overflow check ++ if (UseStackBanging) { ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); ++ } ++ ++ // Generate a new frame for the wrapper. ++ // do mips need this ? ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ __ st_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ enter(); ++ // -2 because return address is already present and so is saved fp ++ __ addiu(SP, SP, -1 * (stack_size - 2*wordSize)); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ // Calculate the difference between sp and fp. We need to know it ++ // after the native call because on windows Java Natives will pop ++ // the arguments and it is painful to do sp relative addressing ++ // in a platform independent way. So after the call we switch to ++ // fp relative addressing. ++ //FIXME actually , the fp_adjustment may not be the right, because andr(sp, sp, at) may change ++ //the SP ++ int fp_adjustment = stack_size - 2*wordSize; ++ ++#ifdef COMPILER2 ++ // C2 may leave the stack dirty if not in SSE2+ mode ++ __ empty_FPU_stack(); ++#endif ++ ++ // Compute the fp offset for any slots used after the jni call ++ ++ int lock_slot_fp_offset = (lock_slot_offset*VMRegImpl::stack_slot_size) - fp_adjustment; ++ // We use TREG as a thread pointer because it is callee save and ++ // if we load it once it is usable thru the entire wrapper ++ const Register thread = TREG; ++ ++ // We use S4 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = S4; ++ if (is_critical_native) { ++ Unimplemented(); ++ // check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, ++ // oop_handle_offset, oop_maps, in_regs, in_sig_bt); ++ } ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmpi, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ // ++ // Natives require 1 or 2 extra arguments over the normal ones: the JNIEnv* ++ // and, if static, the class mirror instead of a receiver. This pretty much ++ // guarantees that register layout will not match (and mips doesn't use reg ++ // parms though amd does). Since the native abi doesn't use register args ++ // and the java conventions does we don't have to worry about collisions. ++ // All of our moved are reg->stack or stack->stack. ++ // We ignore the extra arguments during the shuffle and handle them at the ++ // last moment. The shuffle is described by the two calling convention ++ // vectors we have in our possession. We simply walk the java vector to ++ // get the source locations and the c vector to get the destinations. 
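Because every move in this shuffle is reg->stack or stack->stack, it is enough for the regular JNI case to walk the Java argument vector from the last argument down, pairing each Java index i with its C-side index c_arg. A hedged sketch of how that (i, c_arg) order comes out:

#include <cstdio>
#include <utility>
#include <vector>

int main() {
  int total_in_args = 4;
  bool is_static = true;
  int total_c_args = total_in_args + (is_static ? 2 : 1);   // JNIEnv* (+ class mirror)

  // Same pairing as the non-critical branch above: highest Java argument first.
  std::vector<std::pair<int, int>> arg_order;
  for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--)
    arg_order.push_back({i, c_arg});

  for (const auto& p : arg_order)
    std::printf("java arg %d -> C arg %d\n", p.first, p.second);
  return 0;
}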
++ ++ int c_arg = method->is_static() ? 2 : 1 ; ++ ++ // Record sp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of fp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(fp)); ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. ++ GrowableArray arg_order(2 * total_in_args); ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(T8->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ Unimplemented(); ++ // ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ move(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++ Unimplemented(); ++ // unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ 
freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ simple_move32(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ // Pre-load a static method's oop. Used both by locking code and ++ // the normal JNI call code. ++ ++ __ move(oop_handle_reg, A1); ++ ++ if (method->is_static() && !is_critical_native) { ++ ++ // load opp into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local( ++ (method->method_holder())->java_mirror())); ++ ++ ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ patchable_set48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ // Now handlize the static class mirror it's known not-null. ++ __ sd( oop_handle_reg, SP, klass_offset); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(SP, klass_offset)); ++ // store the klass handle as second argument ++ __ move(A1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ intptr_t the_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ set_last_Java_frame(SP, noreg, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)the_pc ; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? 
++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ save_args(masm, total_c_args, c_arg, out_regs); ++ int metadata_index = __ oop_recorder()->find_index(method()); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ thread, AT); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // These are register definitions we need for locking/unlocking ++ const Register swap_reg = T8; // Must use T8 for cmpxchg instruction ++ const Register obj_reg = T9; // Will contain the oop ++ //const Register lock_reg = T6; // Address of compiler lock object (BasicLock) ++ const Register lock_reg = c_rarg0; // Address of compiler lock object (BasicLock) ++ ++ ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ // Lock a synchronized method ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ move(oop_handle_reg, A1); ++ ++ // Get address of the box ++ __ lea(lock_reg, Address(FP, lock_slot_fp_offset)); ++ ++ // Load the oop from the handle ++ __ ld(obj_reg, oop_handle_reg, 0); ++ ++ if (UseBiasedLocking) { ++ // Note that oop_handle_reg is trashed during this call ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, A1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %T8 ++ __ move(swap_reg, 1); ++ ++ __ ld(AT, obj_reg, 0); ++ __ orr(swap_reg, swap_reg, AT); ++ ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ cmpxchg(Address(obj_reg, 0), swap_reg, lock_reg, AT, true, false, lock_done); ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) sp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - sp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %T8 as the result of cmpxchg ++ ++ __ dsubu(swap_reg, swap_reg, SP); ++ __ move(AT, 3 - os::vm_page_size()); ++ __ andr(swap_reg , swap_reg, AT); ++ // Save the test result, for recursive case, the result is zero ++ __ sd(swap_reg, lock_reg, mark_word_offset); ++ __ bne(swap_reg, R0, slow_path_lock); ++ __ delayed()->nop(); ++ // Slow path will re-enter here ++ __ bind(lock_done); ++ ++ if (UseBiasedLocking) { ++ // Re-fetch oop_handle_reg as we trashed it above ++ __ move(A1, oop_handle_reg); ++ } ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ addiu(A0, thread, in_bytes(JavaThread::jni_environment_offset())); ++ } ++ ++ // Example: Java_java_lang_ref_Finalizer_invokeFinalizeMethod(JNIEnv *env, jclass clazz, jobject ob) ++ // Load the second arguments into A1 ++ //__ ld(A1, SP , wordSize ); // klass ++ ++ // Now set thread in native ++ __ addiu(AT, R0, _thread_in_native); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ // do the call ++ __ call(native_func, relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // WARNING - on Windows Java Natives use pascal calling convention and pop the ++ // arguments off of the stack. 
We could just re-adjust the stack pointer here ++ // and continue to do SP relative addressing but we instead switch to FP ++ // relative addressing. ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : // nothing to do break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in st0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ addiu(AT, R0, _thread_in_native_trans); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ sync(); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. ++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ Label after_transition; ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // ++ save_native_result(masm, ret_type, stack_slots); ++ __ move(A0, thread); ++ __ addiu(SP, SP, -wordSize); ++ __ push(S2); ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ if (!is_critical_native) { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } else { ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ move(SP, S2); // use S2 as a sender SP holder ++ __ pop(S2); ++ __ addiu(SP, SP, wordSize); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ if (is_critical_native) { ++ // The call above performed the transition to thread_in_Java so ++ // skip the transition logic below. 
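What the stores and checks above implement is the standard JNI thread-state protocol: publish _thread_in_native before the call, flip to _thread_in_native_trans afterwards, and only return to _thread_in_Java once the safepoint/suspend check has passed (otherwise take a slow path that may block). A compact, purely illustrative sketch of the order those states are visited in; the types and the slow-path stub below are not HotSpot code:

#include <cstdio>

enum ThreadState { _thread_in_Java, _thread_in_native, _thread_in_native_trans };

struct FakeThread {
  ThreadState state = _thread_in_Java;
  bool safepoint_or_suspend_pending = false;
};

void slow_path_transition(FakeThread&) { /* check_special_condition_for_native_trans may block here */ }

void native_call_sequence(FakeThread& t) {
  t.state = _thread_in_native;         // published (with a store release) before the native call
  /* ... native function runs ... */
  t.state = _thread_in_native_trans;   // GC may still treat us as "in native"; oops untouchable
  if (t.safepoint_or_suspend_pending)
    slow_path_transition(t);
  t.state = _thread_in_Java;           // after this the wrapper may unbox handles, unlock, etc.
}

int main() {
  FakeThread t;
  t.safepoint_or_suspend_pending = true;
  native_call_sequence(t);
  std::printf("final state: %d\n", (int)t.state);
  return 0;
}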
++ __ beq(R0, R0, after_transition); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ addiu(AT, R0, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(AT, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ Label reguard; ++ Label reguard_done; ++ __ lw(AT, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ addiu(AT, AT, -JavaThread::stack_guard_yellow_reserved_disabled); ++ __ beq(AT, R0, reguard); ++ __ delayed()->nop(); ++ // slow path reguard re-enters here ++ __ bind(reguard_done); ++ ++ // Handle possible exception (will unlock if necessary) ++ ++ // native result if any is live ++ ++ // Unlock ++ Label slow_path_unlock; ++ Label unlock_done; ++ if (method->is_synchronized()) { ++ ++ Label done; ++ ++ // Get locked oop from the handle we passed to jni ++ __ ld( obj_reg, oop_handle_reg, 0); ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, T8, done); ++ ++ } ++ ++ // Simple recursive lock? ++ ++ __ ld(AT, FP, lock_slot_fp_offset); ++ __ beq(AT, R0, done); ++ __ delayed()->nop(); ++ // Must save FSF if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // get old displaced header ++ __ ld (T8, FP, lock_slot_fp_offset); ++ // get address of the stack lock ++ __ addiu(c_rarg0, FP, lock_slot_fp_offset); ++ // Atomic swap old header if oop still contains the stack lock ++ __ cmpxchg(Address(obj_reg, 0), c_rarg0, T8, AT, false, false, unlock_done, &slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip_if(masm, &DTraceMethodProbes, 0); ++ // Tell dtrace about this method exit ++ save_native_result(masm, ret_type, stack_slots); ++ int metadata_index = __ oop_recorder()->find_index( (method())); ++ RelocationHolder rspec = metadata_Relocation::spec(metadata_index); ++ __ relocate(rspec); ++ __ patchable_set48(AT, (long)(method())); ++ ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ thread, AT); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ // We can finally stop using that last_Java_frame we setup ages ago ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unpack oop result, e.g. JNIHandles::resolve value. ++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(V0, thread, T9); ++ } ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ld(AT, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, AT, JNIHandleBlock::top_offset_in_bytes()); ++ } ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, exception_pending); ++ __ delayed()->nop(); ++ } ++ // no exception, we're almost done ++ ++ // check that only result value is on FPU stack ++ __ verify_FPU(ret_type == T_FLOAT || ret_type == T_DOUBLE ? 
1 : 0, "native_wrapper normal exit"); ++ ++ // Return ++#ifndef OPT_THREAD ++ __ get_thread(TREG); ++#endif ++ //__ ld_ptr(SP, TREG, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ leave(); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ // Unexpected paths are out of line and go here ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ // has last_Java_frame setup. No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ __ move(A0, obj_reg); ++ __ move(A1, lock_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, - 3*wordSize); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S2); ++ __ addiu(SP, SP, 3*wordSize); ++ ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ b(lock_done); ++ __ delayed()->nop(); ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // Slow path unlock ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ push(AT); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ move(S2, SP); // use S2 as a sender SP holder ++ __ andr(SP, SP, AT); // align stack as required by ABI ++ ++ // should be a peal ++ // +wordSize because of the push above ++ __ addiu(A1, FP, lock_slot_fp_offset); ++ ++ __ move(A0, obj_reg); ++ __ move(A2, thread); ++ __ addiu(SP, SP, -2*wordSize); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ addiu(SP, SP, 2*wordSize); ++ __ move(SP, S2); ++ //add for compressedoops ++ __ reinit_heapbase(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld( AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ pop(AT); ++ __ sd(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ b(unlock_done); ++ __ delayed()->nop(); ++ // END Slow path unlock ++ ++ } ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ save_native_result(masm, ret_type, stack_slots); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ restore_native_result(masm, ret_type, stack_slots); ++ __ b(reguard_done); ++ __ delayed()->nop(); ++ ++ // BEGIN EXCEPTION PROCESSING ++ if (!is_critical_native) { ++ // 
Forward the exception ++ __ bind(exception_pending); ++ ++ // remove possible return value from FPU register stack ++ __ empty_FPU_stack(); ++ ++ // pop our frame ++ //forward_exception_entry need return address on stack ++ __ move(SP, FP); ++ __ pop(FP); ++ ++ // and forward the exception ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ } ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ if (is_critical_native) { ++ nm->set_lazy_critical_native(true); ++ } ++ ++ return nm; ++ ++} ++ ++#ifdef HAVE_DTRACE_H ++// --------------------------------------------------------------------------- ++// Generate a dtrace nmethod for a given signature. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// abi and then leaves nops at the position you would expect to call a native ++// function. When the probe is enabled the nops are replaced with a trap ++// instruction that dtrace inserts and the trace will cause a notification ++// to dtrace. ++// ++// The probes are only able to take primitive types and java/lang/String as ++// arguments. No other java types are allowed. Strings are converted to utf8 ++// strings so that from dtrace point of view java strings are converted to C ++// strings. There is an arbitrary fixed limit on the total space that a method ++// can use for converting the strings. (256 chars per string in the signature). ++// So any java string larger then this is truncated. ++ ++static int fp_offset[ConcreteRegisterImpl::number_of_registers] = { 0 }; ++static bool offsets_initialized = false; ++ ++static VMRegPair reg64_to_VMRegPair(Register r) { ++ VMRegPair ret; ++ if (wordSize == 8) { ++ ret.set2(r->as_VMReg()); ++ } else { ++ ret.set_pair(r->successor()->as_VMReg(), r->as_VMReg()); ++ } ++ return ret; ++} ++ ++ ++nmethod *SharedRuntime::generate_dtrace_nmethod(MacroAssembler *masm, ++ methodHandle method) { ++ ++ ++ // generate_dtrace_nmethod is guarded by a mutex so we are sure to ++ // be single threaded in this method. ++ assert(AdapterHandlerLibrary_lock->owned_by_self(), "must be"); ++ ++ // Fill in the signature array, for the calling-convention call. ++ int total_args_passed = method->size_of_parameters(); ++ ++ BasicType* in_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed); ++ VMRegPair *in_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed); ++ ++ // The signature we are going to use for the trap that dtrace will see ++ // java/lang/String is converted. We drop "this" and any other object ++ // is converted to NULL. (A one-slot java/lang/Long object reference ++ // is converted to a two-slot long, which is why we double the allocation). 
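++ //
++ // For example, a probe whose Java signature is (String, Integer, Object)
++ // is presented to dtrace as roughly (char* utf8, int, NULL), and the
++ // receiver of a non-static method is dropped entirely.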
++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_args_passed * 2); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_args_passed * 2); ++ ++ int i=0; ++ int total_strings = 0; ++ int first_arg_to_pass = 0; ++ int total_c_args = 0; ++ ++ // Skip the receiver as dtrace doesn't want to see it ++ if( !method->is_static() ) { ++ in_sig_bt[i++] = T_OBJECT; ++ first_arg_to_pass = 1; ++ } ++ ++ SignatureStream ss(method->signature()); ++ for ( ; !ss.at_return_type(); ss.next()) { ++ BasicType bt = ss.type(); ++ in_sig_bt[i++] = bt; // Collect remaining bits of signature ++ out_sig_bt[total_c_args++] = bt; ++ if( bt == T_OBJECT) { ++ symbolOop s = ss.as_symbol_or_null(); ++ if (s == vmSymbols::java_lang_String()) { ++ total_strings++; ++ out_sig_bt[total_c_args-1] = T_ADDRESS; ++ } else if (s == vmSymbols::java_lang_Boolean() || ++ s == vmSymbols::java_lang_Byte()) { ++ out_sig_bt[total_c_args-1] = T_BYTE; ++ } else if (s == vmSymbols::java_lang_Character() || ++ s == vmSymbols::java_lang_Short()) { ++ out_sig_bt[total_c_args-1] = T_SHORT; ++ } else if (s == vmSymbols::java_lang_Integer() || ++ s == vmSymbols::java_lang_Float()) { ++ out_sig_bt[total_c_args-1] = T_INT; ++ } else if (s == vmSymbols::java_lang_Long() || ++ s == vmSymbols::java_lang_Double()) { ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } ++ } else if ( bt == T_LONG || bt == T_DOUBLE ) { ++ in_sig_bt[i++] = T_VOID; // Longs & doubles take 2 Java slots ++ // We convert double to long ++ out_sig_bt[total_c_args-1] = T_LONG; ++ out_sig_bt[total_c_args++] = T_VOID; ++ } else if ( bt == T_FLOAT) { ++ // We convert float to int ++ out_sig_bt[total_c_args-1] = T_INT; ++ } ++ } ++ ++ assert(i==total_args_passed, "validly parsed signature"); ++ ++ // Now get the compiled-Java layout as input arguments ++ int comp_args_on_stack; ++ comp_args_on_stack = SharedRuntime::java_calling_convention( ++ in_sig_bt, in_regs, total_args_passed, false); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the a native (non-jni) function would expect them. To figure out ++ // where they go we convert the java signature to a C signature and remove ++ // T_VOID for any long/double we might have received. ++ ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require (neglecting out_preserve_stack_slots but space for storing ++ // the 1st six register arguments). It's weird see int_stk_helper. ++ ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Plus a temp for possible converion of float/double/long register args ++ ++ int conversion_temp = stack_slots; ++ stack_slots += 2; ++ ++ ++ // Now space for the string(s) we must convert ++ ++ int string_locs = stack_slots; ++ stack_slots += total_strings * ++ (max_dtrace_string_size / VMRegImpl::stack_slot_size); ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | string[n] | ++ // |---------------------| <- string_locs[n] ++ // | string[n-1] | ++ // |---------------------| <- string_locs[n-1] ++ // | ... | ++ // | ... 
| ++ // |---------------------| <- string_locs[1] ++ // | string[0] | ++ // |---------------------| <- string_locs[0] ++ // | temp | ++ // |---------------------| <- conversion_temp ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = round_to(stack_slots, 4 * VMRegImpl::slots_per_word); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ intptr_t start = (intptr_t)__ pc(); ++ ++ // First thing make an ic check to see if we should even be here ++ ++ { ++ Label L; ++ const Register temp_reg = G3_scratch; ++ Address ic_miss(temp_reg, SharedRuntime::get_ic_miss_stub()); ++ __ verify_oop(O0); ++ __ ld_ptr(O0, oopDesc::klass_offset_in_bytes(), temp_reg); ++ __ cmp(temp_reg, G5_inline_cache_reg); ++ __ brx(Assembler::equal, true, Assembler::pt, L); ++ __ delayed()->nop(); ++ ++ __ jump_to(ic_miss, 0); ++ __ delayed()->nop(); ++ __ align(CodeEntryAlignment); ++ __ bind(L); ++ } ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++ // Make enough room for patch_verified_entry ++ __ nop(); ++ __ nop(); ++ ++ // Generate stack overflow check before creating frame ++ __ generate_stack_overflow_check(stack_size); ++ ++ // Generate a new frame for the wrapper. ++ __ save(SP, -stack_size, SP); ++ ++ // Frame is now completed as far a size and linkage. ++ ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ VMRegPair zero; ++ const Register g0 = G0; // without this we get a compiler warning (why??) 
++ zero.set2(g0->as_VMReg()); ++ ++ int c_arg, j_arg; ++ ++ Register conversion_off = noreg; ++ ++ for (j_arg = first_arg_to_pass, c_arg = 0 ; ++ j_arg < total_args_passed ; j_arg++, c_arg++ ) { ++ ++ VMRegPair src = in_regs[j_arg]; ++ VMRegPair dst = out_regs[c_arg]; ++ ++#ifdef ASSERT ++ if (src.first()->is_Register()) { ++ assert(!reg_destroyed[src.first()->as_Register()->encoding()], "ack!"); ++ } else if (src.first()->is_FloatRegister()) { ++ assert(!freg_destroyed[src.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)], "ack!"); ++ } ++ if (dst.first()->is_Register()) { ++ reg_destroyed[dst.first()->as_Register()->encoding()] = true; ++ } else if (dst.first()->is_FloatRegister()) { ++ freg_destroyed[dst.first()->as_FloatRegister()->encoding( ++ FloatRegisterImpl::S)] = true; ++ } ++#endif /* ASSERT */ ++ ++ switch (in_sig_bt[j_arg]) { ++ case T_ARRAY: ++ case T_OBJECT: ++ { ++ if (out_sig_bt[c_arg] == T_BYTE || out_sig_bt[c_arg] == T_SHORT || ++ out_sig_bt[c_arg] == T_INT || out_sig_bt[c_arg] == T_LONG) { ++ // need to unbox a one-slot value ++ Register in_reg = L0; ++ Register tmp = L2; ++ if ( src.first()->is_reg() ) { ++ in_reg = src.first()->as_Register(); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(src.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, in_reg); ++ } ++ // If the final destination is an acceptable register ++ if ( dst.first()->is_reg() ) { ++ if ( dst.is_single_phys_reg() || out_sig_bt[c_arg] != T_LONG ) { ++ tmp = dst.first()->as_Register(); ++ } ++ } ++ ++ Label skipUnbox; ++ if ( wordSize == 4 && out_sig_bt[c_arg] == T_LONG ) { ++ __ mov(G0, tmp->successor()); ++ } ++ __ br_null(in_reg, true, Assembler::pn, skipUnbox); ++ __ delayed()->mov(G0, tmp); ++ ++ BasicType bt = out_sig_bt[c_arg]; ++ int box_offset = java_lang_boxing_object::value_offset_in_bytes(bt); ++ switch (bt) { ++ case T_BYTE: ++ __ ldub(in_reg, box_offset, tmp); break; ++ case T_SHORT: ++ __ lduh(in_reg, box_offset, tmp); break; ++ case T_INT: ++ __ ld(in_reg, box_offset, tmp); break; ++ case T_LONG: ++ __ ld_long(in_reg, box_offset, tmp); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ bind(skipUnbox); ++ // If tmp wasn't final destination copy to final destination ++ if (tmp == L2) { ++ VMRegPair tmp_as_VM = reg64_to_VMRegPair(L2); ++ if (out_sig_bt[c_arg] == T_LONG) { ++ long_move(masm, tmp_as_VM, dst); ++ } else { ++ move32_64(masm, tmp_as_VM, out_regs[c_arg]); ++ } ++ } ++ if (out_sig_bt[c_arg] == T_LONG) { ++ assert(out_sig_bt[c_arg+1] == T_VOID, "must be"); ++ ++c_arg; // move over the T_VOID to keep the loop indices in sync ++ } ++ } else if (out_sig_bt[c_arg] == T_ADDRESS) { ++ Register s = ++ src.first()->is_reg() ? src.first()->as_Register() : L2; ++ Register d = ++ dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // We store the oop now so that the conversion pass can reach ++ // while in the inner frame. This will be the only store if ++ // the oop is NULL. 
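++ // (Four cases follow: register->register, register->stack,
++ // stack->register, and stack->stack staged through L2.)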
++ if (s != L2) { ++ // src is register ++ if (d != L2) { ++ // dst is register ++ __ mov(s, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(s, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } else { ++ // src not a register ++ assert(Assembler::is_simm13(reg2offset(src.first()) + ++ STACK_BIAS), "must be"); ++ __ ld_ptr(FP, reg2offset(src.first()) + STACK_BIAS, d); ++ if (d == L2) { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(d, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } else if (out_sig_bt[c_arg] != T_VOID) { ++ // Convert the arg to NULL ++ if (dst.first()->is_reg()) { ++ __ mov(G0, dst.first()->as_Register()); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + ++ STACK_BIAS), "must be"); ++ __ st_ptr(G0, SP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ } ++ } ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ move32_64(masm, src, dst); ++ } else { ++ if (dst.first()->is_reg()) { ++ // freg -> reg ++ int off = ++ STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ Register d = dst.first()->as_Register(); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld(SP, conversion_off , d); ++ } ++ } else { ++ // freg -> mem ++ int off = STACK_BIAS + reg2offset(dst.first()); ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, off); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::S, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ } ++ } ++ } ++ break; ++ ++ case T_DOUBLE: ++ assert( j_arg + 1 < total_args_passed && ++ in_sig_bt[j_arg + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ if (src.first()->is_stack()) { ++ // Stack to stack/reg is simple ++ long_move(masm, src, dst); ++ } else { ++ Register d = dst.first()->is_reg() ? dst.first()->as_Register() : L2; ++ ++ // Destination could be an odd reg on 32bit in which case ++ // we can't load direct to the destination. 
++ ++ if (!d->is_even() && wordSize == 4) { ++ d = L2; ++ } ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, off); ++ __ ld_long(SP, off, d); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stf(FloatRegisterImpl::D, src.first()->as_FloatRegister(), ++ SP, conversion_off); ++ __ ld_long(SP, conversion_off, d); ++ } ++ if (d == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } ++ break; ++ ++ case T_LONG : ++ // 32bit can't do a split move of something like g1 -> O0, O1 ++ // so use a memory temp ++ if (src.is_single_phys_reg() && wordSize == 4) { ++ Register tmp = L2; ++ if (dst.first()->is_reg() && ++ (wordSize == 8 || dst.first()->as_Register()->is_even())) { ++ tmp = dst.first()->as_Register(); ++ } ++ ++ int off = STACK_BIAS + conversion_temp * VMRegImpl::stack_slot_size; ++ if (Assembler::is_simm13(off)) { ++ __ stx(src.first()->as_Register(), SP, off); ++ __ ld_long(SP, off, tmp); ++ } else { ++ if (conversion_off == noreg) { ++ __ set(off, L6); ++ conversion_off = L6; ++ } ++ __ stx(src.first()->as_Register(), SP, conversion_off); ++ __ ld_long(SP, conversion_off, tmp); ++ } ++ ++ if (tmp == L2) { ++ long_move(masm, reg64_to_VMRegPair(L2), dst); ++ } ++ } else { ++ long_move(masm, src, dst); ++ } ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ move32_64(masm, src, dst); ++ } ++ } ++ ++ ++ // If we have any strings we must store any register based arg to the stack ++ // This includes any still live xmm registers too. ++ ++ if (total_strings > 0 ) { ++ ++ // protect all the arg registers ++ __ save_frame(0); ++ __ mov(G2_thread, L7_thread_cache); ++ const Register L2_string_off = L2; ++ ++ // Get first string offset ++ __ set(string_locs * VMRegImpl::stack_slot_size, L2_string_off); ++ ++ for (c_arg = 0 ; c_arg < total_c_args ; c_arg++ ) { ++ if (out_sig_bt[c_arg] == T_ADDRESS) { ++ ++ VMRegPair dst = out_regs[c_arg]; ++ const Register d = dst.first()->is_reg() ? ++ dst.first()->as_Register()->after_save() : noreg; ++ ++ // It's a string the oop and it was already copied to the out arg ++ // position ++ if (d != noreg) { ++ __ mov(d, O0); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ ld_ptr(FP, reg2offset(dst.first()) + STACK_BIAS, O0); ++ } ++ Label skip; ++ ++ __ br_null(O0, false, Assembler::pn, skip); ++ __ delayed()->addu(FP, L2_string_off, O1); ++ ++ if (d != noreg) { ++ __ mov(O1, d); ++ } else { ++ assert(Assembler::is_simm13(reg2offset(dst.first()) + STACK_BIAS), ++ "must be"); ++ __ st_ptr(O1, FP, reg2offset(dst.first()) + STACK_BIAS); ++ } ++ ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::get_utf), ++ relocInfo::runtime_call_type); ++ __ delayed()->addu(L2_string_off, max_dtrace_string_size, L2_string_off); ++ ++ __ bind(skip); ++ ++ } ++ ++ } ++ __ mov(L7_thread_cache, G2_thread); ++ __ restore(); ++ ++ } ++ ++ ++ // Ok now we are done. 
Need to place the nop that dtrace wants in order to ++ // patch in the trap ++ ++ int patch_offset = ((intptr_t)__ pc()) - start; ++ ++ __ nop(); ++ ++ ++ // Return ++ ++ __ ret(); ++ __ delayed()->restore(); ++ ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_dtrace_nmethod( ++ method, masm->code(), vep_offset, patch_offset, frame_complete, ++ stack_slots / VMRegImpl::slots_per_word); ++ return nm; ++ ++} ++ ++#endif // HAVE_DTRACE_H ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++// "Top of Stack" slots that may be unused by the calling convention but must ++// otherwise be preserved. ++// On Intel these are not necessary and the value can be zero. ++// On Sparc this describes the words reserved for storing a register window ++// when an interrupt occurs. ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_deopt_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ //CodeBuffer buffer ("deopt_blob", 4000, 2048); ++ CodeBuffer buffer ("deopt_blob", 8000, 2048); ++ MacroAssembler* masm = new MacroAssembler( & buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ // Account for the extra args we place on the stack ++ // by the time we call fetch_unroll_info ++ const int additional_words = 2; // deopt kind, thread ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ address start = __ pc(); ++ Label cont; ++ // we use S3 for DeOpt reason register ++ Register reason = S3; ++ // use S6 for thread register ++ Register thread = TREG; ++ // use S7 for fetch_unroll_info returned UnrollBlock ++ Register unroll = S7; ++ // Prolog for non exception case! ++ // Correct the return address we were given. ++ //FIXME, return address is on the tos or Ra? ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ // Save everything in sight. ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ // Normal deoptimization ++ __ move(reason, Deoptimization::Unpack_deopt); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int reexecute_offset = __ pc() - start; ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ __ move(reason, Deoptimization::Unpack_reexecute); ++ __ b(cont); ++ __ delayed()->nop(); ++ ++ int exception_offset = __ pc() - start; ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for V0 and ++ // V1 which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. 
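++ // (V0 is stored to JavaThread::_exception_oop and V1 to
++ // JavaThread::_exception_pc just below.)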
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ int exception_in_tls_offset = __ pc() - start; ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // Return address will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ // Save everything in sight. ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ __ addiu(RA, RA, - (NativeCall::return_address_offset_long)); ++ (void) RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ // store the correct deoptimization type ++ __ move(reason, Deoptimization::Unpack_exception); ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(V1, SP, RegisterSaver::raOffset() * wordSize); //save ra ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ld_ptr(AT, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ verify_oop(AT); ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, no_pending_exception); ++ __ delayed()->nop(); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ __ bind(cont); ++ // Compiled code leaves the floating point stack dirty, empty it. ++ __ empty_FPU_stack(); ++ ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ __ move(A0, thread); ++ __ move(A1, reason); // exec_mode ++ __ addiu(SP, SP, -additional_words * wordSize); ++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ // Call fetch_unroll_info(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. Call should capture return values. 
++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call((address)Deoptimization::fetch_unroll_info); ++ //__ call(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ oop_maps->add_gc_map(__ pc() - start, map); ++ __ addiu(SP, SP, additional_words * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ __ move(unroll, V0); ++ ++ ++ // Move the unpack kind to a safe place in the UnrollBlock because ++ // we are very short of registers ++ ++ Address unpack_kind(unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ sw(reason, unpack_kind); ++ // save the unpack_kind value ++ // Retrieve the possible live values (return values) ++ // All callee save registers representing jvm state ++ // are now in the vframeArray. ++ ++ Label noException; ++ __ move(AT, Deoptimization::Unpack_exception); ++ __ bne(AT, reason, noException);// Was exception pending? ++ __ delayed()->nop(); ++ __ ld_ptr(V0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ __ ld_ptr(V1, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_pc_offset())); ++ __ st_ptr(R0, thread, in_bytes(JavaThread::exception_oop_offset())); ++ ++ __ verify_oop(V0); ++ ++ // Overwrite the result registers with the exception results. ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset()*wordSize); ++ __ st_ptr(V1, SP, RegisterSaver::v1Offset()*wordSize); ++ ++ __ bind(noException); ++ ++ ++ // Stack is back to only having register save data on the stack. ++ // Now restore the result registers. Everything else is either dead or captured ++ // in the vframeArray. ++ ++ RegisterSaver::restore_result_registers(masm); ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ // Pop all the frames we must move/replace. ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. 
++ ++ // register for the sender's sp ++ Register sender_sp = Rsender; ++ // register for frame pcs ++ Register pcs = T0; ++ // register for frame sizes ++ Register sizes = T1; ++ // register for frame count ++ Register count = T3; ++ ++ // Pop deoptimized frame ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ addu(SP, SP, AT); ++ // sp should be pointing at the return address to the caller (3) ++ ++ // Load array of frame pcs into pcs ++ __ ld_ptr(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ __ addiu(SP, SP, wordSize); // trash the old pc ++ // Load array of frame sizes into T6 ++ __ ld_ptr(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ ++ ++ ++ // Load count of frams into T3 ++ __ lw(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ subu(SP, SP, AT); ++ ++ // Push interpreter frames in a loop ++ // ++ //Loop: ++ // 0x000000555bd82d18: lw t2, 0x0(t1) ; lw sizes[i] <--- error lw->ld ++ // 0x000000555bd82d1c: ld at, 0x0(t0) ; ld pcs[i] ++ // 0x000000555bd82d20: daddiu t2, t2, 0xfffffff0 ; t2 -= 16 ++ // 0x000000555bd82d24: daddiu sp, sp, 0xfffffff0 ++ // 0x000000555bd82d28: sd fp, 0x0(sp) ; push fp ++ // 0x000000555bd82d2c: sd at, 0x8(sp) ; push at ++ // 0x000000555bd82d30: daddu fp, sp, zero ; fp <- sp ++ // 0x000000555bd82d34: dsubu sp, sp, t2 ; sp -= t2 ++ // 0x000000555bd82d38: sd zero, 0xfffffff0(fp) ; __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ // 0x000000555bd82d3c: sd s4, 0xfffffff8(fp) ; __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ // 0x000000555bd82d40: daddu s4, sp, zero ; move(sender_sp, SP); ++ // 0x000000555bd82d44: daddiu t3, t3, 0xffffffff ; count -- ++ // 0x000000555bd82d48: daddiu t1, t1, 0x4 ; sizes += 4 ++ // 0x000000555bd82d4c: bne t3, zero, 0x000000555bd82d18 ++ // 0x000000555bd82d50: daddiu t0, t0, 0x4 ; <--- error t0 += 8 ++ // ++ // pcs[0] = frame_pcs[0] = deopt_sender.raw_pc(); regex.split ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld_ptr(AT, pcs, 0); // save return address ++ __ addiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ subu(SP, SP, T2); // Prolog! 
++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ addiu(count, count, -1); // decrement counter ++ __ addiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ bne(count, R0, loop); ++ __ delayed()->addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ ld(AT, pcs, 0); // frame_pcs[number_of_frames] = Interpreter::deopt_entry(vtos, 0); ++ // Re-push self-frame ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize); ++ __ addiu(SP, SP, -(frame_size_in_words - 2 - additional_words) * wordSize); ++ ++ // Restore frame locals after moving the frame ++ __ sd(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ __ sd(V1, SP, RegisterSaver::v1Offset() * wordSize); ++ __ sdc1(F0, SP, RegisterSaver::fpResultOffset()* wordSize);// Pop float stack and store in local ++ __ sdc1(F1, SP, (RegisterSaver::fpResultOffset() + 1) * wordSize); ++ ++ ++ // Call unpack_frames(). Need thread and this frame, but NOT official VM entry - cannot block on ++ // this call, no GC can happen. ++ __ move(A1, reason); // exec_mode ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(A0, thread); // thread ++ __ addiu(SP, SP, (-additional_words) *wordSize); ++ ++ // set last_Java_sp, last_Java_fp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ ++ __ call(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ld(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map(__ offset(), new OopMap( frame_size_in_words , 0)); ++ ++ __ push(V0); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ ld(V0, SP, (RegisterSaver::v0Offset() + additional_words + 1) * wordSize); ++ __ ld(V1, SP, (RegisterSaver::v1Offset() + additional_words + 1) * wordSize); ++ __ ldc1(F0, SP, (RegisterSaver::fpResultOffset() + additional_words + 1) * wordSize);// Pop float stack and store in local ++ __ ldc1(F1, SP, (RegisterSaver::fpResultOffset() + additional_words + 2) * wordSize); ++ //FIXME, ++ // Clear floating point stack before returning to interpreter ++ __ empty_FPU_stack(); ++ //FIXME, we should consider about float and double ++ // Push a float or double return value if necessary. 
++ __ leave(); ++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ masm->flush(); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++} ++ ++#ifdef COMPILER2 ++ ++//------------------------------generate_uncommon_trap_blob-------------------- ++// Ought to generate an ideal graph & compile, but here's some SPARC ASM ++// instead. ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // allocate space for the code ++ ResourceMark rm; ++ // setup code generation tools ++ CodeBuffer buffer ("uncommon_trap_blob", 512*80 , 512*40 ); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ enum frame_layout { ++ fp_off, fp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++ assert(framesize % 4 == 0, "sp not 16-byte aligned"); ++ ++ address start = __ pc(); ++ ++ // Push self-frame. ++ __ daddiu(SP, SP, -framesize * BytesPerInt); ++ ++ __ sd(RA, SP, return_off * BytesPerInt); ++ __ sd(FP, SP, fp_off * BytesPerInt); ++ ++ __ daddiu(FP, SP, fp_off * BytesPerInt); ++ ++ // Clear the floating point exception stack ++ __ empty_FPU_stack(); ++ ++ Register thread = TREG; ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // set last_Java_sp ++ __ set_last_Java_frame(NOREG, FP, NULL); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ long save_pc = (long)__ pc() + 56; ++ __ patchable_set48(AT, (long)save_pc); ++ __ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ __ move(A0, thread); ++ // argument already in T0 ++ __ move(A1, T0); ++ __ addiu(A2, R0, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::uncommon_trap); ++ ++ // Set an oopmap for the call site ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap( framesize, 0 ); ++ ++ //oop_maps->add_gc_map( __ offset(), true, map); ++ oop_maps->add_gc_map( __ offset(), map); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock into S7 ++ Register unroll = S7; ++ __ move(unroll, V0); ++ ++#ifdef ASSERT ++ { Label L; ++ __ ld_ptr(AT, unroll, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()); ++ __ li(T9, Deoptimization::Unpack_uncommon_trap); ++ __ beq(AT, T9, L); ++ __ delayed()->nop(); ++ __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); ++ __ bind(L); ++ } ++#endif ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: possible-i2c-adapter-frame ++ // 4: caller of deopting frame (could be compiled/interpreted. 
If interpreted we will create an ++ // and c2i here) ++ ++ __ daddiu(SP, SP, framesize * BytesPerInt); ++ ++ // Pop deoptimized frame ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes()); ++ __ daddu(SP, SP, AT); ++ ++ // register for frame pcs ++ Register pcs = T8; ++ // register for frame sizes ++ Register sizes = T9; ++ // register for frame count ++ Register count = T3; ++ // register for the sender's sp ++ Register sender_sp = T1; ++ ++ // sp should be pointing at the return address to the caller (4) ++ // Load array of frame pcs ++ __ ld(pcs, unroll, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes()); ++ ++ // Load array of frame sizes ++ __ ld(sizes, unroll, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes()); ++ __ lwu(count, unroll, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes()); ++ ++ // Pick up the initial fp we should save ++ __ ld(FP, unroll, Deoptimization::UnrollBlock::initial_info_offset_in_bytes()); ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ __ move(sender_sp, SP); ++ __ lw(AT, unroll, Deoptimization::UnrollBlock::caller_adjustment_offset_in_bytes()); ++ __ dsubu(SP, SP, AT); ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ld(T2, sizes, 0); // Load frame size ++ __ ld(AT, pcs, 0); // save return address ++ __ daddiu(T2, T2, -2*wordSize); // we'll push pc and fp, by hand ++ __ push2(AT, FP); ++ __ move(FP, SP); ++ __ dsubu(SP, SP, T2); // Prolog! ++ // This value is corrected by layout_activation_impl ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(sender_sp, FP, frame::interpreter_frame_sender_sp_offset * wordSize);// Make it walkable ++ __ move(sender_sp, SP); // pass to next frame ++ __ daddiu(count, count, -1); // decrement counter ++ __ daddiu(sizes, sizes, wordSize); // Bump array pointer (sizes) ++ __ addiu(pcs, pcs, wordSize); // Bump array pointer (pcs) ++ __ bne(count, R0, loop); ++ __ delayed()->nop(); // Bump array pointer (pcs) ++ ++ __ ld(RA, pcs, 0); ++ ++ // Re-push self-frame ++ // save old & set new FP ++ // save final return address ++ __ enter(); ++ ++ // Use FP because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(NOREG, FP, the_pc); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); // Fix stack alignment as required by ABI ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ __ move(A0, thread); ++ __ addiu(A1, R0, Deoptimization::Unpack_uncommon_trap); ++ __ patchable_call((address)Deoptimization::unpack_frames); ++ // Set an oopmap for the call site ++ oop_maps->add_gc_map( __ offset(), new OopMap( framesize, 0 ) ); ++ ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog! 
++ ++ // Jump to interpreter ++ __ jr(RA); ++ __ delayed()->nop(); ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, framesize / 2); ++} ++ ++#endif // COMPILER2 ++ ++//------------------------------generate_handler_blob------------------- ++// ++// Generate a special Compile2Runtime blob that saves all registers, and sets ++// up an OopMap and calls safepoint code to stop the compiled code for ++// a safepoint. ++// ++// This blob is jumped to (via a breakpoint and the signal handler) from a ++// safepoint in compiled code. ++ ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int pool_type) { ++ ++ // Account for thread arg in our frame ++ const int additional_words = 0; ++ int frame_size_in_words; ++ ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // allocate space for the code ++ // setup code generation tools ++ CodeBuffer buffer ("handler_blob", 2048, 512); ++ MacroAssembler* masm = new MacroAssembler( &buffer); ++ ++ const Register thread = TREG; ++ address start = __ pc(); ++ address call_pc = NULL; ++ bool cause_return = (pool_type == POLL_AT_RETURN); ++ bool save_vectors = (pool_type == POLL_AT_VECTOR_LOOP); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ map = RegisterSaver::save_live_registers(masm, additional_words, &frame_size_in_words, save_vectors); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselvs. ++ ++ __ set_last_Java_frame(NOREG, NOREG, NULL); ++ ++ if (!cause_return) { ++ // overwrite the return address pushed by save_live_registers ++ // Additionally, TSR is a callee-saved register so we can look at ++ // it later to determine if someone changed the return address for ++ // us! ++ __ ld_ptr(TSR, thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ __ st_ptr(TSR, SP, RegisterSaver::raOffset() * wordSize); ++ } ++ ++ // Do the call ++ __ move(A0, thread); ++ __ call(call_ptr); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ Label noException; ++ ++ // Clear last_Java_sp again ++ __ reset_last_Java_frame(false); ++ ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, noException); ++ __ delayed()->nop(); ++ ++ // Exception pending ++ ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++ __ patchable_jump((address)StubRoutines::forward_exception_entry()); ++ ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail; ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ ld_ptr(AT, SP, RegisterSaver::raOffset() * wordSize); ++ __ bne(AT, TSR, no_adjust); ++ __ delayed()->nop(); ++ ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. 
++ // See NativeInstruction::is_safepoint_poll() ++ __ lwu(AT, TSR, 0); ++ __ dsrl(AT, AT, 16); ++ __ andi(AT, AT, 0xfc1f); ++ __ xori(AT, AT, 0x8c01); ++ __ bne(AT, R0, bail); ++ __ delayed()->nop(); ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ addiu(RA, TSR, 4); // NativeInstruction::instruction_size=4 ++ __ st_ptr(RA, SP, RegisterSaver::raOffset() * wordSize); ++ } ++ ++ __ bind(no_adjust); ++ // Normal exit, register restoring and exit ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif ++ ++ // Make sure all code is generated ++ masm->flush(); ++ ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ //CodeBuffer buffer(name, 1000, 512); ++ CodeBuffer buffer(name, 2000, 2048); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_words; ++ //we put the thread in A0 ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ int start = __ offset(); ++ map = RegisterSaver::save_live_registers(masm, 0, &frame_size_words); ++ ++ ++ int frame_complete = __ offset(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ __ get_thread(thread); ++#else ++ const Register thread = TREG; ++#endif ++ ++ __ move(A0, thread); ++ __ set_last_Java_frame(noreg, FP, NULL); ++ //align the stack before invoke native ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 24 + 1 * BytesPerInstWord; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ __ call(destination); ++ __ delayed()->nop(); ++ ++ // Set an oopmap for the call site. ++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ oop_maps->add_gc_map( __ offset() - start, map); ++ // V0 contains the address we are going to jump to assuming no exception got installed ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld_ptr(SP, thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ // clear last_Java_sp ++ __ reset_last_Java_frame(true); ++ // check for pending exceptions ++ Label pending; ++ __ ld_ptr(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, pending); ++ __ delayed()->nop(); ++ // get the returned Method* ++ //FIXME, do mips need this ? 
++ __ get_vm_result_2(Rmethod, thread); // Refer to OpenJDK8 ++ __ st_ptr(Rmethod, SP, RegisterSaver::methodOffset() * wordSize); ++ __ st_ptr(V0, SP, RegisterSaver::v0Offset() * wordSize); ++ RegisterSaver::restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go the callee method. ++ __ jr(V0); ++ __ delayed()->nop(); ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ RegisterSaver::restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ //forward_exception_entry need return address on the stack ++ __ push(RA); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ st_ptr(R0, thread, in_bytes(JavaThread::vm_result_offset())); ++ __ ld_ptr(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ // ++ // make sure all code is generated ++ masm->flush(); ++ ++ RuntimeStub* tmp= RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_words, oop_maps, true); ++ return tmp; ++} ++ ++extern "C" int SpinPause() {return 0;} ++ ++ ++//------------------------------Montgomery multiplication------------------------ ++// ++ ++// Subtract 0:b from carry:a. Return carry. ++static unsigned long ++sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { ++ long borrow = 0, t = 0; ++ unsigned long tmp0, tmp1; ++ __asm__ __volatile__ ( ++ "0: \n" ++ "ld %[tmp0], 0(%[a]) \n" ++ "ld %[tmp1], 0(%[b]) \n" ++ "sltu %[t], %[tmp0], %[borrow] \n" ++ "dsubu %[tmp0], %[tmp0], %[borrow] \n" ++ "sltu %[borrow], %[tmp0], %[tmp1] \n" ++ "or %[borrow], %[borrow], %[t] \n" ++ "dsubu %[tmp0], %[tmp0], %[tmp1] \n" ++ "sd %[tmp0], 0(%[a]) \n" ++ "daddiu %[a], %[a], 8 \n" ++ "daddiu %[b], %[b], 8 \n" ++ "daddiu %[len], %[len], -1 \n" ++ "bgtz %[len], 0b \n" ++ "dsubu %[tmp0], %[carry], %[borrow] \n" ++ : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"+r"(t) ++ : [carry]"r"(carry) ++ : "memory" ++ ); ++ return tmp0; ++} ++ ++// Multiply (unsigned) Long A by Long B, accumulating the double- ++// length result into the accumulator formed of t0, t1, and t2. ++inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// As above, but add twice the double-length result into the ++// accumulator. 
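++ //
++ // A portable-C sketch of one accumulate step (illustration only; the
++ // inline asm above is what is actually used):
++ //
++ //   unsigned __int128 p = (unsigned __int128)A * B;
++ //   unsigned long lo = (unsigned long)p, hi = (unsigned long)(p >> 64);
++ //   t0 += lo;  unsigned long c = (t0 < lo);   // carry out of t0
++ //   t1 += c;   unsigned long t = (t1 < c);
++ //   t1 += hi;  c = (t1 < hi) | t;             // carry out of t1
++ //   t2 += c;
++ //
++ // MACC performs this step once per call, MACC2 (below) performs it twice.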
++inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "dmultu %[A], %[B] \n" ++ "mfhi %[hi] \n" ++ "mflo %[lo] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ "daddu %[t0], %[t0], %[lo] \n" ++ "sltu %[carry], %[t0], %[lo] \n" ++ "daddu %[t1], %[t1], %[carry] \n" ++ "sltu %[t], %[t1], %[carry] \n" ++ "daddu %[t1], %[t1], %[hi] \n" ++ "sltu %[carry], %[t1], %[hi] \n" ++ "or %[carry], %[carry], %[t] \n" ++ "daddu %[t2], %[t2], %[carry] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// Fast Montgomery multiplication. The derivation of the algorithm is ++// in A Cryptographic Library for the Motorola DSP56000, ++// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ ++static void __attribute__((noinline)) ++montgomery_multiply(unsigned long a[], unsigned long b[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ for (j = 0; j < i; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ MACC(a[i], b[0], t0, t1, t2); ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery multiply"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int j; ++ for (j = i-len+1; j < len; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Fast Montgomery squaring. This uses asymptotically 25% fewer ++// multiplies so it should be up to 25% faster than Montgomery ++// multiplication. However, its loop control is more complex and it ++// may actually run slower on some machines. 
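++ //
++ // The saving comes from the symmetry of a square: in column i the cross
++ // products a[j]*a[i-j] and a[i-j]*a[j] are equal, so each pair with
++ // j < i-j is accumulated once using MACC2 (which adds the product twice),
++ // and only the diagonal term a[i/2]*a[i/2], present when i is even, is
++ // added with a plain MACC.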
++ ++static void __attribute__((noinline)) ++montgomery_square(unsigned long a[], unsigned long n[], ++ unsigned long m[], unsigned long inv, int len) { ++ unsigned long t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == -1UL, "broken inverse in Montgomery multiply"); ++ ++ for (i = 0; i < len; i++) { ++ int j; ++ int end = (i+1)/2; ++ for (j = 0; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < i; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery square"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int start = i-len+1; ++ int end = start + (len - start)/2; ++ int j; ++ for (j = start; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < len; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Swap words in a longword. ++static unsigned long swap(unsigned long x) { ++ return (x << 32) | (x >> 32); ++} ++ ++// Copy len longwords from s to d, word-swapping as we go. The ++// destination array is reversed. ++static void reverse_words(unsigned long *s, unsigned long *d, int len) { ++ d += len; ++ while(len-- > 0) { ++ d--; ++ *d = swap(*s); ++ s++; ++ } ++} ++ ++// The threshold at which squaring is advantageous was determined ++// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. ++// Doesn't seem to be relevant for MIPS64 so we use the same value. ++#define MONTGOMERY_SQUARING_THRESHOLD 64 ++ ++void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_multiply must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 8k bytes of stack space. ++ int total_allocation = longwords * sizeof (unsigned long) * 4; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *b = scratch + 1 * longwords, ++ *n = scratch + 2 * longwords, ++ *m = scratch + 3 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)b_ints, b, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ ::montgomery_multiply(a, b, n, m, (unsigned long)inv, longwords); ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} ++ ++void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_square must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 6k bytes of stack space. 
++ int total_allocation = longwords * sizeof (unsigned long) * 3; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (unsigned long *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *n = scratch + 1 * longwords, ++ *m = scratch + 2 * longwords; ++ ++ reverse_words((unsigned long *)a_ints, a, longwords); ++ reverse_words((unsigned long *)n_ints, n, longwords); ++ ++ if (len >= MONTGOMERY_SQUARING_THRESHOLD) { ++ ::montgomery_square(a, n, m, (unsigned long)inv, longwords); ++ } else { ++ ::montgomery_multiply(a, a, n, m, (unsigned long)inv, longwords); ++ } ++ ++ reverse_words(m, (unsigned long *)m_ints, longwords); ++} +diff --git a/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp +new file mode 100644 +index 0000000000..9fe2bc8377 +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubGenerator_mips_64.cpp +@@ -0,0 +1,2162 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_mips.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++#define TIMES_OOP (UseCompressedOops ? 
Address::times_4 : Address::times_8) ++//#define a__ ((Assembler*)_masm)-> ++ ++//#ifdef PRODUCT ++//#define BLOCK_COMMENT(str) /* nothing */ ++//#else ++//#define BLOCK_COMMENT(str) __ block_comment(str) ++//#endif ++ ++//#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++ // ABI mips n64 ++ // This fig is not MIPS ABI. It is call Java from C ABI. ++ // Call stubs are used to call Java from C ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ptr. to call wrapper ] <--- a0 (old sp -->)fp ++ // 3 [ result ] <--- a1 ++ // 4 [ result_type ] <--- a2 ++ // 5 [ method ] <--- a3 ++ // 6 [ entry_point ] <--- a4 ++ // 7 [ parameters ] <--- a5 ++ // 8 [ parameter_size ] <--- a6 ++ // 9 [ thread ] <--- a7 ++ ++ // ++ // n64 does not save paras in sp. ++ // ++ // [ return_from_Java ] ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ // ... ++ //-13 [ thread ] ++ //-12 [ result_type ] <--- a2 ++ //-11 [ result ] <--- a1 ++ //-10 [ ] ++ // -9 [ ptr. to call wrapper ] <--- a0 ++ // -8 [ S6 ] ++ // -7 [ S5 ] ++ // -6 [ S4 ] ++ // -5 [ S3 ] ++ // -4 [ S1 ] ++ // -3 [ TSR(S2) ] ++ // -2 [ LVP(S7) ] ++ // -1 [ BCP(S1) ] ++ // 0 [ saved fp ] <--- fp_after_call ++ // 1 [ return address ] ++ // 2 [ ] <--- old sp ++ // ++ // Find a right place in the call_stub for GP. ++ // GP will point to the starting point of Interpreter::dispatch_table(itos). ++ // It should be saved/restored before/after Java calls. ++ // ++ enum call_stub_layout { ++ RA_off = 1, ++ FP_off = 0, ++ BCP_off = -1, ++ LVP_off = -2, ++ TSR_off = -3, ++ S1_off = -4, ++ S3_off = -5, ++ S4_off = -6, ++ S5_off = -7, ++ S6_off = -8, ++ call_wrapper_off = -9, ++ result_off = -11, ++ result_type_off = -12, ++ thread_off = -13, ++ total_off = thread_off - 1, ++ GP_off = -14, ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ ++ // same as in generate_catch_exception()! 
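++  // --------------------------------------------------------------------------
++  // [Editor's note - orientation only.] The incoming registers listed in the
++  // frame-layout comment above follow the n64 argument order of the CallStub
++  // type declared upstream in stubRoutines.hpp (reproduced from memory, so
++  // treat this as a hedged sketch rather than the authoritative declaration):
++  //   typedef void (*CallStub)(address   link,               // A0: call wrapper
++  //                            intptr_t* result,             // A1
++  //                            BasicType result_type,        // A2
++  //                            Method*   method,             // A3
++  //                            address   entry_point,        // A4
++  //                            intptr_t* parameters,         // A5
++  //                            int       size_of_parameters, // A6
++  //                            TRAPS);                       // A7: thread
++  // --------------------------------------------------------------------------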
++ ++ // stub code ++ // save ra and fp ++ __ enter(); ++ // I think 14 is the max gap between argument and callee saved register ++ assert((int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, "adjust this code"); ++ __ daddiu(SP, SP, total_off * wordSize); ++ __ sd(BCP, FP, BCP_off * wordSize); ++ __ sd(LVP, FP, LVP_off * wordSize); ++ __ sd(TSR, FP, TSR_off * wordSize); ++ __ sd(S1, FP, S1_off * wordSize); ++ __ sd(S3, FP, S3_off * wordSize); ++ __ sd(S4, FP, S4_off * wordSize); ++ __ sd(S5, FP, S5_off * wordSize); ++ __ sd(S6, FP, S6_off * wordSize); ++ __ sd(A0, FP, call_wrapper_off * wordSize); ++ __ sd(A1, FP, result_off * wordSize); ++ __ sd(A2, FP, result_type_off * wordSize); ++ __ sd(A7, FP, thread_off * wordSize); ++ __ sd(GP, FP, GP_off * wordSize); ++ ++ __ set64(GP, (long)Interpreter::dispatch_table(itos)); ++ ++#ifdef OPT_THREAD ++ __ move(TREG, A7); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ ld(AT, A7, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ /* FIXME: I do not know how to realize stop in mips arch, do it in the future */ ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ // A5: parameter ++ // A6: parameter_size ++ // T0: parameter_size_tmp(--) ++ // T2: offset(++) ++ // T3: tmp ++ Label parameters_done; ++ // judge if the parameter_size equals 0 ++ __ beq(A6, R0, parameters_done); ++ __ delayed()->nop(); ++ __ dsll(AT, A6, Interpreter::logStackElementSize); ++ __ dsubu(SP, SP, AT); ++ __ move(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP , AT); ++ // Copy Java parameters in reverse order (receiver last) ++ // Note that the argument order is inverted in the process ++ Label loop; ++ __ move(T0, A6); ++ __ move(T2, R0); ++ __ bind(loop); ++ ++ // get parameter ++ __ dsll(T3, T0, LogBytesPerWord); ++ __ daddu(T3, T3, A5); ++ __ ld(AT, T3, -wordSize); ++ __ dsll(T3, T2, LogBytesPerWord); ++ __ daddu(T3, T3, SP); ++ __ sd(AT, T3, Interpreter::expr_offset_in_bytes(0)); ++ __ daddiu(T2, T2, 1); ++ __ daddiu(T0, T0, -1); ++ __ bne(T0, R0, loop); ++ __ delayed()->nop(); ++ // advance to next parameter ++ ++ // call Java function ++ __ bind(parameters_done); ++ ++ // receiver in V0, methodOop in Rmethod ++ ++ __ move(Rmethod, A3); ++ __ move(Rsender, SP); //set sender sp ++ __ jalr(A4); ++ __ delayed()->nop(); ++ return_address = __ pc(); ++ ++ Label common_return; ++ __ bind(common_return); ++ ++ // store result depending on type ++ // (everything that is not T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ld(T0, FP, result_off * wordSize); // result --> T0 ++ Label is_long, is_float, is_double, exit; ++ __ ld(T2, FP, result_type_off * wordSize); // result_type --> T2 ++ __ daddiu(T3, T2, (-1) * T_LONG); ++ __ beq(T3, R0, is_long); ++ __ delayed()->daddiu(T3, T2, (-1) * T_FLOAT); ++ __ beq(T3, R0, is_float); ++ __ delayed()->daddiu(T3, T2, (-1) * T_DOUBLE); ++ __ beq(T3, R0, is_double); ++ __ delayed()->nop(); ++ ++ // handle T_INT case ++ __ sd(V0, T0, 0 * wordSize); ++ __ bind(exit); ++ ++ // restore ++ __ ld(BCP, FP, BCP_off * wordSize); ++ __ ld(LVP, FP, LVP_off * wordSize); ++ __ ld(GP, FP, GP_off * wordSize); ++ __ ld(TSR, FP, TSR_off * wordSize); ++ ++ __ ld(S1, FP, S1_off * wordSize); ++ __ ld(S3, FP, S3_off * wordSize); ++ __ ld(S4, FP, S4_off * wordSize); ++ __ ld(S5, FP, S5_off * wordSize); ++ __ ld(S6, 
FP, S6_off * wordSize); ++ ++ __ leave(); ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ // handle return types different from T_INT ++ __ bind(is_long); ++ __ sd(V0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_float); ++ __ swc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ ++ __ bind(is_double); ++ __ sdc1(F0, T0, 0 * wordSize); ++ __ b(exit); ++ __ delayed()->nop(); ++ //FIXME, 1.6 mips version add operation of fpu here ++ StubRoutines::gs2::set_call_stub_compiled_return(__ pc()); ++ __ b(common_return); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // sp. ++ // ++ // V0: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ ++ Register thread = TREG; ++ ++ // get thread directly ++#ifndef OPT_THREAD ++ __ ld(thread, FP, thread_off * wordSize); ++#endif ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { Label L; ++ __ get_thread(T8); ++ __ beq(T8, thread, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L); ++ } ++#endif ++ // set pending exception ++ __ verify_oop(V0); ++ __ sd(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ li(AT, (long)__FILE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_file_offset ())); ++ __ li(AT, (long)__LINE__); ++ __ sd(AT, thread, in_bytes(Thread::exception_line_offset ())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, "_call_stub_return_address must have been generated before"); ++ __ jmp(StubRoutines::_call_stub_return_address, relocInfo::none); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. ++ // ++ // Contract with Java-level exception handlers: ++ // V0: exception ++ // V1: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ //Register thread = TREG; ++ Register thread = TREG; ++ address start = __ pc(); ++ ++ // Upon entry, the sp points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. 
++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into T9 ++ __ ld(A1, SP, 0); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); ++ __ pop(V1); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(V0, thread, in_bytes(Thread::pending_exception_offset())); ++ __ sd(R0, thread, in_bytes(Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ bne(V0, R0, L); ++ __ delayed()->nop(); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // V0: exception ++ // T9: exception handler ++ // V1: throwing pc ++ __ verify_oop(V0); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ __ reinit_heapbase(); ++ __ verify_oop_subroutine(); ++ address end = __ pc(); ++ return start; ++ } ++ ++ // ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // A0 - array1 ++ // A1 - array2 ++ // A2 - element count ++ // ++ ++ // use T9 as temp ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) { ++ int elem_size = 1 << log2_elem_size; ++ Address::ScaleFactor sf = Address::times_1; ++ ++ switch (log2_elem_size) { ++ case 0: sf = Address::times_1; break; ++ case 1: sf = Address::times_2; break; ++ case 2: sf = Address::times_4; break; ++ case 3: sf = Address::times_8; break; ++ } ++ ++ __ dsll(AT, A2, sf); ++ __ daddu(AT, AT, A0); ++ __ daddiu(T9, AT, -elem_size); ++ __ dsubu(AT, A1, A0); ++ __ blez(AT, no_overlap_target); ++ __ delayed()->nop(); ++ __ dsubu(AT, A1, T9); ++ __ bgtz(AT, no_overlap_target); ++ __ delayed()->nop(); ++ ++ // If A0 = 0xf... and A1 = 0x0..., than goto no_overlap_target ++ Label L; ++ __ bgez(A0, L); ++ __ delayed()->nop(); ++ __ bgtz(A1, no_overlap_target); ++ __ delayed()->nop(); ++ __ bind(L); ++ ++ } ++ ++ // ++ // Generate stub for array fill. If "aligned" is true, the ++ // "to" address is assumed to be heapword aligned. 
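++  // --------------------------------------------------------------------------
++  // [Editor's sketch - illustrative only.] array_overlap_test() above branches
++  // to the disjoint (forward-copy) stub whenever copying in ascending order is
++  // safe. In portable terms the condition is roughly the following; the helper
++  // name is invented for illustration:
++#if 0
++static bool forward_copy_is_safe(uintptr_t from, uintptr_t to,
++                                 size_t count, size_t elem_size) {
++  if (count == 0) return true;                  // nothing to copy
++  return to <= from                             // dest at or below source
++      || to > from + (count - 1) * elem_size;   // dest past the last source element
++}
++#endif
++  // Otherwise the conjoint stubs copy from the high end downwards so that no
++  // source element is overwritten before it has been read.
++  // (The array-fill stub's documentation continues below.)
++  // --------------------------------------------------------------------------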
++ // ++ // Arguments for generated stub: ++ // to: c_rarg0 ++ // value: c_rarg1 ++ // count: c_rarg2 treated as signed ++ // ++ address generate_fill(BasicType t, bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ const Register to = A0; // source array address ++ const Register value = A1; // value ++ const Register count = A2; // elements count ++ ++ const Register cnt_words = T8; // temp register ++ ++ __ enter(); ++ ++ Label L_fill_elements, L_exit1; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 0; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ dins(value, value, 8, 8); // 8 bit -> 16 bit ++ __ dins(value, value, 16, 16); // 16 bit -> 32 bit ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ case T_SHORT: ++ shift = 1; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ dins(value, value, 16, 16); // 16 bit -> 32 bit ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ case T_INT: ++ shift = 2; ++ __ slti(AT, count, 8 >> shift); // Short arrays (< 8 bytes) fill by element ++ __ bne(AT, R0, L_fill_elements); ++ __ delayed()->nop(); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ // Align source address at 8 bytes address boundary. ++ Label L_skip_align1, L_skip_align2, L_skip_align4; ++ if (!aligned) { ++ switch (t) { ++ case T_BYTE: ++ // One byte misalignment happens only for byte arrays. ++ __ andi(AT, to, 1); ++ __ beq(AT, R0, L_skip_align1); ++ __ delayed()->nop(); ++ __ sb(value, to, 0); ++ __ daddiu(to, to, 1); ++ __ addiu32(count, count, -1); ++ __ bind(L_skip_align1); ++ // Fallthrough ++ case T_SHORT: ++ // Two bytes misalignment happens only for byte and short (char) arrays. ++ __ andi(AT, to, 1 << 1); ++ __ beq(AT, R0, L_skip_align2); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ addiu32(count, count, -(2 >> shift)); ++ __ bind(L_skip_align2); ++ // Fallthrough ++ case T_INT: ++ // Align to 8 bytes, we know we are 4 byte aligned to start. ++ __ andi(AT, to, 1 << 2); ++ __ beq(AT, R0, L_skip_align4); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ __ daddiu(to, to, 4); ++ __ addiu32(count, count, -(4 >> shift)); ++ __ bind(L_skip_align4); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ ++ // ++ // Fill large chunks ++ // ++ __ srl(cnt_words, count, 3 - shift); // number of words ++ __ dinsu(value, value, 32, 32); // 32 bit -> 64 bit ++ __ sll(AT, cnt_words, 3 - shift); ++ __ subu32(count, count, AT); ++ ++ Label L_loop_begin, L_loop_not_64bytes_fill, L_loop_end; ++ __ addiu32(AT, cnt_words, -8); ++ __ bltz(AT, L_loop_not_64bytes_fill); ++ __ delayed()->nop(); ++ __ bind(L_loop_begin); ++ __ sd(value, to, 0); ++ __ sd(value, to, 8); ++ __ sd(value, to, 16); ++ __ sd(value, to, 24); ++ __ sd(value, to, 32); ++ __ sd(value, to, 40); ++ __ sd(value, to, 48); ++ __ sd(value, to, 56); ++ __ daddiu(to, to, 64); ++ __ addiu32(cnt_words, cnt_words, -8); ++ __ addiu32(AT, cnt_words, -8); ++ __ bgez(AT, L_loop_begin); ++ __ delayed()->nop(); ++ ++ __ bind(L_loop_not_64bytes_fill); ++ __ beq(cnt_words, R0, L_loop_end); ++ __ delayed()->nop(); ++ __ sd(value, to, 0); ++ __ daddiu(to, to, 8); ++ __ addiu32(cnt_words, cnt_words, -1); ++ __ b(L_loop_not_64bytes_fill); ++ __ delayed()->nop(); ++ __ bind(L_loop_end); ++ ++ // Remaining count is less than 8 bytes. Fill it by a single store. 
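++  // --------------------------------------------------------------------------
++  // [Editor's sketch - illustrative only.] The dins/dinsu sequence at the top
++  // of generate_fill() replicates the element value across a 64-bit word so the
++  // main loop can store 8 or 64 bytes per iteration. For the byte case the
++  // widening step is, in portable C:
++#if 0
++static inline unsigned long splat_byte(unsigned char v) {
++  unsigned long x = v;
++  x |= x << 8;    //  8 -> 16 bits
++  x |= x << 16;   // 16 -> 32 bits
++  x |= x << 32;   // 32 -> 64 bits
++  return x;       // the value repeated in every byte lane
++}
++#endif
++  // (The single-store handling of the remaining tail continues below.)
++  // --------------------------------------------------------------------------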
++ // Note that the total length is no less than 8 bytes. ++ if (t == T_BYTE || t == T_SHORT) { ++ Label L_exit1; ++ __ beq(count, R0, L_exit1); ++ __ delayed()->nop(); ++ __ sll(AT, count, shift); ++ __ daddu(to, to, AT); // points to the end ++ __ sd(value, to, -8); // overwrite some elements ++ __ bind(L_exit1); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ // Handle copies less than 8 bytes. ++ Label L_fill_2, L_fill_4, L_exit2; ++ __ bind(L_fill_elements); ++ switch (t) { ++ case T_BYTE: ++ __ andi(AT, count, 1); ++ __ beq(AT, R0, L_fill_2); ++ __ delayed()->nop(); ++ __ sb(value, to, 0); ++ __ daddiu(to, to, 1); ++ __ bind(L_fill_2); ++ __ andi(AT, count, 1 << 1); ++ __ beq(AT, R0, L_fill_4); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(AT, count, 1 << 2); ++ __ beq(AT, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ case T_SHORT: ++ __ andi(AT, count, 1); ++ __ beq(AT, R0, L_fill_4); ++ __ delayed()->nop(); ++ __ sh(value, to, 0); ++ __ daddiu(to, to, 2); ++ __ bind(L_fill_4); ++ __ andi(AT, count, 1 << 1); ++ __ beq(AT, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ case T_INT: ++ __ beq(count, R0, L_exit2); ++ __ delayed()->nop(); ++ __ sw(value, to, 0); ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ bind(L_exit2); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11; ++ Label l_debug; ++ ++ __ daddiu(AT, tmp3, -9); //why the number is 9 ? ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ if (!aligned) { ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_9); // if arrays don't have the same alignment mod 2, do 1 element copy ++ __ delayed()->nop(); ++ ++ __ andi(AT, tmp1, 1); ++ __ beq(AT, R0, l_10); //copy 1 enlement if necessary to aligh to 2 bytes ++ __ delayed()->nop(); ++ ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_10); ++ ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 2 elements copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 2 elements if necessary to align to 4 bytes. 
++ __ andi(AT, tmp1, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -2); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 4 elements if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sw(AT, tmp2, 0); ++ { // FasterArrayCopy ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ } ++ } ++ ++ __ bind(l_7); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_6); // copy 4 at a time if less than 4 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ // For Loongson, there is 128-bit memory access. TODO ++ __ ld(AT, tmp1, 0); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_8); ++ __ delayed()->nop(); ++ } ++ __ bind(l_6); ++ ++ // copy 4 bytes at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_1); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_3); ++ __ delayed()->nop(); ++ ++ } ++ ++ // do 2 bytes copy ++ __ bind(l_1); ++ { ++ __ daddiu(AT, tmp3, -1); ++ __ blez(AT, l_9); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(AT, tmp3, -2); ++ __ bgez(AT, l_5); ++ __ delayed()->nop(); ++ } ++ ++ //do 1 element copy--byte ++ __ bind(l_9); ++ __ beq(R0, tmp3, l_4); ++ __ delayed()->nop(); ++ ++ { ++ __ bind(l_11); ++ __ lb(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -1); ++ __ sb(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 1); ++ __ daddiu(tmp2, tmp2, 1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_4); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // A0 - source array address ++ // A1 - destination array address ++ // A2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. 
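++  // --------------------------------------------------------------------------
++  // [Editor's sketch - illustrative only.] The conjoint byte/short stubs below
++  // copy from the high addresses downwards, which is what makes them safe for
++  // overlapping ranges with to > from. Reference behaviour in portable C:
++#if 0
++static void conjoint_bytes_reference(const unsigned char* from,
++                                     unsigned char* to, size_t count) {
++  while (count-- > 0) {
++    to[count] = from[count];   // last element first
++  }
++}
++#endif
++  // The real stubs additionally align both ends and move 4 bytes at a time in
++  // the middle, but the element order is the same.
++  // --------------------------------------------------------------------------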
++ // ++ address generate_conjoint_byte_copy(bool aligned, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ Label l_copy_4_bytes_loop, l_copy_suffix, l_copy_suffix_loop, l_exit; ++ Label l_copy_byte, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jbyte_disjoint_arraycopy() : ++ StubRoutines::jbyte_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 0); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ daddu(end_from, from, end_count); ++ __ daddu(end_to, to, end_count); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_byte); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ __ bind(l_from_unaligned); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. 
++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ move(T8, end_count); ++ __ daddiu(AT, end_count, -3); ++ __ blez(AT, l_copy_suffix); ++ __ delayed()->nop(); ++ ++ //__ andi(T8, T8, 3); ++ __ lea(end_from, Address(end_from, -4)); ++ __ lea(end_to, Address(end_to, -4)); ++ ++ __ dsrl(end_count, end_count, 2); ++ __ align(16); ++ __ bind(l_copy_4_bytes_loop); //l_copy_4_bytes ++ __ lw(AT, end_from, 0); ++ __ sw(AT, end_to, 0); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -1); ++ __ bne(end_count, R0, l_copy_4_bytes_loop); ++ __ delayed()->nop(); ++ ++ __ b(l_copy_suffix); ++ __ delayed()->nop(); ++ // copy dwords aligned or not with repeat move ++ // l_copy_suffix ++ // copy suffix (0-3 bytes) ++ __ bind(l_copy_suffix); ++ __ andi(T8, T8, 3); ++ __ beq(T8, R0, l_exit); ++ __ delayed()->nop(); ++ __ addiu(end_from, end_from, 3); ++ __ addiu(end_to, end_to, 3); ++ __ bind(l_copy_suffix_loop); ++ __ lb(AT, end_from, 0); ++ __ sb(AT, end_to, 0); ++ __ addiu(end_from, end_from, -1); ++ __ addiu(end_to, end_to, -1); ++ __ addiu(T8, T8, -1); ++ __ bne(T8, R0, l_copy_suffix_loop); ++ __ delayed()->nop(); ++ ++ __ bind(l_copy_byte); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lb(AT, end_from, -1); ++ __ sb(AT, end_to, -1); ++ __ daddiu(end_from, end_from, -1); ++ __ daddiu(end_to, end_to, -1); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_byte); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Generate stub for disjoint short copy. If "aligned" is true, the ++ // "from" and "to" addresses are assumed to be heapword aligned. ++ // ++ // Arguments for generated stub: ++ // from: A0 ++ // to: A1 ++ // elm.count: A2 treated as signed ++ // one element: 2 bytes ++ // ++ // Strategy for aligned==true: ++ // ++ // If length <= 9: ++ // 1. copy 1 elements at a time (l_5) ++ // ++ // If length > 9: ++ // 1. copy 4 elements at a time until less than 4 elements are left (l_7) ++ // 2. copy 2 elements at a time until less than 2 elements are left (l_6) ++ // 3. copy last element if one was left in step 2. (l_1) ++ // ++ // ++ // Strategy for aligned==false: ++ // ++ // If length <= 9: same as aligned==true case ++ // ++ // If length > 9: ++ // 1. continue with step 7. if the alignment of from and to mod 4 ++ // is different. ++ // 2. align from and to to 4 bytes by copying 1 element if necessary ++ // 3. at l_2 from and to are 4 byte aligned; continue with ++ // 6. if they cannot be aligned to 8 bytes because they have ++ // got different alignment mod 8. ++ // 4. at this point we know that both, from and to, have the same ++ // alignment mod 8, now copy one element if necessary to get ++ // 8 byte alignment of from and to. ++ // 5. copy 4 elements at a time until less than 4 elements are ++ // left; depending on step 3. all load/stores are aligned. ++ // 6. copy 2 elements at a time until less than 2 elements are ++ // left. (l_6) ++ // 7. copy 1 element at a time. (l_5) ++ // 8. copy last element if one was left in step 6. 
(l_1) ++ ++ address generate_disjoint_short_copy(bool aligned, const char * name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Register tmp1 = T0; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T8; ++ Register tmp5 = T9; ++ Register tmp6 = T2; ++ ++ address start = __ pc(); ++ ++ __ push(tmp1); ++ __ push(tmp2); ++ __ push(tmp3); ++ __ move(tmp1, A0); ++ __ move(tmp2, A1); ++ __ move(tmp3, A2); ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14; ++ Label l_debug; ++ // don't try anything fancy if arrays don't have many elements ++ __ daddiu(AT, tmp3, -23); ++ __ blez(AT, l_14); ++ __ delayed()->nop(); ++ // move push here ++ __ push(tmp4); ++ __ push(tmp5); ++ __ push(tmp6); ++ ++ if (!aligned) { ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 1); ++ __ bne(AT, R0, l_debug); // if arrays don't have the same alignment mod 2, can this happen? ++ __ delayed()->nop(); ++ ++ __ xorr(AT, A0, A1); ++ __ andi(AT, AT, 3); ++ __ bne(AT, R0, l_1); // if arrays don't have the same alignment mod 4, do 1 element copy ++ __ delayed()->nop(); ++ ++ // At this point it is guaranteed that both, from and to have the same alignment mod 4. ++ ++ // Copy 1 element if necessary to align to 4 bytes. ++ __ andi(AT, A0, 3); ++ __ beq(AT, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ lhu(AT, tmp1, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ bind(l_2); ++ ++ // At this point the positions of both, from and to, are at least 4 byte aligned. ++ ++ // Copy 4 elements at a time. ++ // Align to 8 bytes, but only if both, from and to, have same alignment mod 8. ++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_6); // not same alignment mod 8 -> copy 2, either from or to will be unaligned ++ __ delayed()->nop(); ++ ++ // Copy a 2-element word if necessary to align to 8 bytes. ++ __ andi(AT, tmp1, 7); ++ __ beq(AT, R0, l_7); ++ __ delayed()->nop(); ++ ++ __ lw(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -2); ++ __ sw(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 4); ++ __ daddiu(tmp2, tmp2, 4); ++ }// end of if (!aligned) ++ ++ __ bind(l_7); ++ // At this time the position of both, from and to, are at least 8 byte aligned. ++ // Copy 8 elemnets at a time. ++ // Align to 16 bytes, but only if both from and to have same alignment mod 8. 
++ __ xorr(AT, tmp1, tmp2); ++ __ andi(AT, AT, 15); ++ __ bne(AT, R0, l_9); ++ __ delayed()->nop(); ++ ++ // Copy 4-element word if necessary to align to 16 bytes, ++ __ andi(AT, tmp1, 15); ++ __ beq(AT, R0, l_10); ++ __ delayed()->nop(); ++ ++ __ ld(AT, tmp1, 0); ++ __ daddiu(tmp3, tmp3, -4); ++ __ sd(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ ++ __ bind(l_10); ++ ++ // Copy 8 elements at a time; either the loads or the stores can ++ // be unalligned if aligned == false ++ ++ { // FasterArrayCopy ++ __ bind(l_11); ++ // For loongson the 128-bit memory access instruction is gslq/gssq ++ if (UseLEXT1) { ++ __ gslq(AT, tmp4, tmp1, 0); ++ __ gslq(tmp5, tmp6, tmp1, 16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ gssq(AT, tmp4, tmp2, -32); ++ __ gssq(tmp5, tmp6, tmp2, -16); ++ } else { ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ daddiu(tmp1, tmp1, 32); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2, 16); ++ __ sd(tmp6, tmp2, 24); ++ __ daddiu(tmp2, tmp2, 32); ++ } ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_11); ++ __ delayed()->nop(); ++ } ++ __ bind(l_9); ++ ++ // Copy 4 elements at a time; either the loads or the stores can ++ // be unaligned if aligned == false. ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -15);// loop unrolling 4 times, so if the elements should not be less than 16 ++ __ blez(AT, l_4); // copy 2 at a time if less than 16 elements remain ++ __ delayed()->nop(); ++ ++ __ bind(l_8); ++ __ ld(AT, tmp1, 0); ++ __ ld(tmp4, tmp1, 8); ++ __ ld(tmp5, tmp1, 16); ++ __ ld(tmp6, tmp1, 24); ++ __ sd(AT, tmp2, 0); ++ __ sd(tmp4, tmp2, 8); ++ __ sd(tmp5, tmp2,16); ++ __ daddiu(tmp1, tmp1, 32); ++ __ daddiu(tmp2, tmp2, 32); ++ __ daddiu(tmp3, tmp3, -16); ++ __ daddiu(AT, tmp3, -16); ++ __ bgez(AT, l_8); ++ __ delayed()->sd(tmp6, tmp2, -8); ++ } ++ __ bind(l_6); ++ ++ // copy 2 element at a time ++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -7); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_3); ++ __ lw(AT, tmp1, 0); ++ __ lw(tmp4, tmp1, 4); ++ __ lw(tmp5, tmp1, 8); ++ __ lw(tmp6, tmp1, 12); ++ __ sw(AT, tmp2, 0); ++ __ sw(tmp4, tmp2, 4); ++ __ sw(tmp5, tmp2, 8); ++ __ daddiu(tmp1, tmp1, 16); ++ __ daddiu(tmp2, tmp2, 16); ++ __ daddiu(tmp3, tmp3, -8); ++ __ daddiu(AT, tmp3, -8); ++ __ bgez(AT, l_3); ++ __ delayed()->sw(tmp6, tmp2, -4); ++ } ++ ++ __ bind(l_1); ++ // do single element copy (8 bit), can this happen? 
++ { // FasterArrayCopy ++ __ daddiu(AT, tmp3, -3); ++ __ blez(AT, l_4); ++ __ delayed()->nop(); ++ ++ __ bind(l_5); ++ __ lhu(AT, tmp1, 0); ++ __ lhu(tmp4, tmp1, 2); ++ __ lhu(tmp5, tmp1, 4); ++ __ lhu(tmp6, tmp1, 6); ++ __ sh(AT, tmp2, 0); ++ __ sh(tmp4, tmp2, 2); ++ __ sh(tmp5, tmp2, 4); ++ __ daddiu(tmp1, tmp1, 8); ++ __ daddiu(tmp2, tmp2, 8); ++ __ daddiu(tmp3, tmp3, -4); ++ __ daddiu(AT, tmp3, -4); ++ __ bgez(AT, l_5); ++ __ delayed()->sh(tmp6, tmp2, -2); ++ } ++ // single element ++ __ bind(l_4); ++ ++ __ pop(tmp6); ++ __ pop(tmp5); ++ __ pop(tmp4); ++ ++ __ bind(l_14); ++ { // FasterArrayCopy ++ __ beq(R0, tmp3, l_13); ++ __ delayed()->nop(); ++ ++ __ bind(l_12); ++ __ lhu(AT, tmp1, 0); ++ __ sh(AT, tmp2, 0); ++ __ daddiu(tmp1, tmp1, 2); ++ __ daddiu(tmp2, tmp2, 2); ++ __ daddiu(tmp3, tmp3, -1); ++ __ daddiu(AT, tmp3, -1); ++ __ bgez(AT, l_12); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_13); ++ __ pop(tmp3); ++ __ pop(tmp2); ++ __ pop(tmp1); ++ ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ __ bind(l_debug); ++ __ stop("generate_disjoint_short_copy should not reach here"); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, const char *name) { ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ Label l_exit, l_copy_short, l_from_unaligned, l_unaligned, l_4_bytes_aligned; ++ ++ address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jshort_disjoint_arraycopy() : ++ StubRoutines::jshort_disjoint_arraycopy(); ++ ++ array_overlap_test(nooverlap_target, 1); ++ ++ const Register from = A0; // source array address ++ const Register to = A1; // destination array address ++ const Register count = A2; // elements count ++ const Register end_from = T3; // source array end address ++ const Register end_to = T0; // destination array end address ++ const Register end_count = T1; // destination array end address ++ ++ __ push(end_from); ++ __ push(end_to); ++ __ push(end_count); ++ __ push(T8); ++ ++ // copy from high to low ++ __ move(end_count, count); ++ __ sll(AT, end_count, Address::times_2); ++ __ daddu(end_from, from, AT); ++ __ daddu(end_to, to, AT); ++ ++ // If end_from and end_to has differante alignment, unaligned copy is performed. ++ __ andi(AT, end_from, 3); ++ __ andi(T8, end_to, 3); ++ __ bne(AT, T8, l_copy_short); ++ __ delayed()->nop(); ++ ++ // First deal with the unaligned data at the top. ++ __ bind(l_unaligned); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_from, 3); ++ __ bne(AT, R0, l_from_unaligned); ++ __ delayed()->nop(); ++ ++ __ andi(AT, end_to, 3); ++ __ beq(AT, R0, l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // Copy 1 element if necessary to align to 4 bytes. 
++ __ bind(l_from_unaligned); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_unaligned); ++ __ delayed()->nop(); ++ ++ // now end_to, end_from point to 4-byte aligned high-ends ++ // end_count contains byte count that is not copied. ++ // copy 4 bytes at a time ++ __ bind(l_4_bytes_aligned); ++ ++ __ daddiu(AT, end_count, -1); ++ __ blez(AT, l_copy_short); ++ __ delayed()->nop(); ++ ++ __ lw(AT, end_from, -4); ++ __ sw(AT, end_to, -4); ++ __ addiu(end_from, end_from, -4); ++ __ addiu(end_to, end_to, -4); ++ __ addiu(end_count, end_count, -2); ++ __ b(l_4_bytes_aligned); ++ __ delayed()->nop(); ++ ++ // copy 1 element at a time ++ __ bind(l_copy_short); ++ __ beq(end_count, R0, l_exit); ++ __ delayed()->nop(); ++ __ lhu(AT, end_from, -2); ++ __ sh(AT, end_to, -2); ++ __ daddiu(end_from, end_from, -2); ++ __ daddiu(end_to, end_to, -2); ++ __ daddiu(end_count, end_count, -1); ++ __ b(l_copy_short); ++ __ delayed()->nop(); ++ ++ __ bind(l_exit); ++ __ pop(T8); ++ __ pop(end_count); ++ __ pop(end_to); ++ __ pop(end_from); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
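++  // --------------------------------------------------------------------------
++  // [Editor's note.] "store check code" in the comment above is expressed, in
++  // this JDK 11 port, through the GC interface rather than explicit card
++  // marking: the oop variants build a DecoratorSet (IN_HEAP | IS_ARRAY | ...)
++  // and bracket the element-copy loop with the barrier-set hooks, roughly
++  //   bs->arraycopy_prologue(_masm, decorators, is_oop, dst, count);
++  //   ... copy loop ...
++  //   bs->arraycopy_epilogue(_masm, decorators, is_oop, dst, count, tmp);
++  // as can be seen in the stub bodies below.
++  // --------------------------------------------------------------------------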
++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4, l_5, l_6, l_7; ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ if(!aligned) { ++ __ xorr(AT, T3, T0); ++ __ andi(AT, AT, 7); ++ __ bne(AT, R0, l_5); // not same alignment mod 8 -> copy 1 element each time ++ __ delayed()->nop(); ++ ++ __ andi(AT, T3, 7); ++ __ beq(AT, R0, l_6); //copy 2 elements each time ++ __ delayed()->nop(); ++ ++ __ lw(AT, T3, 0); ++ __ daddiu(T1, T1, -1); ++ __ sw(AT, T0, 0); ++ __ daddiu(T3, T3, 4); ++ __ daddiu(T0, T0, 4); ++ } ++ ++ { ++ __ bind(l_6); ++ __ daddiu(AT, T1, -1); ++ __ blez(AT, l_5); ++ __ delayed()->nop(); ++ ++ __ bind(l_7); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ daddiu(T3, T3, 8); ++ __ daddiu(T0, T0, 8); ++ __ daddiu(T1, T1, -2); ++ __ daddiu(AT, T1, -2); ++ __ bgez(AT, l_7); ++ __ delayed()->nop(); ++ } ++ ++ __ bind(l_5); ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, 4); ++ __ addiu(T0, T0, 4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jint_disjoint_arraycopy() : ++ StubRoutines::jint_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 2); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ // no registers are destroyed by this call ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -4); ++ __ sll(AT, T1, Address::times_4); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -4); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ lw(AT, T3, 0); ++ __ sw(AT, T0, 0); ++ __ addiu(T3, T3, -4); ++ __ addiu(T0, T0, -4); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). 
++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_3, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_3); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_3); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target; ++ ++ if (is_oop) { ++ nooverlap_target = aligned ? ++ StubRoutines::arrayof_oop_disjoint_arraycopy() : ++ StubRoutines::oop_disjoint_arraycopy(); ++ } else { ++ nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ } ++ ++ array_overlap_test(nooverlap_target, 3); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, is_oop, A1, A2); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ push(T8); ++ __ push(T9); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ beq(T1, R0, l_4); ++ __ delayed()->nop(); ++ ++ __ align(16); ++ __ bind(l_2); ++ __ ld(AT, T3, 0); ++ __ sd(AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0, -8); ++ __ addiu(T1, T1, -1); ++ __ bne(T1, R0, l_2); ++ __ delayed()->nop(); ++ ++ // exit ++ __ bind(l_4); ++ bs->arraycopy_epilogue(_masm, decorators, is_oop, A1, A2, T1); ++ __ pop(T9); ++ __ pop(T8); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ //FIXME ++ address generate_disjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, 8); ++ __ addiu(T0, T0, 8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ ++ address generate_conjoint_long_copy(bool aligned, const char *name) { ++ Label l_1, l_2; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ address nooverlap_target = aligned ? 
++ StubRoutines::arrayof_jlong_disjoint_arraycopy() : ++ StubRoutines::jlong_disjoint_arraycopy(); ++ array_overlap_test(nooverlap_target, 3); ++ ++ __ push(T3); ++ __ push(T0); ++ __ push(T1); ++ ++ __ move(T1, A2); ++ __ move(T3, A0); ++ __ move(T0, A1); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T3, AT); ++ __ daddiu(T3, AT, -8); ++ __ sll(AT, T1, Address::times_8); ++ __ addu(AT, T0, AT); ++ __ daddiu(T0, AT, -8); ++ ++ __ b(l_2); ++ __ delayed()->nop(); ++ __ align(16); ++ __ bind(l_1); ++ __ ld(AT, T3, 0); ++ __ sd (AT, T0, 0); ++ __ addiu(T3, T3, -8); ++ __ addiu(T0, T0,-8); ++ __ bind(l_2); ++ __ addiu(T1, T1, -1); ++ __ bgez(T1, l_1); ++ __ delayed()->nop(); ++ __ pop(T1); ++ __ pop(T0); ++ __ pop(T3); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ if (UseCompressedOops) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, ++ "oop_disjoint_arraycopy_uninit", true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, ++ "oop_arraycopy_uninit", true); ++ } ++ ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, "jshort_disjoint_arraycopy"); ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, "jint_disjoint_arraycopy"); ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_copy(false, "jlong_disjoint_arraycopy"); ++ ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, "jbyte_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, "jshort_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, "jint_arraycopy"); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_copy(false, "jlong_arraycopy"); ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = 
StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ } ++ ++ // add a function to implement SafeFetch32 and SafeFetchN ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue); ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); ++ // ++ // arguments: ++ // A0 = adr ++ // A1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); ++ ++ // Load *adr into A1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ lw(A1, A0, 0); ++ break; ++ case 8: ++ // int64_t ++ __ ld(A1, A0, 0); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ addu(V0,A1,R0); ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ ++#undef __ ++#define __ masm-> ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ bool restore_saved_exception_pc) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. 
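++    // Summary of the slot order fixed by the 'layout' enum below: the
++    // thread/last_java_sp argument slot sits at the lowest offset, followed
++    // by the callee-saved registers S7..S0, then the saved FP and return
++    // address at the top.  'framesize' is also the frame size recorded in
++    // the OopMap created after the runtime call.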
++ enum layout { ++ thread_off, // last_java_sp ++ S7_off, // callee saved register sp + 1 ++ S6_off, // callee saved register sp + 2 ++ S5_off, // callee saved register sp + 3 ++ S4_off, // callee saved register sp + 4 ++ S3_off, // callee saved register sp + 5 ++ S2_off, // callee saved register sp + 6 ++ S1_off, // callee saved register sp + 7 ++ S0_off, // callee saved register sp + 8 ++ FP_off, ++ ret_address, ++ framesize ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ // CodeBuffer* code = new CodeBuffer(insts_size, locs_size, 0, 0, 0, false, ++ // NULL, NULL, NULL, false, NULL, name, false); ++ CodeBuffer code (name , insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++#ifndef OPT_THREAD ++ Register java_thread = TREG; ++ __ get_thread(java_thread); ++#else ++ Register java_thread = TREG; ++#endif ++ if (restore_saved_exception_pc) { ++ __ ld(RA, java_thread, in_bytes(JavaThread::saved_exception_pc_offset())); ++ } ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ addiu(SP, SP, (-1) * (framesize-2) * wordSize); // prolog ++ __ sd(S0, SP, S0_off * wordSize); ++ __ sd(S1, SP, S1_off * wordSize); ++ __ sd(S2, SP, S2_off * wordSize); ++ __ sd(S3, SP, S3_off * wordSize); ++ __ sd(S4, SP, S4_off * wordSize); ++ __ sd(S5, SP, S5_off * wordSize); ++ __ sd(S6, SP, S6_off * wordSize); ++ __ sd(S7, SP, S7_off * wordSize); ++ ++ int frame_complete = __ pc() - start; ++ // push java thread (becomes first argument of C function) ++ __ sd(java_thread, SP, thread_off * wordSize); ++ if (java_thread != A0) ++ __ move(A0, java_thread); ++ ++ // Set up last_Java_sp and last_Java_fp ++ __ set_last_Java_frame(java_thread, SP, FP, NULL); ++ // Align stack ++ __ set64(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ ++ __ relocate(relocInfo::internal_pc_type); ++ { ++ intptr_t save_pc = (intptr_t)__ pc() + NativeMovConstReg::instruction_size + 28; ++ __ patchable_set48(AT, save_pc); ++ } ++ __ sd(AT, java_thread, in_bytes(JavaThread::last_Java_pc_offset())); ++ ++ // Call runtime ++ __ call(runtime_entry); ++ __ delayed()->nop(); ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(__ offset(), map); ++ ++ // restore the thread (cannot use the pushed argument since arguments ++ // may be overwritten by C code generated by an optimizing compiler); ++ // however can use the register value directly if it is callee saved. ++#ifndef OPT_THREAD ++ __ get_thread(java_thread); ++#endif ++ ++ __ ld(SP, java_thread, in_bytes(JavaThread::last_Java_sp_offset())); ++ __ reset_last_Java_frame(java_thread, true); ++ ++ // Restore callee save registers. 
This must be done after resetting the Java frame ++ __ ld(S0, SP, S0_off * wordSize); ++ __ ld(S1, SP, S1_off * wordSize); ++ __ ld(S2, SP, S2_off * wordSize); ++ __ ld(S3, SP, S3_off * wordSize); ++ __ ld(S4, SP, S4_off * wordSize); ++ __ ld(S5, SP, S5_off * wordSize); ++ __ ld(S6, SP, S6_off * wordSize); ++ __ ld(S7, SP, S7_off * wordSize); ++ ++ // discard arguments ++ __ move(SP, FP); // epilog ++ __ pop(FP); ++ ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ ld(AT, java_thread, in_bytes(Thread::pending_exception_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ should_not_reach_here(); ++ __ bind(L); ++#endif //ASSERT ++ __ jmp(StubRoutines::forward_exception_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ RuntimeStub* stub = RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ framesize, ++ oop_maps, false); ++ return stub->entry_point(); ++ } ++ ++ // Initialization ++ void generate_initial() { ++ // Generates all stubs and initializes the entry points ++ ++ //------------------------------------------------------------- ++ //----------------------------------------------------------- ++ // entry points that exist in all platforms ++ // Note: This is code that could be shared among different platforms - however the benefit seems to be smaller ++ // than the disadvantage of having a much more complicated generator structure. ++ // See also comment in stubRoutines.hpp. ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_StackOverflowError), ++ false); ++ StubRoutines::_throw_delayed_StackOverflowError_entry = ++ generate_throw_exception("delayed StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_delayed_StackOverflowError), ++ false); ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. ++ StubRoutines::_throw_AbstractMethodError_entry = generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_AbstractMethodError), false); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime:: throw_IncompatibleClassChangeError), false); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, SharedRuntime::throw_NullPointerException_at_call), false); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++#ifndef CORE ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++#endif ++ ++ // Safefetch stubs. 
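++    // SafeFetch32/SafeFetchN let VM code probe memory that may not be
++    // mapped: if the load at *fault_pc faults, execution continues at
++    // *continuation_pc and the stub returns the caller-supplied errValue
++    // instead of the loaded value (see generate_safefetch above).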
++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++ ++#ifdef COMPILER2 ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubRoutines::_montgomeryMultiply ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); ++ } ++ if (UseMontgomerySquareIntrinsic) { ++ StubRoutines::_montgomerySquare ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); ++ } ++#endif ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff --git a/src/hotspot/cpu/mips/stubRoutines_mips.hpp b/src/hotspot/cpu/mips/stubRoutines_mips.hpp +new file mode 100644 +index 0000000000..920c08844e +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubRoutines_mips.hpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++#define CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. ++ ++static bool returns_to_call_stub(address return_pc){ ++ return return_pc == _call_stub_return_address||return_pc == gs2::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000, // simply increase if too small (assembler will crash if too small) ++ code_size2 = 40000 // simply increase if too small (assembler will crash if too small) ++}; ++ ++class gs2 { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. 
++ static address _call_stub_compiled_return; ++ ++public: ++ // Call back points for traps in compiled code ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret){ _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_STUBROUTINES_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp +new file mode 100644 +index 0000000000..358d580d52 +--- /dev/null ++++ b/src/hotspot/cpu/mips/stubRoutines_mips_64.cpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++ ++// a description of how to extend it, see the stubRoutines.hpp file. ++ ++//find the last fp value ++address StubRoutines::gs2::_call_stub_compiled_return = NULL; +diff --git a/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp +new file mode 100644 +index 0000000000..19e2f29c59 +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateInterpreterGenerator_mips.cpp +@@ -0,0 +1,2149 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++int TemplateInterpreter::InterpreterCodeSize = 500 * K; ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++address TemplateInterpreterGenerator::generate_slow_signature_handler() { ++ address entry = __ pc(); ++ ++ // Rmethod: method ++ // LVP: pointer to locals ++ // A3: first stack arg ++ __ move(A3, SP); ++ __ daddiu(SP, SP, -10 * wordSize); ++ __ sd(RA, SP, 0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ Rmethod, LVP, A3); ++ ++ // V0: result handler ++ ++ // Stack layout: ++ // ... ++ // 10 stack arg0 <--- old sp ++ // 9 float/double identifiers ++ // 8 register arg7 ++ // ... ++ // 2 register arg1 ++ // 1 aligned slot ++ // SP: 0 return address ++ ++ // Do FP first so we can use T3 as temp ++ __ ld(T3, Address(SP, 9 * wordSize)); // float/double identifiers ++ ++ // A0 is for env. ++ // If the mothed is not static, A1 will be corrected in generate_native_entry. ++ for ( int i = 1; i < Argument::n_register_parameters; i++ ) { ++ Register reg = as_Register(i + A0->encoding()); ++ FloatRegister floatreg = as_FloatRegister(i + F12->encoding()); ++ Label isfloatordouble, isdouble, next; ++ ++ __ andi(AT, T3, 1 << (i*2)); // Float or Double? ++ __ bne(AT, R0, isfloatordouble); ++ __ delayed()->nop(); ++ ++ // Do Int register here ++ __ ld(reg, SP, (1 + i) * wordSize); ++ __ b (next); ++ __ delayed()->nop(); ++ ++ __ bind(isfloatordouble); ++ __ andi(AT, T3, 1 << ((i*2)+1)); // Double? 
++ __ bne(AT, R0, isdouble); ++ __ delayed()->nop(); ++ ++ // Do Float Here ++ __ lwc1(floatreg, SP, (1 + i) * wordSize); ++ __ b(next); ++ __ delayed()->nop(); ++ ++ // Do Double here ++ __ bind(isdouble); ++ __ ldc1(floatreg, SP, (1 + i) * wordSize); ++ ++ __ bind(next); ++ } ++ ++ __ ld(RA, SP, 0); ++ __ daddiu(SP, SP, 10 * wordSize); ++ __ jr(RA); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_update_entry() { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++/** ++* Method entry for static (non-native) methods: ++* int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++* int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long address, int off, int end) ++*/ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ if (UseCRC32CIntrinsics) { ++ address entry = __ pc(); ++ Unimplemented(); ++ return entry; ++ } ++ return NULL; ++} ++ ++// ++// Various method entries ++// ++ ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) { ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: ++ // [ arg ] <-- sp ++ // [ arg ] ++ // retaddr in ra ++ ++ address entry_point = NULL; ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ abs_d(F0, F12); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sqrt: ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ sqrt_d(F0, F12); ++ __ move(SP, Rsender); ++ break; ++ case Interpreter::java_lang_math_sin : ++ case Interpreter::java_lang_math_cos : ++ case Interpreter::java_lang_math_tan : ++ case Interpreter::java_lang_math_log : ++ case Interpreter::java_lang_math_log10 : ++ case Interpreter::java_lang_math_exp : ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 0); ++ __ move(SP, Rsender); ++ __ dmtc1(RA, F24); ++ __ dmtc1(SP, F25); ++ __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); ++ generate_transcendental_entry(kind, 1); ++ __ dmfc1(SP, F25); ++ __ dmfc1(RA, F24); ++ break; ++ case Interpreter::java_lang_math_pow : ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 2 * Interpreter::stackElementSize); ++ __ ldc1(F13, SP, 0); ++ __ move(SP, Rsender); ++ __ dmtc1(RA, F24); ++ __ dmtc1(SP, F25); ++ __ dins(SP, R0, 0, exact_log2(StackAlignmentInBytes)); ++ generate_transcendental_entry(kind, 2); ++ __ dmfc1(SP, F25); ++ __ dmfc1(RA, F24); ++ break; ++ case Interpreter::java_lang_math_fmaD : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ ldc1(F12, SP, 4 * Interpreter::stackElementSize); ++ __ ldc1(F13, SP, 2 * Interpreter::stackElementSize); ++ __ ldc1(F14, SP, 0); ++ __ madd_d(F0, F14, F13, F12); ++ __ move(SP, Rsender); ++ } ++ break; ++ case Interpreter::java_lang_math_fmaF : ++ if (UseFMA) { ++ entry_point = __ pc(); ++ __ lwc1(F12, SP, 2 * Interpreter::stackElementSize); ++ __ lwc1(F13, SP, Interpreter::stackElementSize); ++ __ lwc1(F14, SP, 0); ++ __ madd_s(F0, F14, F13, F12); ++ __ move(SP, Rsender); ++ } ++ break; ++ default: ++ ; ++ } ++ if (entry_point) { ++ __ jr(RA); ++ __ delayed()->nop(); ++ } ++ ++ return entry_point; ++} ++ ++ // double trigonometrics and transcendentals ++ // static jdouble dsin(jdouble x); ++ // static jdouble dcos(jdouble x); ++ // static jdouble dtan(jdouble x); ++ // static jdouble dlog(jdouble x); ++ // static jdouble dlog10(jdouble x); ++ // static jdouble dexp(jdouble x); ++ // static jdouble dpow(jdouble x, jdouble y); ++ ++void TemplateInterpreterGenerator::generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs) { ++ address fn; ++ switch (kind) { ++ case Interpreter::java_lang_math_sin : ++ if (StubRoutines::dsin() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dsin); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dsin()); ++ } ++ break; ++ case Interpreter::java_lang_math_cos : ++ if (StubRoutines::dcos() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dcos); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dcos()); ++ } ++ break; ++ case Interpreter::java_lang_math_tan : ++ if (StubRoutines::dtan() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dtan); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dtan()); ++ } ++ break; ++ case Interpreter::java_lang_math_log : ++ if (StubRoutines::dlog() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, 
SharedRuntime::dlog); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog()); ++ } ++ break; ++ case Interpreter::java_lang_math_log10 : ++ if (StubRoutines::dlog10() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dlog10); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dlog10()); ++ } ++ break; ++ case Interpreter::java_lang_math_exp : ++ if (StubRoutines::dexp() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dexp); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dexp()); ++ } ++ break; ++ case Interpreter::java_lang_math_pow : ++ if (StubRoutines::dpow() == NULL) { ++ fn = CAST_FROM_FN_PTR(address, SharedRuntime::dpow); ++ } else { ++ fn = CAST_FROM_FN_PTR(address, StubRoutines::dpow()); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ fn = NULL; // unreachable ++ } ++ __ li(T9, fn); ++ __ jalr(T9); ++ __ delayed()->nop(); ++} ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) { ++ ++ // Rmethod: methodOop ++ // V0: receiver (unused) ++ // Rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ // throw exception ++ // adjust stack to what a normal return would do ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), Rmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ return entry_point; ++} ++ ++ ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bci_offset = frame::interpreter_frame_bcp_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ address entry = __ pc(); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ addiu(T1, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ subu(T1, T1, SP); // T1 = maximal sp for current fp ++ __ bgez(T1, L); // check if frame is complete ++ __ delayed()->nop(); ++ __ stop("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ // FIXME: please change the func restore_bcp ++ // S0 is the conventional register for bcp ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ // FIXME: why do not pass parameter thread ? ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() { ++ address entry = __ pc(); ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // ??? 
convention: expect array in register A1 ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_ArrayIndexOutOfBoundsException), A1, A2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() { ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_ClassCastException), FSR); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) { ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ // expression stack must be empty before entering the VM if an exception happened ++ __ empty_expression_stack(); ++ // setup parameters ++ __ li(A1, (long)name); ++ if (pass_oop) { ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_klass_exception), A1, FSR); ++ } else { ++ __ li(A2, (long)message); ++ __ call_VM(V0, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), A1, A2); ++ } ++ // throw exception ++ __ jmp(Interpreter::throw_exception_entry(), relocInfo::none); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) { ++ ++ address entry = __ pc(); ++ ++ // Restore stack bottom in case i2c adjusted stack ++ __ ld(SP, Address(FP, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that sp is now tos until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ // mdp: T8 ++ // ret: FSR ++ // tmp: T9 ++ if (state == atos) { ++ Register mdp = T8; ++ Register tmp = T9; ++ __ profile_return_type(mdp, FSR, tmp); ++ } ++ ++ ++ const Register cache = T9; ++ const Register index = T3; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ lw(flags, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andi(flags, flags, ConstantPoolCacheEntry::parameter_size_mask); ++ __ dsll(AT, flags, Interpreter::logStackElementSize); ++ __ daddu(SP, SP, AT); ++ ++ Register java_thread; ++#ifndef OPT_THREAD ++ java_thread = T9; ++ __ get_thread(java_thread); ++#else ++ java_thread = TREG; ++#endif ++ ++ __ check_and_handle_popframe(java_thread); ++ __ check_and_handle_earlyret(java_thread); ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, ++ int step, ++ address continuation) { ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ restore_bcp(); ++ __ restore_locals(); ++ // handle exceptions ++ { ++ Label L; ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ ld(AT, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ if (continuation == NULL) { ++ __ dispatch_next(state, 
step); ++ } else { ++ __ jump_to_entry(continuation); ++ __ delayed()->nop(); ++ } ++ return entry; ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : // fall through ++ case T_LONG : // fall through ++ case T_VOID : i = 4; break; ++ case T_FLOAT : i = 5; break; ++ case T_DOUBLE : i = 6; break; ++ case T_OBJECT : // fall through ++ case T_ARRAY : i = 7; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) { ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ andi(V0, V0, 0xFFFF); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ { ++ __ ld(V0, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ verify_oop(V0); // and verify it ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ jr(RA); // return from result handler ++ __ delayed()->nop(); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) { ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos)); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. 
++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// prerequisites : method in T0, invocation counter in T3 ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) { ++ Label done; ++ const Address invocation_counter(FSR, in_bytes(MethodCounters::invocation_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ const Address backedge_counter (FSR, in_bytes(MethodCounters::backedge_counter_offset()) ++ + in_bytes(InvocationCounter::counter_offset())); ++ ++ __ get_method_counters(Rmethod, FSR, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into methodDataOop ++ __ lw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ __ incrementl(T9, 1); ++ __ sw(T9, FSR, in_bytes(MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ lw(T3, invocation_counter); ++ __ increment(T3, InvocationCounter::count_increment); ++ __ sw(T3, invocation_counter); // save invocation count ++ ++ __ lw(FSR, backedge_counter); // load backedge counter ++ __ li(AT, InvocationCounter::count_mask_value); // mask out the status bits ++ __ andr(FSR, FSR, AT); ++ ++ __ daddu(T3, T3, FSR); // add both counters ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { ++ __ slti(AT, T3, InvocationCounter::InterpreterProfileLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T3, AT); ++ } ++ ++ __ bne_far(AT, R0, *profile_method_continue); ++ __ delayed()->nop(); ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(FSR, *profile_method); ++ } ++ ++ if (Assembler::is_simm16(CompileThreshold)) { ++ __ srl(AT, T3, InvocationCounter::count_shift); ++ __ slti(AT, AT, CompileThreshold); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterInvocationLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T3, AT); ++ } ++ ++ __ beq_far(AT, R0, *overflow); ++ __ delayed()->nop(); ++ __ bind(done); ++} ++ ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) { ++ ++ // Asm interpreter on entry ++ // S7 - locals ++ // S0 - bcp ++ // Rmethod - method ++ // FP - interpreter frame ++ ++ // On return (i.e. jump to entry_point) ++ // Rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ // the bcp is valid if and only if it's not null ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), R0); ++ __ ld(Rmethod, FP, method_offset); ++ // Preserve invariant that S0/S7 contain bcp/locals of sender frame ++ __ b_far(do_continue); ++ __ delayed()->nop(); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. 
++// ++// Args: ++// T2: number of additional locals this frame needs (what we must check) ++// T0: Method* ++// ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) { ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // T0: Method* ++ // T2: number of additional locals this frame needs (what we must check) ++ ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = -(frame::interpreter_frame_initial_sp_offset*wordSize) ++ + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ ++ Label after_frame_check; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. ++ __ move(AT, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ slt(AT, AT, T2); ++ __ beq(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++#ifndef OPT_THREAD ++ Register thread = T1; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ ++ // locals + overhead, in bytes ++ __ dsll(T3, T2, Interpreter::logStackElementSize); ++ __ daddiu(T3, T3, overhead_size); // locals * 4 + overhead_size --> T3 ++ ++#ifdef ASSERT ++ Label stack_base_okay, stack_size_okay; ++ // verify that thread stack base is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); ++ __ bne(AT, R0, stack_base_okay); ++ __ delayed()->nop(); ++ __ stop("stack base is zero"); ++ __ bind(stack_base_okay); ++ // verify that thread stack size is non-zero ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); ++ __ bne(AT, R0, stack_size_okay); ++ __ delayed()->nop(); ++ __ stop("stack size is zero"); ++ __ bind(stack_size_okay); ++#endif ++ ++ // Add stack base to locals and subtract stack size ++ __ ld(AT, thread, in_bytes(Thread::stack_base_offset())); // stack_base --> AT ++ __ daddu(T3, T3, AT); // locals * 4 + overhead_size + stack_base--> T3 ++ __ ld(AT, thread, in_bytes(Thread::stack_size_offset())); // stack_size --> AT ++ __ dsubu(T3, T3, AT); // locals * 4 + overhead_size + stack_base - stack_size --> T3 ++ ++ // Use the bigger size for banging. ++ const int max_bang_size = (int)MAX2(JavaThread::stack_shadow_zone_size(), JavaThread::stack_guard_zone_size()); ++ ++ // add in the redzone and yellow size ++ __ move(AT, max_bang_size); ++ __ addu(T3, T3, AT); ++ ++ // check against the current stack bottom ++ __ slt(AT, T3, SP); ++ __ bne(AT, R0, after_frame_check); ++ __ delayed()->nop(); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. 
++ __ move(SP, Rsender); ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jmp(StubRoutines::throw_StackOverflowError_entry(), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++ // all done with frame size check ++ __ bind(after_frame_check); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// Rmethod - Method* ++void TemplateInterpreterGenerator::lock_method(void) { ++ // synchronize method ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T0, T0, JVM_ACC_SYNCHRONIZED); ++ __ bne(T0, R0, L); ++ __ delayed()->nop(); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // get synchronization object ++ { ++ Label done; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, T0, JVM_ACC_STATIC); ++ __ ld(T0, LVP, Interpreter::local_offset_in_bytes(0)); ++ __ beq(T2, R0, done); ++ __ delayed()->nop(); ++ __ load_mirror(T0, Rmethod, T9); ++ __ bind(done); ++ } ++ // add space for monitor & lock ++ __ daddiu(SP, SP, (-1) * entry_size); // add space for a monitor entry ++ __ sd(SP, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ // set new monitor block top ++ __ sd(T0, SP, BasicObjectLock::obj_offset_in_bytes()); // store object ++ // FIXME: I do not know what lock_object will do and what it will need ++ __ move(c_rarg0, SP); // object address ++ __ lock_object(c_rarg0); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) { ++ ++ // [ local var m-1 ] <--- sp ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- T0(sender's sp) ++ // ... 
++ // [ argument word 0 ] <--- S7 ++ ++ // initialize fixed part of activation frame ++ // sender's sp in Rsender ++ int i = 0; ++ int frame_size = 10; ++#ifndef CORE ++ ++frame_size; ++#endif ++ __ daddiu(SP, SP, (-frame_size) * wordSize); ++ __ sd(RA, SP, (frame_size - 1) * wordSize); // save return address ++ __ sd(FP, SP, (frame_size - 2) * wordSize); // save sender's fp ++ __ daddiu(FP, SP, (frame_size - 2) * wordSize); ++ __ sd(Rsender, FP, (-++i) * wordSize); // save sender's sp ++ __ sd(R0, FP,(-++i) * wordSize); //save last_sp as null ++ __ sd(LVP, FP, (-++i) * wordSize); // save locals offset ++ __ ld(BCP, Rmethod, in_bytes(Method::const_offset())); // get constMethodOop ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); // get codebase ++ __ sd(Rmethod, FP, (-++i) * wordSize); // save Method* ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(T2, Rmethod, T9); ++ __ sd(T2, FP, (-++i) * wordSize); // Mirror ++#ifndef CORE ++ if (ProfileInterpreter) { ++ Label method_data_continue; ++ __ ld(AT, Rmethod, in_bytes(Method::method_data_offset())); ++ __ beq(AT, R0, method_data_continue); ++ __ delayed()->nop(); ++ __ daddiu(AT, AT, in_bytes(MethodData::data_offset())); ++ __ bind(method_data_continue); ++ __ sd(AT, FP, (-++i) * wordSize); ++ } else { ++ __ sd(R0, FP, (-++i) * wordSize); ++ } ++#endif // !CORE ++ ++ __ ld(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld(T2, T2, ConstantPool::cache_offset_in_bytes()); ++ __ sd(T2, FP, (-++i) * wordSize); // set constant pool cache ++ if (native_call) { ++ __ sd(R0, FP, (-++i) * wordSize); // no bcp ++ } else { ++ __ sd(BCP, FP, (-++i) * wordSize); // set bcp ++ } ++ __ sd(SP, FP, (-++i) * wordSize); // reserve word for pointer to expression stack bottom ++ assert(i + 2 == frame_size, "i + 2 should be equal to frame_size"); ++} ++ ++// End of helpers ++ ++// Various method entries ++//------------------------------------------------------------------------------------------------------------------------ ++// ++// ++ ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) { ++ address entry = __ pc(); ++ Label slow_path; ++ __ b(slow_path); ++ __ delayed()->nop(); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ __ delayed()->nop(); ++ return entry; ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. 
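++// Rough flow of the native entry generated below: size the parameter area
++// and locals pointer, build the fixed frame (the bcp slot is 0 for natives),
++// optionally lock the receiver or class mirror, run the signature handler,
++// switch the thread to _thread_in_native around the JNI call, then on return
++// poll for safepoints, unbox a potential oop result, unlock if needed and
++// dispatch through the saved result handler.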
++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) { ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ // Rsender: sender's sp ++ // Rmethod: Method* ++ address entry_point = __ pc(); ++ ++#ifndef CORE ++ const Address invocation_counter(Rmethod,in_bytes(MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset())); ++#endif ++ ++ // get parameter size (always needed) ++ // the size in the java stack ++ __ ld(V0, Rmethod, in_bytes(Method::const_offset())); ++ __ lhu(V0, V0, in_bytes(ConstMethod::size_of_parameters_offset())); ++ ++ // native calls don't need the stack size check since they have no expression stack ++ // and the arguments are already on the stack and we only add a handful of words ++ // to the stack ++ ++ // Rmethod: Method* ++ // V0: size of parameters ++ // Layout of frame at this point ++ // ++ // [ argument word n-1 ] <--- sp ++ // ... ++ // [ argument word 0 ] ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (S7) ++ __ dsll(LVP, V0, Address::times_8); ++ __ daddiu(LVP, LVP, (-1) * wordSize); ++ __ daddu(LVP, LVP, SP); ++ ++ ++ // add 2 zero-initialized slots for native calls ++ // 1 slot for native oop temp offset (setup via runtime) ++ // 1 slot for static native result handler3 (setup via runtime) ++ __ push2(R0, R0); ++ ++ // Layout of frame at this point ++ // [ method holder mirror ] <--- sp ++ // [ result type info ] ++ // [ argument word n-1 ] <--- T0 ++ // ... ++ // [ argument word 0 ] <--- LVP ++ ++ ++#ifndef CORE ++ if (inc_counter) __ lw(T3, invocation_counter); // (pre-)fetch invocation count ++#endif ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ // after this function, the layout of frame is as following ++ // ++ // [ monitor block top ] <--- sp ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- sender's sp ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_NATIVE); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(AT, T0, JVM_ACC_ABSTRACT); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. 
++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++#endif // CORE ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ lw(T0, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, T0, JVM_ACC_SYNCHRONIZED); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // after method_lock, the layout of frame is as following ++ // ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... ++ // [ argument word 0 ] <--- S7 ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in asm"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = Rmethod; ++ const Register t = T8; ++ ++ __ get_method(method); ++ { ++ Label L, Lstatic; ++ __ ld(t,method,in_bytes(Method::const_offset())); ++ __ lhu(t, t, in_bytes(ConstMethod::size_of_parameters_offset())); ++ // MIPS n64 ABI: caller does not reserve space for the register auguments. ++ // A0 and A1(if needed) ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, AT, JVM_ACC_STATIC); ++ __ beq(AT, R0, Lstatic); ++ __ delayed()->nop(); ++ __ daddiu(t, t, 1); ++ __ bind(Lstatic); ++ __ daddiu(t, t, -7); ++ __ blez(t, L); ++ __ delayed()->nop(); ++ __ dsll(t, t, Address::times_8); ++ __ dsubu(SP, SP, t); ++ __ bind(L); ++ } ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP, SP, AT); ++ __ move(AT, SP); ++ // [ ] <--- sp ++ // ... (size of parameters - 8 ) ++ // [ monitor entry ] ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer (0) ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Mirror ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ method holder mirror ] ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ // get signature handler ++ { ++ Label L; ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bne(T9, R0, L); ++ __ delayed()->nop(); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), method); ++ __ get_method(method); ++ __ ld(T9, method, in_bytes(Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ // FIXME: when change codes in InterpreterRuntime, note this point ++ // from: begin of parameters ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == LVP, "adjust this code"); ++ // to: current sp ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to () == SP, "adjust this code"); ++ // temp: T3 ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == t , "adjust this code"); ++ ++ __ jalr(T9); ++ __ delayed()->nop(); ++ __ get_method(method); ++ ++ // ++ // if native function is static, and its second parameter has type length of double word, ++ // and first parameter has type length of word, we have to reserve one word ++ // for the first parameter, according to mips o32 abi. ++ // if native function is not static, and its third parameter has type length of double word, ++ // and second parameter has type length of word, we have to reserve one word for the second ++ // parameter. ++ // ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ sd(V0, FP, (frame::interpreter_frame_result_handler_offset)*wordSize); ++ ++#define FIRSTPARA_SHIFT_COUNT 5 ++#define SECONDPARA_SHIFT_COUNT 9 ++#define THIRDPARA_SHIFT_COUNT 13 ++#define PARA_MASK 0xf ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(AT, t, JVM_ACC_STATIC); ++ __ beq(AT, R0, L); ++ __ delayed()->nop(); ++ ++ // get mirror ++ __ load_mirror(t, method, T9); ++ // copy mirror into activation frame ++ __ sd(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ // pass handle to mirror ++ __ daddiu(t, FP, frame::interpreter_frame_oop_temp_offset * wordSize); ++ __ move(A1, t); ++ __ bind(L); ++ } ++ ++ // [ mthd holder mirror ptr ] <--- sp --------------------| (only for static method) ++ // [ ] | ++ // ... size of parameters(or +1) | ++ // [ monitor entry ] | ++ // ... | ++ // [ monitor entry ] | ++ // [ monitor block top ] ( the top monitor entry ) | ++ // [ byte code pointer (0) ] (if native, bcp = 0) | ++ // [ constant pool cache ] | ++ // [ Mirror ] | ++ // [ Method* ] | ++ // [ locals offset ] | ++ // [ sender's sp ] | ++ // [ sender's fp ] | ++ // [ return address ] <--- fp | ++ // [ method holder mirror ] <----------------------------| ++ // [ result type info ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++  // [ argument word 0 ] <--- S7
++
++  // get native function entry point
++  { Label L;
++    __ ld(T9, method, in_bytes(Method::native_function_offset()));
++    __ li(V1, SharedRuntime::native_method_throw_unsatisfied_link_error_entry());
++    __ bne(V1, T9, L);
++    __ delayed()->nop();
++    __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::prepare_native_call), method);
++    __ get_method(method);
++    __ ld(T9, method, in_bytes(Method::native_function_offset()));
++    __ bind(L);
++  }
++
++  // pass JNIEnv
++  // native function in T9
++#ifndef OPT_THREAD
++  __ get_thread(thread);
++#endif
++  __ daddiu(t, thread, in_bytes(JavaThread::jni_environment_offset()));
++  __ move(A0, t);
++  // [ jni environment ] <--- sp
++  // [ mthd holder mirror ptr ] ---------------------------->| (only for static method)
++  // [ ] |
++  // ... size of parameters |
++  // [ monitor entry ] |
++  // ... |
++  // [ monitor entry ] |
++  // [ monitor block top ] ( the top monitor entry ) |
++  // [ byte code pointer (0) ] (if native, bcp = 0) |
++  // [ constant pool cache ] |
++  // [ Mirror ] |
++  // [ Method* ] |
++  // [ locals offset ] |
++  // [ sender's sp ] |
++  // [ sender's fp ] |
++  // [ return address ] <--- fp |
++  // [ method holder mirror ] <----------------------------|
++  // [ result type info ]
++  // [ argument word n-1 ] <--- ( sender's sp )
++  // ...
++  // [ argument word 0 ] <--- S7
++
++  // set_last_Java_frame_before_call
++  __ sd(FP, thread, in_bytes(JavaThread::last_Java_fp_offset()));
++  // Change state to native (we save the return address in the thread, since it might not
++  // be pushed on the stack when we do a stack traversal). It is enough that the pc()
++  // points into the right code segment. It does not have to be the correct return pc.
++  __ li(t, __ pc());
++  __ sd(t, thread, in_bytes(JavaThread::last_Java_pc_offset()));
++  __ sd(SP, thread, in_bytes(JavaThread::last_Java_sp_offset()));
++
++  // change thread state
++#ifdef ASSERT
++  {
++    Label L;
++    __ lw(t, thread, in_bytes(JavaThread::thread_state_offset()));
++    __ daddiu(t, t, (-1) * _thread_in_Java);
++    __ beq(t, R0, L);
++    __ delayed()->nop();
++    __ stop("Wrong thread state in native stub");
++    __ bind(L);
++  }
++#endif
++
++  __ move(t, _thread_in_native);
++  if(os::is_MP()) {
++    __ sync(); // store release
++  }
++  __ sw(t, thread, in_bytes(JavaThread::thread_state_offset()));
++
++  // call native method
++  __ jalr(T9);
++  __ delayed()->nop();
++  // result potentially in V0 or F0
++
++
++  // via _last_native_pc and not via _last_java_sp
++  // NOTE: the order of these pushes is known to frame::interpreter_frame_result.
++  // If the order changes or anything else is added to the stack the code in
++  // interpreter_frame_result will have to be changed.
++  //FIXME, should modify here
++  // save return value to keep the value from being destroyed by other calls
++  __ push(dtos);
++  __ push(ltos);
++
++  // change thread state
++#ifndef OPT_THREAD
++  __ get_thread(thread);
++#endif
++  __ move(t, _thread_in_native_trans);
++  if(os::is_MP()) {
++    __ sync(); // store release
++  }
++  __ sw(t, thread, in_bytes(JavaThread::thread_state_offset()));
++
++  if(os::is_MP()) {
++    if (UseMembar) {
++      // Force this write out before the read below
++      __ sync();
++    } else {
++      // Write serialization page so VM thread can do a pseudo remote membar.
++      // We use the current thread pointer to calculate a thread specific
++      // offset to write to within the page. This minimizes bus traffic
++      // due to cache line collision.
++ __ serialize_memory(thread, A0); ++ } ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { Label Continue; ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if BCP & LVP are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ Label slow_path; ++ ++ __ safepoint_poll_acquire(slow_path, thread); ++ __ lw(AT, thread, in_bytes(JavaThread::suspend_flags_offset())); ++ __ beq(AT, R0, Continue); ++ __ delayed()->nop(); ++ __ bind(slow_path); ++ __ move(A0, thread); ++ __ call(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans), ++ relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ move(t, _thread_in_Java); ++ if(os::is_MP()) { ++ __ sync(); // store release ++ } ++ __ sw(t, thread, in_bytes(JavaThread::thread_state_offset())); ++ __ reset_last_Java_frame(thread, true); ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ sd(R0, thread, in_bytes(JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ // reset handle block ++ __ ld(t, thread, in_bytes(JavaThread::active_handles_offset())); ++ __ sw(R0, t, JNIHandleBlock::top_offset_in_bytes()); ++ ++ // If result was an oop then unbox and save it in the frame ++ { ++ Label no_oop; ++ //FIXME, addi only support 16-bit imeditate ++ __ ld(AT, FP, frame::interpreter_frame_result_handler_offset*wordSize); ++ __ li(T0, AbstractInterpreter::result_handler(T_OBJECT)); ++ __ bne(AT, T0, no_oop); ++ __ delayed()->nop(); ++ __ pop(ltos); ++ // Unbox oop result, e.g. JNIHandles::resolve value. ++ __ resolve_jobject(V0, thread, T9); ++ __ sd(V0, FP, (frame::interpreter_frame_oop_temp_offset)*wordSize); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(ltos); ++ __ bind(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ lw(t, thread, in_bytes(JavaThread::stack_guard_state_offset())); ++ __ move(AT, (u1)JavaThread::stack_guard_yellow_reserved_disabled); ++ __ bne(t, AT, no_reguard); ++ __ delayed()->nop(); ++ __ pushad(); ++ __ move(S5_heapbase, SP); ++ __ move(AT, -StackAlignmentInBytes); ++ __ andr(SP, SP, AT); ++ __ call(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages), relocInfo::runtime_call_type); ++ __ delayed()->nop(); ++ __ move(SP, S5_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ bind(no_reguard); ++ } ++ // restore BCP to have legal interpreter frame, ++ // i.e., bci == 0 <=> BCP == code_base() ++ // Can't call_VM until bcp is within reasonable. ++ __ get_method(method); // method is junk from thread_in_native to now. ++ __ ld(BCP, method, in_bytes(Method::const_offset())); ++ __ lea(BCP, Address(BCP, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ ld(t, thread, in_bytes(Thread::pending_exception_offset())); ++ __ beq(t, R0, L); ++ __ delayed()->nop(); ++ // Note: At some point we may want to unify this with the code used in ++ // call_VM_base(); ++ // i.e., we should use the StubRoutines::forward_exception code. 
For now this ++ // doesn't work here because the sp is not correctly set at this point. ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here(); ++ __ bind(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ lw(t, method, in_bytes(Method::access_flags_offset())); ++ __ andi(t, t, JVM_ACC_SYNCHRONIZED); ++ __ beq(t, R0, L); ++ // the code below should be shared with interpreter macro assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, ++ // since this is a synchronized method. However, need ++ // to check that the object has not been unlocked by ++ // an explicit monitorexit bytecode. ++ __ delayed()->daddiu(c_rarg0, FP, frame::interpreter_frame_initial_sp_offset * wordSize - (int)sizeof(BasicObjectLock)); ++ // address of first monitor ++ ++ __ ld(t, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ bne(t, R0, unlock); ++ __ delayed()->nop(); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ __ bind(unlock); ++ __ unlock_object(c_rarg0); ++ } ++ __ bind(L); ++ } ++ ++ // jvmti/jvmpi support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). ++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI); ++ ++ // restore potential result in V0, ++ // call result handler to restore potential result in ST0 & handle result ++ ++ __ pop(ltos); ++ __ pop(dtos); ++ ++ __ ld(t, FP, (frame::interpreter_frame_result_handler_offset) * wordSize); ++ __ jalr(t); ++ __ delayed()->nop(); ++ ++ ++ // remove activation ++ __ ld(SP, FP, frame::interpreter_frame_sender_sp_offset * wordSize); // get sender sp ++ __ ld(RA, FP, frame::interpreter_frame_return_addr_offset * wordSize); // get return address ++ __ ld(FP, FP, frame::interpreter_frame_sender_fp_offset * wordSize); // restore sender's fp ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++#ifndef CORE ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ // entry_point is the beginning of this ++ // function and checks again for compiled code ++ } ++#endif ++ return entry_point; ++} ++ ++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) { ++ // Quick & dirty stack overflow checking: bang the stack & handle trap. ++ // Note that we do the banging after the frame is setup, since the exception ++ // handling code expects to find a valid interpreter frame on the stack. ++ // Doing the banging earlier fails if the caller frame is not an interpreter ++ // frame. ++ // (Also, the exception throwing code expects to unlock any synchronized ++ // method receiever, so do the banging after locking the receiver.) ++ ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. 
++  if (UseStackBanging) {
++    const int page_size = os::vm_page_size();
++    const int n_shadow_pages = ((int)JavaThread::stack_shadow_zone_size()) / page_size;
++    const int start_page = native_call ? n_shadow_pages : 1;
++    BLOCK_COMMENT("bang_stack_shadow_pages:");
++    for (int pages = start_page; pages <= n_shadow_pages; pages++) {
++      __ bang_stack_with_offset(pages*page_size);
++    }
++  }
++}
++
++//
++// Generic interpreted method entry to (asm) interpreter
++//
++// Layout of frame just at the entry
++//
++//   [ argument word n-1 ] <--- sp
++//   ...
++//   [ argument word 0   ]
++// assume Method* in Rmethod before call this method.
++// prerequisites to the generated stub : the callee Method* in Rmethod
++// note you must save the caller bcp before call the generated stub
++//
++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {
++  // determine code generation flags
++  bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods;
++
++  // Rmethod: Method*
++  // Rsender: sender's sp
++  address entry_point = __ pc();
++
++  const Address invocation_counter(Rmethod,
++      in_bytes(MethodCounters::invocation_counter_offset() + InvocationCounter::counter_offset()));
++
++  // get parameter size (always needed)
++  __ ld(T3, Rmethod, in_bytes(Method::const_offset()));  //T3 --> Rmethod._constMethod
++  __ lhu(V0, T3, in_bytes(ConstMethod::size_of_parameters_offset()));
++
++  // Rmethod: Method*
++  // V0: size of parameters
++  // Rsender: sender's sp, could be different from sp + wordSize if we call via c2i
++  // get size of locals in words to T2
++  __ lhu(T2, T3, in_bytes(ConstMethod::size_of_locals_offset()));
++  // T2 = no. of additional locals, locals include parameters
++  __ dsubu(T2, T2, V0);
++
++  // see if we've got enough room on the stack for locals plus overhead.
++  // Layout of frame at this point
++  //
++  // [ argument word n-1  ] <--- sp
++  //   ...
++  // [ argument word 0    ]
++  generate_stack_overflow_check();
++  // after this function, the layout of frame does not change
++
++  // compute beginning of parameters (LVP)
++  __ dsll(LVP, V0, LogBytesPerWord);
++  __ daddiu(LVP, LVP, (-1) * wordSize);
++  __ daddu(LVP, LVP, SP);
++
++  // T2 - # of additional locals
++  // allocate space for locals
++  // explicitly initialize locals
++  {
++    Label exit, loop;
++    __ beq(T2, R0, exit);
++    __ delayed()->nop();
++
++    __ bind(loop);
++    __ daddiu(SP, SP, (-1) * wordSize);
++    __ daddiu(T2, T2, -1);          // until everything initialized
++    __ bne(T2, R0, loop);
++    __ delayed()->sd(R0, SP, 0);    // initialize local variables
++
++    __ bind(exit);
++  }
++
++  //
++  // [ local var m-1      ] <--- sp
++  //   ...
++  // [ local var 0        ]
++  // [ argument word n-1  ] <--- T0?
++  //   ...
++  // [ argument word 0    ] <--- LVP
++
++  // initialize fixed part of activation frame
++
++  generate_fixed_frame(false);
++
++
++  // after this function, the layout of frame is as following
++  //
++  // [ monitor block top        ] <--- sp ( the top monitor entry )
++  // [ byte code pointer        ] (if native, bcp = 0)
++  // [ constant pool cache      ]
++  // [ Method*                  ]
++  // [ locals offset            ]
++  // [ sender's sp              ]
++  // [ sender's fp              ] <--- fp
++  // [ return address           ]
++  // [ local var m-1            ]
++  //   ...
++  // [ local var 0              ]
++  // [ argument word n-1        ] <--- ( sender's sp )
++  // ...
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ld(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_NATIVE); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ andi(T2, AT, JVM_ACC_ABSTRACT); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ ++#ifndef OPT_THREAD ++ Register thread = T8; ++ __ get_thread(thread); ++#else ++ Register thread = TREG; ++#endif ++ __ move(AT, (int)true); ++ __ sb(AT, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++#ifndef CORE ++ ++ // mdp : T8 ++ // tmp1: T9 ++ // tmp2: T2 ++ __ profile_parameters_type(T8, T9, T2); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++#endif // CORE ++ ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sb(R0, thread, in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ // ++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { Label L; ++ __ lw(AT, Rmethod, in_bytes(Method::access_flags_offset())); ++ __ andi(T2, AT, JVM_ACC_SYNCHRONIZED); ++ __ beq(T2, R0, L); ++ __ delayed()->nop(); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // layout of frame after lock_method ++ // [ monitor entry ] <--- sp ++ // ... ++ // [ monitor entry ] ++ // [ monitor block top ] ( the top monitor entry ) ++ // [ byte code pointer ] (if native, bcp = 0) ++ // [ constant pool cache ] ++ // [ Method* ] ++ // [ locals offset ] ++ // [ sender's sp ] ++ // [ sender's fp ] ++ // [ return address ] <--- fp ++ // [ local var m-1 ] ++ // ... ++ // [ local var 0 ] ++ // [ argumnet word n-1 ] <--- ( sender's sp ) ++ // ... 
++ // [ argument word 0 ] <--- LVP ++ ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ __ ld(AT, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ beq(AT, SP, L); ++ __ delayed()->nop(); ++ __ stop("broken stack frame setup in interpreter in native"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti/jvmpi support ++ __ notify_method_entry(); ++ ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ get_method(Rmethod); ++ __ b(profile_method_continue); ++ __ delayed()->nop(); ++ } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() { ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ sd(R0,FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ // V0: exception ++ // V1: return address/pc that threw exception ++ __ restore_bcp(); // BCP points to call/send ++ __ restore_locals(); ++ ++ //add for compressedoops ++ __ reinit_heapbase(); ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // BCP: exception bcp ++ __ verify_oop(V0); ++ ++ // expression stack must be empty before entering the VM in case of an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ move(A1, V0); ++ __ call_VM(V1, CAST_FROM_FN_PTR(address, InterpreterRuntime::exception_handler_for_exception), A1); ++ // V0: exception handler entry point ++ // V1: preserved exception oop ++ // S0: bcp for exception handler ++ __ push(V1); // push exception which is now the only value on the stack ++ __ jr(V0); // jump to exception handler (may be _remove_activation_entry!) ++ __ delayed()->nop(); ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). ++ ++ // In current activation ++ // V0: exception ++ // BCP: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition indicating that we are ++ // currently handling popframe, so that call_VMs that may happen later do not trigger new ++ // popframe handling cycles. 
++#ifndef OPT_THREAD ++ Register thread = T2; ++ __ get_thread(T2); ++#else ++ Register thread = TREG; ++#endif ++ __ lw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ __ ori(T3, T3, JavaThread::popframe_processing_bit); ++ __ sw(T3, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#ifndef CORE ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ __ ld(A0, FP, frame::return_addr_offset * wordSize); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::interpreter_contains), A0); ++ __ bne(V0, R0, caller_not_deoptimized); ++ __ delayed()->nop(); ++ ++ // Compute size of arguments for saving when returning to deoptimized caller ++ __ get_method(A1); ++ __ verify_oop(A1); ++ __ ld( A1, A1, in_bytes(Method::const_offset())); ++ __ lhu(A1, A1, in_bytes(ConstMethod::size_of_parameters_offset())); ++ __ shl(A1, Interpreter::logStackElementSize); ++ __ restore_locals(); ++ __ dsubu(A2, LVP, A1); ++ __ daddiu(A2, A2, wordSize); ++ // Save these arguments ++#ifndef OPT_THREAD ++ __ get_thread(A0); ++#else ++ __ move(A0, TREG); ++#endif ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, Deoptimization::popframe_preserve_args), A0, A1, A2); ++ ++ __ remove_activation(vtos, T9, false, false, false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(caller_not_deoptimized); ++ } ++#endif /* !CORE */ ++ ++ __ remove_activation(vtos, T3, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Clear the popframe condition flag ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. 
++ __ move(T8, SP); ++ __ ld(A2, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ // PC must point into interpreter here ++ __ set_last_Java_frame(thread, noreg, FP, __ pc()); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), thread, T8, A2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ld(SP, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ __ sd(R0, FP, frame::interpreter_frame_last_sp_offset * wordSize); ++ ++ ++ ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++ // Finish with popframe handling ++ __ restore_bcp(); ++ __ restore_locals(); ++#ifndef CORE ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++#endif // !CORE ++ // Clear the popframe condition flag ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ move(AT, JavaThread::popframe_inactive); ++ __ sw(AT, thread, in_bytes(JavaThread::popframe_condition_offset())); ++ ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ ++ __ lbu(AT, BCP, 0); ++ __ daddiu(AT, AT, -1 * Bytecodes::_invokestatic); ++ __ bne(AT, R0, L_done); ++ __ delayed()->nop(); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. ++ ++ __ get_method(T9); ++ __ ld(T8, LVP, 0); ++ __ call_VM(T8, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), T8, T9, BCP); ++ ++ __ beq(T8, R0, L_done); ++ __ delayed()->nop(); ++ ++ __ sd(T8, SP, 0); ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop(T0); ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ __ sd(T0, thread, in_bytes(JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, T3, false, true, false); ++ // restore exception ++ __ get_vm_result(T0, thread); ++ __ verify_oop(T0); ++ ++ // In between activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // T0: exception ++ // T1: return address/pc that threw exception ++ // SP: expression stack of caller ++ // FP: fp of caller ++ __ push2(T0, T3); // save exception and return address ++ __ move(A1, T3); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), thread, A1); ++ __ move(T9, V0); // save exception handler ++ __ pop2(V0, V1); // restore return address and exception ++ ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jr(T9); // jump to exception handler of caller ++ __ delayed()->nop(); ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) { ++ address entry = __ pc(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++#ifndef OPT_THREAD ++ __ get_thread(TREG); 
++#endif
++  __ ld_ptr(T9, TREG, in_bytes(JavaThread::jvmti_thread_state_offset()));
++  const Address cond_addr(T9, in_bytes(JvmtiThreadState::earlyret_state_offset()));
++  // Clear the earlyret state
++  __ move(AT, JvmtiThreadState::earlyret_inactive);
++  __ sw(AT, cond_addr);
++  __ sync();
++
++
++  __ remove_activation(state, T0,
++                       false, /* throw_monitor_exception */
++                       false, /* install_monitor_exception */
++                       true); /* notify_jvmdi */
++  __ sync();
++  __ jr(T0);
++  __ delayed()->nop();
++  return entry;
++} // end of ForceEarlyReturn support
++
++
++//-----------------------------------------------------------------------------
++// Helper for vtos entry point generation
++
++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
++                                                         address& bep,
++                                                         address& cep,
++                                                         address& sep,
++                                                         address& aep,
++                                                         address& iep,
++                                                         address& lep,
++                                                         address& fep,
++                                                         address& dep,
++                                                         address& vep) {
++  assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
++  Label L;
++  fep = __ pc(); __ push(ftos); __ b(L); __ delayed()->nop();
++  dep = __ pc(); __ push(dtos); __ b(L); __ delayed()->nop();
++  lep = __ pc(); __ push(ltos); __ b(L); __ delayed()->nop();
++  aep = __ pc(); __ push(atos); __ b(L); __ delayed()->nop();
++  bep = cep = sep =
++  iep = __ pc(); __ push(itos);
++  vep = __ pc();
++  __ bind(L);
++  generate_and_dispatch(t);
++}
++
++
++/*
++//-----------------------------------------------------------------------------
++// Generation of individual instructions
++
++// helpers for generate_and_dispatch
++
++
++InterpreterGenerator::InterpreterGenerator(StubQueue* code)
++ : TemplateInterpreterGenerator(code) {
++   generate_all(); // down here so it can be "virtual"
++}
++*/
++
++//-----------------------------------------------------------------------------
++
++// Non-product code
++#ifndef PRODUCT
++address TemplateInterpreterGenerator::generate_trace_code(TosState state) {
++  address entry = __ pc();
++
++  // prepare expression stack
++  __ push(state);       // save tosca
++
++  // tos & tos2
++  // trace_bytecode actually needs 4 args, the last two being tos & tos2.
++  // This works fine for x86, but the MIPS o32 calling convention stores A2-A3
++  // to the stack positions it thinks hold tos & tos2, so an error would occur
++  // when the expression stack holds fewer than 2 entries.
++ __ ld(A2, SP, 0); ++ __ ld(A3, SP, 1 * wordSize); ++ ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); ++ __ move(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ jr(RA); ++ __ delayed()->nop(); ++ ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() { ++ __ li(T8, (long)&BytecodeCounter::_counter_value); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) { ++ __ li(T8, (long)&BytecodeHistogram::_counters[t->bytecode()]); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ lw(T9, T8, 0); ++ __ dsrl(T9, T9, BytecodePairHistogram::log2_number_of_codes); ++ __ li(T8, ((long)t->bytecode()) << BytecodePairHistogram::log2_number_of_codes); ++ __ orr(T9, T9, T8); ++ __ li(T8, (long)&BytecodePairHistogram::_index); ++ __ sw(T9, T8, 0); ++ __ dsll(T9, T9, 2); ++ __ li(T8, (long)BytecodePairHistogram::_counters); ++ __ daddu(T8, T8, T9); ++ __ lw(AT, T8, 0); ++ __ daddiu(AT, AT, 1); ++ __ sw(AT, T8, 0); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) { ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ ++ address entry = Interpreter::trace_code(t->tos_in()); ++ assert(entry != NULL, "entry must have been generated"); ++ __ call(entry, relocInfo::none); ++ __ delayed()->nop(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() { ++ Label L; ++ __ li(T8, long(&BytecodeCounter::_counter_value)); ++ __ lw(T8, T8, 0); ++ __ move(AT, StopInterpreterAt); ++ __ bne(T8, AT, L); ++ __ delayed()->nop(); ++ __ brk(5); ++ __ delayed()->nop(); ++ __ bind(L); ++} ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/mips/templateTable_mips.hpp b/src/hotspot/cpu/mips/templateTable_mips.hpp +new file mode 100644 +index 0000000000..46a88aba26 +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateTable_mips.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++#define CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP ++ ++ static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. ++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_MIPS_VM_TEMPLATETABLE_MIPS_64_HPP +diff --git a/src/hotspot/cpu/mips/templateTable_mips_64.cpp b/src/hotspot/cpu/mips/templateTable_mips_64.cpp +new file mode 100644 +index 0000000000..5265483830 +--- /dev/null ++++ b/src/hotspot/cpu/mips/templateTable_mips_64.cpp +@@ -0,0 +1,4688 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++ ++ ++#ifndef CC_INTERP ++ ++#define __ _masm-> ++ ++#define T0 RT0 ++#define T1 RT1 ++#define T2 RT2 ++#define T3 RT3 ++#define T8 RT8 ++#define T9 RT9 ++ ++// Platform-dependent initialization ++ ++void TemplateTable::pd_initialize() { ++ // No mips specific initialization ++} ++ ++// Address computation: local variables ++ ++static inline Address iaddress(int n) { ++ return Address(LVP, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++static inline Address haddress(int n) { return iaddress(n + 0); } ++ ++ ++static inline Address at_sp() { return Address(SP, 0); } ++static inline Address at_sp_p1() { return Address(SP, 1 * wordSize); } ++static inline Address at_sp_p2() { return Address(SP, 2 * wordSize); } ++ ++// At top of Java expression stack which may be different than sp(). It ++// isn't for category 1 objects. ++static inline Address at_tos () { ++ Address tos = Address(SP, Interpreter::expr_offset_in_bytes(0)); ++ return tos; ++} ++ ++static inline Address at_tos_p1() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++static inline Address at_tos_p3() { ++ return Address(SP, Interpreter::expr_offset_in_bytes(3)); ++} ++ ++// we use S0 as bcp, be sure you have bcp in S0 before you call any of the Template generator ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(BCP, offset); ++} ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. 
++// If val == noreg this means store a NULL ++ ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators = 0) { ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, T9, T1, decorators); ++} ++ ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators = 0) { ++ __ load_heap_oop(dst, src, T9, T1, decorators); ++} ++ ++// bytecode folding ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register tmp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) { ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(tmp_reg, bc_reg, tmp_reg, byte_no, 1); ++ __ daddiu(bc_reg, R0, bc); ++ __ beq(tmp_reg, R0, L_patch_done); ++ __ delayed()->nop(); ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. 
++ if (load_bc_into_bc_reg) { ++ __ move(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, Bytecodes::_breakpoint); ++ __ bne(tmp_reg, AT, L_fast_patch); ++ __ delayed()->nop(); ++ ++ __ get_method(tmp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::set_original_bytecode_at), tmp_reg, BCP, bc_reg); ++ ++ __ b(L_patch_done); ++ __ delayed()->nop(); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ lbu(tmp_reg, at_bcp(0)); ++ __ move(AT, (int)Bytecodes::java_code(bc)); ++ __ beq(tmp_reg, AT, L_okay); ++ __ delayed()->nop(); ++ __ beq(tmp_reg, bc_reg, L_patch_done); ++ __ delayed()->nop(); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ sb(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++ ++ ++// Individual instructions ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ move(FSR, R0); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ move(FSR, R0); ++ } else { ++ __ move(FSR, value); ++ } ++} ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ switch( value ) { ++ case 0: __ mtc1(R0, FSF); return; ++ case 1: __ addiu(AT, R0, 1); break; ++ case 2: __ addiu(AT, R0, 2); break; ++ default: ShouldNotReachHere(); ++ } ++ __ mtc1(AT, FSF); ++ __ cvt_s_w(FSF, FSF); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ switch( value ) { ++ case 0: __ dmtc1(R0, FSF); ++ return; ++ case 1: __ daddiu(AT, R0, 1); ++ __ dmtc1(AT, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ lb(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() { ++ transition(vtos, itos); ++ __ lb(FSR, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(FSR, FSR, 8); ++ __ orr(FSR, FSR, AT); ++} ++ ++// T1 : tags ++// T2 : index ++// T3 : cpool ++// T8 : tag ++void TemplateTable::ldc(bool wide) { ++ transition(vtos, vtos); ++ Label call_ldc, notFloat, notClass, notInt, Done; ++ // get index in cpool ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ lbu(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ if (UseLEXT1 && Assembler::is_simm(sizeof(tags_offset), 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ //now T1 is the tag ++ ++ // unresolved class - get the resolved class ++ __ daddiu(AT, T1, - JVM_CONSTANT_UnresolvedClass); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // unresolved class in error (resolution failed) - call into runtime ++ // so that the same error from first resolution attempt is thrown. 
++ __ daddiu(AT, T1, -JVM_CONSTANT_UnresolvedClassInError); ++ __ beq(AT, R0, call_ldc); ++ __ delayed()->nop(); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ daddiu(AT, T1, - JVM_CONSTANT_Class); ++ __ bne(AT, R0, notClass); ++ __ delayed()->dsll(T2, T2, Address::times_8); ++ ++ __ bind(call_ldc); ++ __ move(A1, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), A1); ++ //__ push(atos); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->sd(FSR, SP, 0); // added for performance issue ++ ++ __ bind(notClass); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Float); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ lwc1(FSF, AT, base_offset); ++ } ++ //__ push_f(); ++ __ daddiu(SP, SP, - Interpreter::stackElementSize); ++ __ b(Done); ++ __ delayed()->swc1(FSF, SP, 0); ++ ++ __ bind(notFloat); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Integer); ++ __ bne(AT, R0, notInt); ++ __ delayed()->nop(); ++ // itos ++ if (UseLEXT1 && Assembler::is_simm(sizeof(base_offset), 8)) { ++ __ gslwx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(T0, T3, T2); ++ __ lw(FSR, T0, base_offset); ++ } ++ __ push(itos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // assume the tag is for condy; if not, the VM runtime will tell us ++ __ bind(notInt); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++void TemplateTable::condy_helper(Label& Done) { ++ const Register obj = FSR; ++ const Register off = SSR; ++ const Register flags = T3; ++ const Register rarg = A1; ++ __ move(rarg, (int)bytecode()); ++ __ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); ++ __ get_vm_result_2(flags, TREG); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ andi(off, flags, ConstantPoolCacheEntry::field_index_mask); ++ __ daddu(obj, off, obj); ++ const Address field(obj, 0 * wordSize); ++ ++ // What sort of thing are we loading? 
++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ ++ switch (bytecode()) { ++ case Bytecodes::_ldc: ++ case Bytecodes::_ldc_w: ++ { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ daddiu(AT, flags, -itos); ++ __ bne(AT, R0, notInt); ++ __ delayed()->nop(); ++ // itos ++ __ ld(obj, field); ++ __ push(itos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ daddiu(AT, flags, -ftos); ++ __ bne(AT, R0, notFloat); ++ __ delayed()->nop(); ++ // ftos ++ __ lwc1(FSF, field); ++ __ push(ftos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ daddiu(AT, flags, -stos); ++ __ bne(AT, R0, notShort); ++ __ delayed()->nop(); ++ // stos ++ __ lh(obj, field); ++ __ push(stos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ daddiu(AT, flags, -btos); ++ __ bne(AT, R0, notByte); ++ __ delayed()->nop(); ++ // btos ++ __ lb(obj, field); ++ __ push(btos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notByte); ++ __ daddiu(AT, flags, -ctos); ++ __ bne(AT, R0, notChar); ++ __ delayed()->nop(); ++ // ctos ++ __ lhu(obj, field); ++ __ push(ctos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ daddiu(AT, flags, -ztos); ++ __ bne(AT, R0, notBool); ++ __ delayed()->nop(); ++ // ztos ++ __ lbu(obj, field); ++ __ push(ztos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notBool); ++ break; ++ } ++ ++ case Bytecodes::_ldc2_w: ++ { ++ Label notLong, notDouble; ++ __ daddiu(AT, flags, -ltos); ++ __ bne(AT, R0, notLong); ++ __ delayed()->nop(); ++ // ltos ++ __ ld(obj, field); ++ __ push(ltos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ daddiu(AT, flags, -dtos); ++ __ bne(AT, R0, notDouble); ++ __ delayed()->nop(); ++ // dtos ++ __ ldc1(FSF, field); ++ __ push(dtos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ stop("bad ldc/condy"); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) { ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ Register rarg = A1; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp, T9); ++ __ bne(result, R0, resolved); ++ __ delayed()->nop(); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ // first time invocation - must resolve first ++ int i = (int)bytecode(); ++ __ move(rarg, i); ++ __ call_VM(result, entry, rarg); ++ ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. 
++ Label notNull; ++ __ set64(rarg, (long)Universe::the_null_sentinel_addr()); ++ __ ld_ptr(tmp, Address(rarg)); ++ __ bne(tmp, result, notNull); ++ __ delayed()->nop(); ++ __ xorr(result, result, result); // NULL object reference ++ __ bind(notNull); ++ } ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++ ++// used register: T2, T3, T1 ++// T2 : index ++// T3 : cpool ++// T1 : tag ++void TemplateTable::ldc2_w() { ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ ++ // get index in cpool ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ ++ __ get_cpool_and_tags(T3, T1); ++ ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type in T1 ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(T1, T1, T2, tags_offset); ++ } else { ++ __ daddu(AT, T1, T2); ++ __ lb(T1, AT, tags_offset); ++ } ++ ++ __ daddiu(AT, T1, -JVM_CONSTANT_Double); ++ __ bne(AT, R0, notDouble); ++ __ delayed()->nop(); ++ ++ // dtos ++ __ dsll(T2, T2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { ++ __ gsldxc1(FSF, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ ldc1(FSF, AT, base_offset); ++ } ++ __ push(dtos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ __ daddiu(AT, T1, -JVM_CONSTANT_Long); ++ __ bne(AT, R0, notLong); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ dsll(T2, T2, Address::times_8); ++ if (UseLEXT1 && Assembler::is_simm(base_offset, 8)) { ++ __ gsldx(FSR, T3, T2, base_offset); ++ } else { ++ __ daddu(AT, T3, T2); ++ __ ld(FSR, AT, base_offset); ++ } ++ __ push(ltos); ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++// we compute the actual local variable address here ++// the x86 dont do so for it has scaled index memory access model, we dont have, so do here ++void TemplateTable::locals_index(Register reg, int offset) { ++ __ lbu(reg, at_bcp(offset)); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++void TemplateTable::iload() { ++ iload_internal(); ++} ++ ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); ++} ++ ++// this method will do bytecode folding of the two form: ++// iload iload iload caload ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::iload_internal(RewriteControl rc) { ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ move(AT, Bytecodes::_iload); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ __ move(T3, Bytecodes::_fast_iload2); ++ __ move(AT, Bytecodes::_fast_iload); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _caload, rewrite to fast_icaload ++ __ move(T3, Bytecodes::_fast_icaload); ++ __ move(AT, Bytecodes::_caload); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // rewrite so iload doesn't check again. 
++ __ move(T3, Bytecodes::_fast_iload); ++ ++ // rewrite ++ // T3 : fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_iload, T3, T2, false); ++ __ bind(done); ++ } ++ ++ // Get the local value into tos ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload2() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ push(itos); ++ locals_index(T2, 3); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fast_iload() { ++ transition(vtos, itos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::lload() { ++ transition(vtos, ltos); ++ locals_index(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::fload() { ++ transition(vtos, ftos); ++ locals_index(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::dload() { ++ transition(vtos, dtos); ++ locals_index(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::aload() { ++ transition(vtos, atos); ++ locals_index(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++void TemplateTable::locals_index_wide(Register reg) { ++ __ get_unsigned_2_byte_index_at_bcp(reg, 2); ++ __ dsll(reg, reg, Address::times_8); ++ __ dsubu(reg, LVP, reg); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_iload() { ++ transition(vtos, itos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_lload() { ++ transition(vtos, ltos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_fload() { ++ transition(vtos, ftos); ++ locals_index_wide(T2); ++ __ lwc1(FSF, T2, 0); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_dload() { ++ transition(vtos, dtos); ++ locals_index_wide(T2); ++ __ ldc1(FSF, T2, -wordSize); ++} ++ ++// used register T2 ++// T2 : index ++void TemplateTable::wide_aload() { ++ transition(vtos, atos); ++ locals_index_wide(T2); ++ __ ld(FSR, T2, 0); ++} ++ ++// we use A2 as the regiser for index, BE CAREFUL! 
++// we dont use our tge 29 now, for later optimization ++void TemplateTable::index_check(Register array, Register index) { ++ // Pop ptr into array ++ __ pop_ptr(array); ++ index_check_without_pop(array, index); ++} ++ ++void TemplateTable::index_check_without_pop(Register array, Register index) { ++ // destroys A2 ++ // check array ++ __ null_check(array, arrayOopDesc::length_offset_in_bytes()); ++ ++ // sign extend since tos (index) might contain garbage in upper bits ++ __ sll(index, index, 0); ++ ++ // check index ++ Label ok; ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++#ifndef OPT_RANGECHECK ++ __ sltu(AT, index, AT); ++ __ bne(AT, R0, ok); ++ __ delayed()->nop(); ++ ++ //throw_ArrayIndexOutOfBoundsException assume abberrant index in A2 ++ if (A1 != array) __ move(A1, array); ++ if (A2 != index) __ move(A2, index); ++ __ jmp(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry); ++ __ delayed()->nop(); ++ __ bind(ok); ++#else ++ __ lw(AT, array, arrayOopDesc::length_offset_in_bytes()); ++ __ move(A2, index); ++ __ tgeu(A2, AT, 29); ++#endif ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); ++ ++ __ warn("iaload Unimplemented yet"); ++ __ gslwle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_INT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); ++ ++ __ warn("laload Unimplemented yet"); ++ __ gsldle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, Address::times_8); ++ __ daddu(T9, SSR, AT); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, FSR, Address(T9, arrayOopDesc::base_offset_in_bytes(T_LONG)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ shl(AT, 2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); ++ ++ __ warn("faload Unimplemented yet"); ++ __ gslwlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ shl(FSR, 2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::daload() { ++ transition(itos, dtos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, 3); ++ __ daddu(FSR, SSR, FSR); ++ 
__ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, 3); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); ++ ++ __ warn("daload Unimplemented yet"); ++ __ gsldlec1(FSF, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(AT, FSR, 3); ++ __ daddu(T9, SSR, AT); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg, Address(T9, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, UseCompressedOops ? Address::times_4 : Address::times_8); ++ __ daddu(FSR, SSR, FSR); ++ //add for compressedoops ++ do_oop_load(_masm, ++ Address(FSR, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), ++ FSR, ++ IS_ARRAY); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR:index ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //base ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ warn("baload Unimplemented yet"); ++ __ gslble(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++// iload followed by caload frequent pair ++// used register : T2 ++// T2 : index ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, 1); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), noreg, noreg); ++} ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ if(UseBoundCheckInstruction) { ++ __ pop(SSR); //SSR:array FSR: index ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ addiu(FSR, FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ lw(AT, SSR, arrayOopDesc::length_offset_in_bytes()); //bound ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, SSR, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_SHORT)); ++ ++ __ warn("saload Unimplemented yet"); ++ __ gslhle(FSR, FSR, AT); ++ } else { ++ index_check(SSR, FSR); ++ __ dsll(FSR, FSR, Address::times_2); ++ __ daddu(FSR, SSR, FSR); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, Address(FSR, arrayOopDesc::base_offset_in_bytes(T_SHORT)), noreg, noreg); ++ } ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ lw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ld(FSR, laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ lwc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ ldc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ 
transition(vtos, atos); ++ __ ld(FSR, aaddress(n)); ++} ++ ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} ++ ++void TemplateTable::nofast_aload_0() { ++ aload_0_internal(may_not_rewrite); ++} ++ ++// used register : T2, T3 ++// T2 : bytecode ++// T3 : folded code ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. ++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ // get the next bytecode in T2 ++ __ lbu(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // do actual aload_0 ++ aload(0); ++ ++ // if _getfield then wait with rewrite ++ __ move(AT, Bytecodes::_getfield); ++ __ beq(AT, T2, done); ++ __ delayed()->nop(); ++ ++ // if _igetfield then reqrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_iaccess_0); ++ __ move(AT, Bytecodes::_fast_igetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _agetfield then reqrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aaccess_0); ++ __ move(AT, Bytecodes::_fast_agetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // if _fgetfield then reqrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_faccess_0); ++ __ move(AT, Bytecodes::_fast_fgetfield); ++ __ beq(AT, T2, rewrite); ++ __ delayed()->nop(); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == ++ Bytecodes::_aload_0, ++ "fix bytecode definition"); ++ __ move(T3, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, T3, T2, false); ++ ++ __ bind(done); ++ } else { ++ aload(0); ++ } ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ swc1(FSF, T2, 0); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ sdc1(FSF, T2, -wordSize); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); 
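++  // The wide_* templates decode a 16-bit local index. Per the JVM spec the
++  // encoding handled here is (istore shown as an example):
++  //
++  //   0xC4 (wide)  0x36 (istore)  indexbyte1  indexbyte2
++  //
++  // and locals_index_wide() turns (indexbyte1 << 8) | indexbyte2 into the
++  // address of that local slot.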
++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, -wordSize); ++} ++ ++void TemplateTable::wide_fstore() { ++ wide_istore(); ++} ++ ++void TemplateTable::wide_dstore() { ++ wide_lstore(); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ sd(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); // T2: array SSR: index ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_INT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_INT)); //bound ++ ++ __ warn("iastore Unimplemented yet"); ++ __ gsswle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); // prefer index in SSR ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(T2, T2, SSR); ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_INT)), FSR, noreg, noreg); ++ } ++} ++ ++ ++ ++// used register T2, T3 ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_LONG) + 0 * wordSize); //bound ++ ++ __ warn("lastore Unimplemented yet"); ++ __ gssdle(FSR, T2, AT); ++ } else { ++ index_check(T3, T2); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T3, T3, T2); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_LONG)), FSR, noreg, noreg); ++ } ++} ++ ++// used register T2 ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_FLOAT)); //bound ++ ++ __ warn("fastore Unimplemented yet"); ++ __ gsswlec1(FSF, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_4); ++ __ daddu(T2, T2, SSR); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, Address(T2, arrayOopDesc::base_offset_in_bytes(T_FLOAT)), noreg, noreg, noreg); ++ } ++} ++ ++// used register T2, T3 ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i (T2); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T3); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T2, T3, T2); ++ __ addiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); // base ++ ++ __ lw(AT, T3, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_DOUBLE) + 0 * wordSize); //bound ++ ++ __ warn("dastore Unimplemented yet"); ++ __ gssdlec1(FSF, T2, AT); ++ } else { ++ index_check(T3, 
T2); ++ __ dsll(T2, T2, Address::times_8); ++ __ daddu(T3, T3, T2); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, Address(T3, arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), noreg, noreg, noreg); ++ } ++} ++ ++// used register : T2, T3, T8 ++// T2 : array ++// T3 : subklass ++// T8 : supklass ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ld(FSR, at_tos()); // Value ++ __ lw(SSR, at_tos_p1()); // Index ++ __ ld(T2, at_tos_p2()); // Array ++ ++ // index_check(T2, SSR); ++ index_check_without_pop(T2, SSR); ++ // do array store check - check for NULL value first ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Move subklass into T3 ++ //add for compressedoops ++ __ load_klass(T3, FSR); ++ // Move superklass into T8 ++ //add for compressedoops ++ __ load_klass(T8, T2); ++ __ ld(T8, Address(T8, ObjArrayKlass::element_klass_offset())); ++ // Compress array+index*4+12 into a single register. T2 ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ __ daddiu(T2, T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ // Generate subtype check. ++ // Superklass in T8. Subklass in T3. ++ __ gen_subtype_check(T8, T3, ok_is_subtype); ++ // Come here on failure ++ // object is at FSR ++ __ jmp(Interpreter::_throw_ArrayStoreException_entry); ++ __ delayed()->nop(); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ do_oop_store(_masm, Address(T2, 0), FSR, IS_ARRAY); ++ __ b(done); ++ __ delayed()->nop(); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T9); ++ __ dsll(AT, SSR, UseCompressedOops? Address::times_4 : Address::times_8); ++ __ daddu(T2, T2, AT); ++ do_oop_store(_masm, Address(T2, arrayOopDesc::base_offset_in_bytes(T_OBJECT)), noreg, IS_ARRAY); ++ ++ __ bind(done); ++ __ daddiu(SP, SP, 3 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ guarantee(false, "unimplemented yet!"); ++ __ pop_ptr(T2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_BYTE)); //bound ++ ++ __ warn("bastore Unimplemented yet"); ++ __ gssble(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. 
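++    // Sketch of the check emitted below (the exact layout_helper encoding
++    // is taken on trust from Klass::layout_helper_boolean_diffbit()):
++    //
++    //   jint lh = array->klass()->layout_helper();
++    //   if (lh & Klass::layout_helper_boolean_diffbit())
++    //     value &= 1;          // T_BOOLEAN array: keep only the low bit
++    //   // T_BYTE array: the low 8 bits are stored unchanged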
++ __ load_klass(T9, T2); ++ __ lw(T9, T9, in_bytes(Klass::layout_helper_offset())); ++ ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ move(AT, diffbit); ++ ++ Label L_skip; ++ __ andr(AT, T9, AT); ++ __ beq(AT, R0, L_skip); ++ __ delayed()->nop(); ++ __ andi(FSR, FSR, 0x1); ++ __ bind(L_skip); ++ ++ __ daddu(SSR, T2, SSR); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_BYTE)), FSR, noreg, noreg); ++ } ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ if(UseBoundCheckInstruction) { ++ __ pop_ptr(T2); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ addiu(SSR, SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)); // base ++ ++ __ lw(AT, T2, arrayOopDesc::length_offset_in_bytes()); ++ __ dsll(AT, AT, Address::times_2); ++ __ daddu(AT, T2, AT); ++ __ addiu(AT, AT, arrayOopDesc::base_offset_in_bytes(T_CHAR)); //bound ++ ++ __ warn("castore Unimplemented yet"); ++ __ gsshle(FSR, SSR, AT); ++ } else { ++ index_check(T2, SSR); ++ __ dsll(SSR, SSR, Address::times_2); ++ __ daddu(SSR, T2, SSR); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, Address(SSR, arrayOopDesc::base_offset_in_bytes(T_CHAR)), FSR, noreg, noreg); ++ } ++} ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ sw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ sd(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ swc1(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ sdc1(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ sd(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, Interpreter::stackElementSize); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ daddiu(SP, SP, 2 * Interpreter::stackElementSize); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ // stack: ..., a ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++// blows FSR ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(0, FSR); // load b ++ __ load_ptr(1, A5); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, FSR); // load c ++ __ load_ptr(2, A5); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr(2, FSR); // load b ++ __ store_ptr(2, A5); // store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++// blows FSR ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++// blows FSR ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr(0, T2); // load c ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ 
// stack: ..., a, c, c, b, c ++ __ load_ptr(4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++ ++ // stack: ..., b, c, a, b, c ++} ++ ++// blows FSR, SSR ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++ ++ // stack: ..., c, d, a, b, c, d ++} ++ ++// blows FSR ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ switch (op) { ++ case add : __ addu32(FSR, SSR, FSR); break; ++ case sub : __ subu32(FSR, SSR, FSR); break; ++ case mul : __ mul(FSR, SSR, FSR); break; ++ case _and : __ andr(FSR, SSR, FSR); break; ++ case _or : __ orr(FSR, SSR, FSR); break; ++ case _xor : __ xorr(FSR, SSR, FSR); break; ++ case shl : __ sllv(FSR, SSR, FSR); break; ++ case shr : __ srav(FSR, SSR, FSR); break; ++ case ushr : __ srlv(FSR, SSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// the result stored in FSR, SSR, ++// used registers : T2, T3 ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ daddu(FSR, T2, FSR); break; ++ case sub : __ dsubu(FSR, T2, FSR); break; ++ case _and: __ andr(FSR, T2, FSR); break; ++ case _or : __ orr(FSR, T2, FSR); break; ++ case _xor: __ xorr(FSR, T2, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// java require this bytecode could handle 0x80000000/-1, dont cause a overflow exception, ++// the result is 0x80000000 ++// the godson2 cpu do the same, so we need not handle this specially like x86 ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ __ bind(not_zero); ++ ++ __ pop_i(SSR); ++ if (UseLEXT1) { ++ __ gsdiv(FSR, SSR, FSR); ++ } else { ++ __ div(SSR, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ __ div(SSR, FSR); ++ ++ __ bne(FSR, R0, not_zero); ++ __ delayed()->nop(); ++ //__ brk(7); ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(not_zero); ++ __ mfhi(FSR); ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ if (UseLEXT1) { ++ __ gsdmult(FSR, T2, FSR); ++ } else { ++ __ dmult(T2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ //__ brk(7); //generate FPE ++ __ 
jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ if (UseLEXT1) { ++ __ gsddiv(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mflo(FSR); ++ } ++} ++ ++// NOTE: i DONT use the Interpreter::_throw_ArithmeticException_entry ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne(FSR, R0, normal); ++ __ delayed()->nop(); ++ ++ __ jmp(Interpreter::_throw_ArithmeticException_entry); ++ __ delayed()->nop(); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ ++ if (UseLEXT1) { ++ __ gsdmod(FSR, A2, FSR); ++ } else { ++ __ ddiv(A2, FSR); ++ __ mfhi(FSR); ++ } ++} ++ ++// result in FSR ++// used registers : T0 ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsllv(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrav(FSR, T0, FSR); ++} ++ ++// used registers : T0 ++void TemplateTable::lushr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ dsrlv(FSR, T0, FSR); ++} ++ ++// result in FSF ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ lwc1(FTF, at_sp()); ++ __ add_s(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ lwc1(FTF, at_sp()); ++ __ sub_s(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ lwc1(FTF, at_sp()); ++ __ mul_s(FSF, FTF, FSF); ++ break; ++ case div: ++ __ lwc1(FTF, at_sp()); ++ __ div_s(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_s(F13, FSF); ++ __ lwc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 1 * wordSize); ++} ++ ++// result in SSF||FSF ++// i dont handle the strict flags ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ ldc1(FTF, at_sp()); ++ __ add_d(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ ldc1(FTF, at_sp()); ++ __ sub_d(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ ldc1(FTF, at_sp()); ++ __ mul_d(FSF, FTF, FSF); ++ break; ++ case div: ++ __ ldc1(FTF, at_sp()); ++ __ div_d(FSF, FTF, FSF); ++ break; ++ case rem: ++ __ mov_d(F13, FSF); ++ __ ldc1(F12, at_sp()); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ daddiu(SP, SP, 2 * wordSize); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ subu32(FSR, R0, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ dsubu(FSR, R0, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ neg_s(FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ neg_d(FSF, FSF); ++} ++ ++// used registers : T2 ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ lw(FSR, T2, 0); ++ __ lb(AT, at_bcp(2)); // get constant ++ __ daddu(FSR, FSR, AT); ++ __ sw(FSR, T2, 0); ++} ++ ++// used register : T2 ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_2_byte_integer_at_bcp(FSR, AT, 4); ++ __ hswap(FSR); ++ __ lw(AT, T2, 0); ++ __ daddu(FSR, AT, FSR); ++ __ sw(FSR, T2, 0); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // 
fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_i2f: ++ __ mtc1(FSR, FSF); ++ __ cvt_s_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2d: ++ __ mtc1(FSR, FSF); ++ __ cvt_d_w(FSF, FSF); ++ break; ++ case Bytecodes::_i2b: ++ __ seb(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ andi(FSR, FSR, 0xFFFF); // truncate upper 56 bits ++ break; ++ case Bytecodes::_i2s: ++ __ seh(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ sll(FSR, FSR, 0); ++ break; ++ case Bytecodes::_l2f: ++ __ dmtc1(FSR, FSF); ++ __ cvt_s_l(FSF, FSF); ++ break; ++ case Bytecodes::_l2d: ++ __ dmtc1(FSR, FSF); ++ __ cvt_d_l(FSF, FSF); ++ break; ++ case Bytecodes::_f2i: ++ { ++ Label L; ++ ++ __ trunc_w_s(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2l: ++ { ++ Label L; ++ ++ __ trunc_l_s(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ __ c_un_s(FSF, FSF); //NaN? ++ __ movt(FSR, R0); ++ ++ __ bne(AT, FSR, L); ++ __ delayed()->lui(T9, 0x8000); ++ ++ __ mfc1(AT, FSF); ++ __ andr(AT, AT, T9); ++ ++ __ dsll32(T9, T9, 0); ++ __ movn(FSR, T9, AT); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_f2d: ++ __ cvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ { ++ Label L; ++ ++ __ trunc_w_d(F12, FSF); ++ __ move(AT, 0x7fffffff); ++ __ mfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->addiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? ++ __ subu32(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2l: ++ { ++ Label L; ++ ++ __ trunc_l_d(F12, FSF); ++ __ daddiu(AT, R0, -1); ++ __ dsrl(AT, AT, 1); ++ __ dmfc1(FSR, F12); ++ ++ __ bne(FSR, AT, L); ++ __ delayed()->mtc1(R0, F12); ++ ++ __ cvt_d_w(F12, F12); ++ __ c_ult_d(FSF, F12); ++ __ bc1f(L); ++ __ delayed()->daddiu(T9, R0, -1); ++ ++ __ c_un_d(FSF, FSF); //NaN? 
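++        // The f2i/f2l/d2i/d2l sequences implement the Java narrowing rules
++        // (JLS 5.1.3); a rough reference version of what this block computes:
++        //
++        //   jlong d2l(jdouble d) {
++        //     if (d != d)                  return 0;          // NaN
++        //     if (d >= (jdouble)max_jlong) return max_jlong;  // saturate
++        //     if (d <= (jdouble)min_jlong) return min_jlong;  // saturate
++        //     return (jlong)d;
++        //   }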
++ __ subu(FSR, T9, AT); ++ __ movt(FSR, R0); ++ ++ __ bind(L); ++ } ++ break; ++ case Bytecodes::_d2f: ++ __ cvt_s_d(FSF, FSF); ++ break; ++ default : ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ __ pop(T0); ++ __ pop(R0); ++ ++ __ slt(AT, T0, FSR); ++ __ slt(FSR, FSR, T0); ++ __ subu(FSR, FSR, AT); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ __ ori(FSR, R0, 1); ++ __ ori(AT, R0, 1); ++ ++ if (is_float) { ++ __ lwc1(FTF, at_sp()); ++ __ daddiu(SP, SP, 1 * wordSize); ++ if (unordered_result < 0) { ++ __ c_olt_s(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_ult_s(FTF, FSF); ++ } else { ++ __ c_ult_s(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_olt_s(FTF, FSF); ++ } ++ } else { ++ __ ldc1(FTF, at_sp()); ++ __ daddiu(SP, SP, 2 * wordSize); ++ if (unordered_result < 0) { ++ __ c_olt_d(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_ult_d(FTF, FSF); ++ } else { ++ __ c_ult_d(FSF, FTF); ++ __ movf(FSR, R0); ++ __ c_olt_d(FTF, FSF); ++ } ++ } ++ ++ __ movf(AT, R0); ++ __ subu(FSR, FSR, AT); ++} ++ ++ ++// used registers : T3, A7, Rnext ++// FSR : return bci, this is defined by the vm specification ++// T2 : MDO taken count ++// T3 : method ++// A7 : offset ++// Rnext : next bytecode, this is required by dispatch_base ++void TemplateTable::branch(bool is_jsr, bool is_wide) { ++ __ get_method(T3); ++ __ profile_taken_branch(A7, T2); // only C2 meaningful ++ ++ const ByteSize be_offset = MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T4 with the branch displacement ++ if (!is_wide) { ++ __ lb(A7, BCP, 1); ++ __ lbu(AT, BCP, 2); ++ __ dsll(A7, A7, 8); ++ __ orr(A7, A7, AT); ++ } else { ++ __ get_4_byte_integer_at_bcp(A7, AT, 1); ++ __ swap(A7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occuring below. 
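++  // Sketch of the jsr case below: the "return address" pushed by jsr is a
++  // bytecode index rather than a machine pc (accessor names here are only
++  // illustrative):
++  //
++  //   ret_bci = (bcp + (is_wide ? 5 : 3)) - method->code_base();
++  //   push_int(ret_bci);
++  //   bcp += displacement;      // enter the subroutine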
++ if (is_jsr) { ++ // Pre-load the next target bytecode into Rnext ++ __ daddu(AT, BCP, A7); ++ __ lbu(Rnext, AT, 0); ++ ++ // compute return address as bci in FSR ++ __ daddiu(FSR, BCP, (is_wide?5:3) - in_bytes(ConstMethod::codes_offset())); ++ __ ld(AT, T3, in_bytes(Method::const_offset())); ++ __ dsubu(FSR, FSR, AT); ++ // Adjust the bcp in BCP by the displacement in A7 ++ __ daddu(BCP, BCP, A7); ++ // jsr returns atos that is not an oop ++ // Push return address ++ __ push_i(FSR); ++ // jsr returns vtos ++ __ dispatch_only_noverify(vtos); ++ ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T4 ++ __ daddu(BCP, BCP, A7); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T3: method ++ // T4: target offset ++ // BCP: target bcp ++ // LVP: locals pointer ++ __ bgtz(A7, dispatch); // check if forward or backward branch ++ __ delayed()->nop(); ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ bne(AT, R0, has_counters); ++ __ delayed()->nop(); ++ __ push(T3); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ T3); ++ __ pop(T3); ++ __ ld(AT, T3, in_bytes(Method::method_counters_offset())); // use AT as MDO, TEMP ++ __ beq(AT, R0, dispatch); ++ __ delayed()->nop(); ++ __ bind(has_counters); ++ ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ int mask = ((1 << Tier0BackedgeNotifyFreqLog) - 1) << InvocationCounter::count_shift; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
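++        // (i.e. has a MethodData* already been allocated for this method?)
++        //
++        // Both increment_mask_and_jump() uses below boil down to roughly:
++        //
++        //   counter += InvocationCounter::count_increment;
++        //   if ((counter & mask) == 0)         // notification period reached
++        //     goto backedge_counter_overflow;  // may request an OSR compile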
++ __ ld(T0, Address(T3, in_bytes(Method::method_data_offset()))); ++ __ beq(T0, R0, no_mdo); ++ __ delayed()->nop(); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T0, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ __ beq(R0, R0, dispatch); ++ __ delayed()->nop(); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ld(T0, Address(T3, Method::method_counters_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T1, false, Assembler::zero, &backedge_counter_overflow); ++ if (!UseOnStackReplacement) { ++ __ bind(backedge_counter_overflow); ++ } ++ } else { ++ // increment back edge counter ++ __ ld(T1, T3, in_bytes(Method::method_counters_offset())); ++ __ lw(T0, T1, in_bytes(be_offset)); ++ __ increment(T0, InvocationCounter::count_increment); ++ __ sw(T0, T1, in_bytes(be_offset)); ++ ++ // load invocation counter ++ __ lw(T1, T1, in_bytes(inv_offset)); ++ // buffer bit added, mask no needed ++ ++ // dadd backedge counter & invocation counter ++ __ daddu(T1, T1, T0); ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ // T1 : backedge counter & invocation counter ++ if (Assembler::is_simm16(InvocationCounter::InterpreterProfileLimit)) { ++ __ slti(AT, T1, InvocationCounter::InterpreterProfileLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterProfileLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T1, AT); ++ } ++ ++ __ bne(AT, R0, dispatch); ++ __ delayed()->nop(); ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(T1, profile_method); ++ ++ if (UseOnStackReplacement) { ++ if (Assembler::is_simm16(InvocationCounter::InterpreterBackwardBranchLimit)) { ++ __ slti(AT, T2, InvocationCounter::InterpreterBackwardBranchLimit); ++ } else { ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T2, AT); ++ } ++ ++ __ bne(AT, R0, dispatch); ++ __ delayed()->nop(); ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the methodDataOop, which value does not get reset on ++ // the call to frequency_counter_overflow(). ++ // To avoid excessive calls to the overflow routine while ++ // the method is being compiled, dadd a second test to make ++ // sure the overflow function is called only once every ++ // overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andi(AT, T2, overflow_frequency-1); ++ __ beq(AT, R0, backedge_counter_overflow); ++ __ delayed()->nop(); ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against AT, which is the sum of the counters ++ __ li(AT, (long)&InvocationCounter::InterpreterBackwardBranchLimit); ++ __ lw(AT, AT, 0); ++ __ slt(AT, T1, AT); ++ __ beq(AT, R0, backedge_counter_overflow); ++ __ delayed()->nop(); ++ } ++ } ++ } ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into Rnext ++ __ lbu(Rnext, BCP, 0); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // Rnext: target bytecode ++ // BCP: target bcp ++ __ dispatch_only(vtos, true); ++ ++ if (UseLoopCounter) { ++ if (ProfileInterpreter) { ++ // Out-of-line code to allocate method data oop. 
++ __ bind(profile_method); ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ b(dispatch); ++ __ delayed()->nop(); ++ } ++ ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ subu(A7, BCP, A7); // branch bcp ++ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), A7); ++ ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) ++ // V1: osr adapter frame return address ++ // LVP: locals pointer ++ // BCP: bcp ++ __ beq(V0, R0, dispatch); ++ __ delayed()->nop(); ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ lb(T3, V0, nmethod::state_offset()); ++ __ move(AT, nmethod::in_use); ++ __ bne(AT, T3, dispatch); ++ __ delayed()->nop(); ++ ++ // We have the address of an on stack replacement routine in rax. ++ // In preparation of invoking it, first we must migrate the locals ++ // and monitors from off the interpreter frame on the stack. ++ // Ensure to save the osr nmethod over the migration call, ++ // it will be preserved in Rnext. ++ __ move(Rnext, V0); ++ const Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ // refer to osrBufferPointer in c1_LIRAssembler_mips.cpp ++ __ move(T0, V0); ++ ++ // pop the interpreter frame ++ __ ld(A7, Address(FP, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ //FIXME, shall we keep the return address on the stack? ++ __ leave(); // remove frame anchor ++ __ move(LVP, RA); ++ __ move(SP, A7); ++ ++ __ move(AT, -(StackAlignmentInBytes)); ++ __ andr(SP , SP , AT); ++ ++ // push the (possibly adjusted) return address ++ //refer to osr_entry in c1_LIRAssembler_mips.cpp ++ __ ld(AT, Rnext, nmethod::osr_entry_point_offset()); ++ __ jr(AT); ++ __ delayed()->nop(); ++ } ++ } ++} ++ ++ ++void TemplateTable::if_0cmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ case less: ++ __ bgez(FSR, not_taken); ++ break; ++ case less_equal: ++ __ bgtz(FSR, not_taken); ++ break; ++ case greater: ++ __ blez(FSR, not_taken); ++ break; ++ case greater_equal: ++ __ bltz(FSR, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ ++ __ pop_i(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ case less: ++ __ slt(AT, SSR, FSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case less_equal: ++ __ slt(AT, FSR, SSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ case greater: ++ __ slt(AT, FSR, SSR); ++ __ beq(AT, R0, not_taken); ++ break; ++ case greater_equal: ++ __ slt(AT, SSR, FSR); ++ __ bne(AT, R0, not_taken); ++ break; ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, 
vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ switch(cc) { ++ case not_equal: ++ __ beq(FSR, R0, not_taken); ++ break; ++ case equal: ++ __ bne(FSR, R0, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ // __ lw(SSR, SP, 0); ++ __ pop_ptr(SSR); ++ switch(cc) { ++ case not_equal: ++ __ beq(SSR, FSR, not_taken); ++ break; ++ case equal: ++ __ bne(SSR, FSR, not_taken); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ delayed()->nop(); ++ ++ branch(false, false); ++ ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::ret() { ++ transition(vtos, vtos); ++ ++ locals_index(T2); ++ __ ld(T2, T2, 0); ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used registers : T1, T2, T3 ++// T1 : method ++// T2 : returb bci ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ ++ locals_index_wide(T2); ++ __ ld(T2, T2, 0); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ ++ __ get_method(T1); ++ __ ld(BCP, T1, in_bytes(Method::const_offset())); ++ __ daddu(BCP, BCP, T2); ++ __ daddiu(BCP, BCP, in_bytes(ConstMethod::codes_offset())); ++ ++ __ dispatch_next(vtos, 0, true); ++} ++ ++// used register T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : low ++// A7 : high ++// Rnext : dest bytecode, required by dispatch_base ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // load lo & hi ++ __ lw(T3, T2, 1 * BytesPerInt); ++ __ swap(T3); ++ __ lw(A7, T2, 2 * BytesPerInt); ++ __ swap(A7); ++ ++ // check against lo & hi ++ __ slt(AT, FSR, T3); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ __ slt(AT, A7, FSR); ++ __ bne(AT, R0, default_case); ++ __ delayed()->nop(); ++ ++ // lookup dispatch offset, in A7 big endian ++ __ dsubu(FSR, FSR, T3); ++ __ dsll(AT, FSR, Address::times_4); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(FSR, T9, T3); ++ ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++} ++ ++void TemplateTable::lookupswitch() { ++ transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++// used registers : T2, T3, A7, Rnext ++// T2 : bytecode pointer ++// T3 : pair index ++// A7 : offset ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ ++ // swap FSR so we can avoid swapping the table 
entries ++ __ swap(FSR); ++ ++ // align BCP ++ __ daddiu(T2, BCP, BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(T2, T2, AT); ++ ++ // set counter ++ __ lw(T3, T2, BytesPerInt); ++ __ swap(T3); ++ __ b(loop_entry); ++ __ delayed()->nop(); ++ ++ // table search ++ __ bind(loop); ++ // get the entry value ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(AT, AT, 2 * BytesPerInt); ++ ++ // found? ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ ++ __ bind(loop_entry); ++ __ bgtz(T3, loop); ++ __ delayed()->daddiu(T3, T3, -1); ++ ++ // default case ++ __ profile_switch_default(FSR); ++ __ lw(A7, T2, 0); ++ __ b(continue_execution); ++ __ delayed()->nop(); ++ ++ // entry found -> get offset ++ __ bind(found); ++ __ dsll(AT, T3, Address::times_8); ++ __ daddu(AT, T2, AT); ++ __ lw(A7, AT, 3 * BytesPerInt); ++ __ profile_switch_case(T3, FSR, T2); ++ ++ // continue execution ++ __ bind(continue_execution); ++ __ swap(A7); ++ __ daddu(BCP, BCP, A7); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++// used registers : T0, T1, T2, T3, A7, Rnext ++// T2 : pairs address(array) ++// Rnext : dest bytecode ++// the data after the opcode is the same as lookupswitch ++// see Rewriter::rewrite_method for more information ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. ++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // register allocation ++ const Register array = T2; ++ const Register i = T3, j = A7; ++ const Register h = T1; ++ const Register temp = T0; ++ const Register key = FSR; ++ ++ // setup array ++ __ daddiu(array, BCP, 3*BytesPerInt); ++ __ li(AT, -BytesPerInt); ++ __ andr(array, array, AT); ++ ++ // initialize i & j ++ __ move(i, R0); ++ __ lw(j, array, - 1 * BytesPerInt); ++ // Convert j into native byteordering ++ __ swap(j); ++ ++ // and start ++ Label entry; ++ __ b(entry); ++ __ delayed()->nop(); ++ ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ daddu(h, i, j); ++ __ dsrl(h, h, 1); ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ dsll(AT, h, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ ++ __ slt(AT, key, temp); ++ __ movz(i, h, AT); ++ __ movn(j, h, AT); ++ ++ // while (i+1 < j) ++ __ bind(entry); ++ __ daddiu(h, i, 1); ++ __ slt(AT, h, j); ++ __ bne(AT, R0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // end of binary search, result index is i (must check again!) 
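++  // For reference, the (4-byte aligned) operand layout being searched, per
++  // the JVM spec; every entry is a big-endian 32-bit int, which is why the
++  // values are swap()ed and pairs are addressed with an 8-byte stride:
++  //
++  //   array[-2] : default offset
++  //   array[-1] : npairs
++  //   array[ 0] : match[0]     array[ 1] : offset[0]
++  //   array[ 2] : match[1]     array[ 3] : offset[1]
++  //   ...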
++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(temp, AT, 0 * BytesPerInt); ++ __ swap(temp); ++ __ bne(key, temp, default_case); ++ __ delayed()->nop(); ++ ++ // entry found -> j = offset ++ __ dsll(AT, i, Address::times_8); ++ __ daddu(AT, array, AT); ++ __ lw(j, AT, 1 * BytesPerInt); ++ __ profile_switch_case(i, key, array); ++ __ swap(j); ++ ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ lw(j, array, - 2 * BytesPerInt); ++ __ swap(j); ++ __ daddu(BCP, BCP, j); ++ __ lbu(Rnext, BCP, 0); ++ __ dispatch_only(vtos, true); ++} ++ ++void TemplateTable::_return(TosState state) { ++ transition(state, state); ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ __ ld(T1, aaddress(0)); ++ __ load_klass(LVP, T1); ++ __ lw(LVP, LVP, in_bytes(Klass::access_flags_offset())); ++ __ move(AT, JVM_ACC_HAS_FINALIZER); ++ __ andr(AT, AT, LVP); ++ Label skip_register_finalizer; ++ __ beq(AT, R0, skip_register_finalizer); ++ __ delayed()->nop(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::register_finalizer), T1); ++ __ bind(skip_register_finalizer); ++ } ++ ++ Register thread = TREG; ++#ifndef OPT_THREAD ++ __ get_thread(thread); ++#endif ++ ++ if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) { ++ Label no_safepoint; ++ NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); ++ __ lb(AT, thread, in_bytes(Thread::polling_page_offset())); ++ __ andi(AT, AT, SafepointMechanism::poll_bit()); ++ __ beq(AT, R0, no_safepoint); ++ __ delayed()->nop(); ++ __ push(state); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::at_safepoint)); ++ __ pop(state); ++ __ bind(no_safepoint); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T9); ++ __ sync(); ++ ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++// ---------------------------------------------------------------------------- ++// Volatile variables demand their effects be made known to all CPU's ++// in order. Store buffers on most chips allow reads & writes to ++// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode ++// without some kind of memory barrier (i.e., it's not sufficient that ++// the interpreter does not reorder volatile references, the hardware ++// also must not reorder them). ++// ++// According to the new Java Memory Model (JMM): ++// (1) All volatiles are serialized wrt to each other. ALSO reads & ++// writes act as aquire & release, so: ++// (2) A read cannot let unrelated NON-volatile memory refs that ++// happen after the read float up to before the read. It's OK for ++// non-volatile memory refs that happen before the volatile read to ++// float down below it. ++// (3) Similar a volatile write cannot let unrelated NON-volatile ++// memory refs that happen BEFORE the write float down to after the ++// write. 
It's OK for non-volatile memory refs that happen after the ++// volatile write to float up before it. ++// ++// We only put in barriers around volatile refs (they are expensive), ++// not _between_ memory refs (that would require us to track the ++// flavor of the previous memory refs). Requirements (2) and (3) ++// require some barriers before volatile stores and after volatile ++// loads. These nearly cover requirement (1) but miss the ++// volatile-store-volatile-load case. This final case is placed after ++// volatile-stores although it could just as well go before ++// volatile-loads. ++void TemplateTable::volatile_barrier() { ++ if(os::is_MP()) __ sync(); ++} ++ ++// we dont shift left 2 bits in get_cache_and_index_at_bcp ++// for we always need shift the index we use it. the ConstantPoolCacheEntry ++// is 16-byte long, index is the index in ++// ConstantPoolCache, so cache + base_offset() + index * 16 is ++// the corresponding ConstantPoolCacheEntry ++// used registers : T2 ++// NOTE : the returned index need also shift left 4 to get the address! ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) { ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ const Register temp = A1; ++ assert_different_registers(Rcache, index); ++ ++ Label resolved; ++ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } ++ ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ // is resolved? ++ int i = (int)code; ++ __ addiu(temp, temp, -i); ++ __ beq(temp, R0, resolved); ++ __ delayed()->nop(); ++ ++ // resolve first time through ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ ++ __ move(temp, i); ++ __ call_VM(NOREG, entry, temp); ++ ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ bind(resolved); ++} ++ ++// The Rcache and index registers must be set before call ++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) { ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(off, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f2_offset())); ++ // Flags ++ __ ld(flags, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::flags_offset())); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ld(obj, AT, in_bytes(cp_base_offset + ConstantPoolCacheEntry::f1_offset())); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ld(obj, Address(obj, mirror_offset)); ++ ++ __ resolve_oop_handle(obj, T9); ++ } ++} ++ ++// get the method, itable_index and flags of the current invoke ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) { ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ 
assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ assert(is_invokevirtual == (byte_no == f2_byte), "is invokevirtual flag redundant"); ++ // determine constant pool cache field offsets ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? sizeof(u4): sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ ++ //assert(wordSize == 8, "adjust code below"); ++ // note we shift 4 not 2, for we get is the true inde ++ // of ConstantPoolCacheEntry, not the shifted 2-bit index as x86 version ++ __ dsll(AT, index, Address::times_ptr); ++ __ daddu(AT, cache, AT); ++ __ ld(method, AT, method_offset); ++ ++ if (itable_index != NOREG) { ++ __ ld(itable_index, AT, index_offset); ++ } ++ __ ld(flags, AT, flags_offset); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, Register index, ++ bool is_static, bool has_tos) { ++ // do the JVMTI work here to avoid disturbing the register state below ++ // We use c_rarg registers here because we want to use the register used in ++ // the call to the VM ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. ++ Label L1; ++ // kill FSR ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ assert_different_registers(cache, index, AT); ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp3, 1); ++ ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(ConstantPoolCache::base_offset())); ++ __ shl(tmp3, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp3); ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ __ ld(tmp1, SP, 0); ++ __ verify_oop(tmp1); ++ } ++ // tmp1: object pointer or NULL ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_access), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) { ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. 
++ __ verify_oop(r); ++} ++ ++// used registers : T1, T2, T3, T1 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T1 : field address ++// The flags 31, 30, 29, 28 together build a 4 bit number 0 to 8 with the ++// following mapping to the TosState states: ++// btos: 0 ++// ctos: 1 ++// stos: 2 ++// itos: 3 ++// ltos: 4 ++// ftos: 5 ++// dtos: 6 ++// atos: 7 ++// vtos: 8 ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ __ daddu(index, obj, off); ++ ++ const Address field(index, 0); ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(btos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ztos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(itos); ++ ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ // atos ++ //add for compressedoops ++ do_oop_load(_masm, Address(index, 0), FSR, IN_HEAP); ++ __ push(atos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ctos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, field, 
noreg, noreg); ++ __ push(stos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ // FIXME : the load/store should be atomic, we have no simple method to do this in mips32 ++ // ltos ++ __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, FSR, field, noreg, noreg); ++ __ push(ltos); ++ ++ // Don't rewrite to _fast_lgetfield for potential volatile case. ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, T3, T2); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ __ push(dtos); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, T3, T2); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. ++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ transition(vtos, vtos); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ assert_different_registers(cache, index, tmp4); ++ ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(AT, AT, 0); ++ __ beq(AT, R0, L1); ++ __ delayed()->nop(); ++ ++ __ get_cache_and_index_at_bcp(tmp2, tmp4, 1); ++ ++ if (is_static) { ++ __ move(tmp1, R0); ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. 
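++      // Expression stack picture being decoded here (one slot per value
++      // word; long/double values occupy two slots), sketched:
++      //
++      //   category-1 value:               category-2 value (long/double):
++      //     SP + 0 slots : value            SP + 0 slots : value
++      //     SP + 1 slot  : object           SP + 2 slots : object
++      //
++      // hence expr_offset_in_bytes(2) for ltos/dtos and (1) otherwise.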
++ Label two_word, valsize_known; ++ __ dsll(AT, tmp4, Address::times_8); ++ __ daddu(AT, tmp2, AT); ++ __ ld(tmp3, AT, in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset())); ++ __ shr(tmp3, ConstantPoolCacheEntry::tos_state_shift); ++ ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ move(tmp1, SP); ++ __ move(AT, ltos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ move(AT, dtos); ++ __ beq(tmp3, AT, two_word); ++ __ delayed()->nop(); ++ __ b(valsize_known); ++ __ delayed()->daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(1) ); ++ ++ __ bind(two_word); ++ __ daddiu(tmp1, tmp1, Interpreter::expr_offset_in_bytes(2)); ++ ++ __ bind(valsize_known); ++ // setup object pointer ++ __ ld(tmp1, tmp1, 0*wordSize); ++ } ++ // cache entry pointer ++ __ daddiu(tmp2, tmp2, in_bytes(cp_base_offset)); ++ __ shl(tmp4, LogBytesPerWord); ++ __ daddu(tmp2, tmp2, tmp4); ++ // object (tos) ++ __ move(tmp3, SP); ++ // tmp1: object pointer set up above (NULL if static) ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++// used registers : T0, T1, T2, T3, T8 ++// T1 : flags ++// T2 : off ++// T3 : obj ++// T8 : volatile bit ++// see ConstantPoolCacheEntry::set_field for more info ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) { ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = T3; ++ ++ const Register scratch = T8; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, flags); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ ++ Label notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ assert(btos == 0, "change code, btos != 0"); ++ ++ // btos ++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift); ++ __ andi(flags, flags, ConstantPoolCacheEntry::tos_state_mask); ++ __ bne(flags, R0, notByte); ++ __ delayed()->nop(); ++ ++ __ pop(btos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ztos ++ __ bind(notByte); ++ __ move(AT, ztos); ++ __ bne(flags, AT, notBool); ++ __ delayed()->nop(); ++ ++ __ pop(ztos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ andi(FSR, FSR, 0x1); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // itos ++ __ bind(notBool); ++ __ move(AT, itos); ++ __ bne(flags, AT, notInt); ++ __ delayed()->nop(); ++ ++ __ pop(itos); ++ if (!is_static) { ++ 
pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_INT, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // atos ++ __ bind(notInt); ++ __ move(AT, atos); ++ __ bne(flags, AT, notObj); ++ __ delayed()->nop(); ++ ++ __ pop(atos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ ++ do_oop_store(_masm, Address(obj, off, Address::times_1, 0), FSR); ++ ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ctos ++ __ bind(notObj); ++ __ move(AT, ctos); ++ __ bne(flags, AT, notChar); ++ __ delayed()->nop(); ++ ++ __ pop(ctos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // stos ++ __ bind(notChar); ++ __ move(AT, stos); ++ __ bne(flags, AT, notShort); ++ __ delayed()->nop(); ++ ++ __ pop(stos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ltos ++ __ bind(notShort); ++ __ move(AT, ltos); ++ __ bne(flags, AT, notLong); ++ __ delayed()->nop(); ++ ++ __ pop(ltos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_LONG, IN_HEAP, Address(T9), FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ // ftos ++ __ bind(notLong); ++ __ move(AT, ftos); ++ __ bne(flags, AT, notFloat); ++ __ delayed()->nop(); ++ ++ __ pop(ftos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T9), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, off, true, byte_no); ++ } ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ ++ // dtos ++ __ bind(notFloat); ++ __ move(AT, dtos); ++#ifdef ASSERT ++ __ bne(flags, AT, notDouble); ++ __ delayed()->nop(); ++#endif ++ ++ __ pop(dtos); ++ if (!is_static) { ++ pop_and_check_object(obj); ++ } ++ __ daddu(T9, obj, off); ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T9), noreg, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, off, true, byte_no); ++ } ++ ++#ifdef ASSERT ++ __ b(Done); ++ __ delayed()->nop(); ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++// used registers : T1, T2, T3 
++// T1 : cp_entry ++// T2 : obj ++// T3 : value pointer ++void TemplateTable::jvmti_post_fast_field_mod() { ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L2; ++ //kill AT, T1, T2, T3, T9 ++ Register tmp1 = T2; ++ Register tmp2 = T1; ++ Register tmp3 = T3; ++ Register tmp4 = T9; ++ __ li(AT, JvmtiExport::get_field_modification_count_addr()); ++ __ lw(tmp3, AT, 0); ++ __ beq(tmp3, R0, L2); ++ __ delayed()->nop(); ++ __ pop_ptr(tmp1); ++ __ verify_oop(tmp1); ++ __ push_ptr(tmp1); ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push_d(FSF); break; ++ case Bytecodes::_fast_fputfield: __ push_f(); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(tmp3, SP); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(tmp2, FSR, 1); ++ __ verify_oop(tmp1); ++ // tmp1: object pointer copied above ++ // tmp2: cache entry pointer ++ // tmp3: jvalue object on the stack ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ tmp1, tmp2, tmp3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop_d(); break; ++ case Bytecodes::_fast_fputfield: __ pop_f(); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ default: break; ++ } ++ __ bind(L2); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T2 : index & off & field address ++// T3 : cache & obj ++// T1 : flags ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T8; ++ ++ ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // test for volatile with T1 ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T1, AT, in_bytes(base + ConstantPoolCacheEntry::flags_offset())); ++ ++ // replace index with field offset from cache entry ++ __ ld(T2, AT, in_bytes(base + ConstantPoolCacheEntry::f2_offset())); ++ ++ Label Done; ++ { ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, T1); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(T3); ++ ++ if (bytecode() != Bytecodes::_fast_aputfield) { ++ // field address ++ __ daddu(T2, T3, T2); ++ } ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_zputfield: ++ __ andi(FSR, FSR, 0x1); // boolean is true if LSB is 1 ++ 
__ access_store_at(T_BOOLEAN, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, Address(T2), FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, Address(T2), noreg, noreg, noreg); ++ break; ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, Address(T3, T2, Address::times_1, 0), FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// used registers : T2, T3, T1 ++// T3 : cp_entry & cache ++// T2 : index & offset ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T8; ++ ++ // do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. ++ Label L1; ++ __ li(AT, (intptr_t)JvmtiExport::get_field_access_count_addr()); ++ __ lw(T3, AT, 0); ++ __ beq(T3, R0, L1); ++ __ delayed()->nop(); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(T3, T1, 1); ++ __ move(TSR, FSR); ++ __ verify_oop(FSR); ++ // FSR: object pointer copied above ++ // T3: cache entry pointer ++ __ call_VM(NOREG, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ FSR, T3); ++ __ move(FSR, TSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 1); ++ ++ // Must prevent reordering of the following cp cache loads with bytecode load ++ __ sync(); ++ ++ // replace index with field offset from cache entry ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ __ daddu(FSR, FSR, T2); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(FSR), noreg, noreg); ++ break; ++ case 
Bytecodes::_fast_lgetfield: ++ __ stop("should not be rewritten"); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg, Address(FSR), noreg, noreg); ++ break; ++ case Bytecodes::_fast_agetfield: ++ //add for compressedoops ++ do_oop_load(_masm, Address(FSR, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++// generator for _fast_iaccess_0, _fast_aaccess_0, _fast_faccess_0 ++// used registers : T1, T2, T3, T1 ++// T1 : obj & field address ++// T2 : off ++// T3 : cache ++// T1 : index ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T8; ++ ++ // get receiver ++ __ ld(T1, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(T3, T2, 2); ++ __ dsll(AT, T2, Address::times_8); ++ __ daddu(AT, T3, AT); ++ __ ld(T2, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::f2_offset())); ++ ++ { ++ __ ld(AT, AT, in_bytes(ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ move(scratch, 1 << ConstantPoolCacheEntry::is_volatile_shift); ++ __ andr(scratch, scratch, AT); ++ ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ daddiu(BCP, BCP, 1); ++ __ null_check(T1); ++ __ daddu(T1, T1, T2); ++ ++ if (state == itos) { ++ __ access_load_at(T_INT, IN_HEAP, FSR, Address(T1), noreg, noreg); ++ } else if (state == atos) { ++ do_oop_load(_masm, Address(T1, 0), FSR, IN_HEAP); ++ __ verify_oop(FSR); ++ } else if (state == ftos) { ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg, Address(T1), noreg, noreg); ++ } else { ++ ShouldNotReachHere(); ++ } ++ __ daddiu(BCP, BCP, -1); ++ ++ { ++ Label notVolatile; ++ __ beq(scratch, R0, notVolatile); ++ __ delayed()->nop(); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++ ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++void TemplateTable::count_calls(Register method, Register temp) { ++ // implemented elsewhere ++ ShouldNotReachHere(); ++} ++ ++// method, index, recv, flags: T1, T2, T3, T1 ++// byte_no = 2 for _invokevirtual, 1 else ++// T0 : return address ++// get the method & index of the invoke, and push the return address of ++// the invoke(first word in the frame) ++// this address is where the return code jmp to. ++// NOTE : this method will set T3&T1 as recv&flags ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. 
++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) { ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic),""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == T1, "error flags reg."); ++ assert(recv == noreg || recv == T3, "error recv reg."); ++ ++ // setup registers & access constant pool cache ++ if(recv == noreg) recv = T3; ++ if(flags == noreg) flags = T1; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ if (is_invokedynamic || is_invokehandle) { ++ Label L_no_push; ++ __ move(AT, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ andr(AT, AT, flags); ++ __ beq(AT, R0, L_no_push); ++ __ delayed()->nop(); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ Register tmp = SSR; ++ __ push(tmp); ++ __ move(tmp, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, tmp, recv); ++ __ pop(tmp); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ move(AT, ConstantPoolCacheEntry::parameter_size_mask); ++ __ andr(recv, flags, AT); ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address
++ const int receiver_is_at_end = -1; // back off one slot to get receiver
++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end);
++ __ ld(recv, recv_addr);
++ __ verify_oop(recv);
++ }
++ if(save_flags) {
++ __ move(BCP, flags);
++ }
++
++ // compute return type
++ __ dsrl(flags, flags, ConstantPoolCacheEntry::tos_state_shift);
++ __ andi(flags, flags, 0xf);
++
++ // Make sure we don't need to mask flags for tos_state_shift after the above shift
++ ConstantPoolCacheEntry::verify_tos_state_shift();
++ // load return address
++ {
++ const address table = (address) Interpreter::invoke_return_entry_table_for(code);
++ __ li(AT, (long)table);
++ __ dsll(flags, flags, LogBytesPerWord);
++ __ daddu(AT, AT, flags);
++ __ ld(RA, AT, 0);
++ }
++
++ if (save_flags) {
++ __ move(flags, BCP);
++ __ restore_bcp();
++ }
++}
++
++// used registers : T0, T3, T1, T2
++// T3 : recv, these two registers follow the convention set by prepare_invoke
++// T1 : flags, klass
++// Rmethod : method, index must be Rmethod
++void TemplateTable::invokevirtual_helper(Register index,
++ Register recv,
++ Register flags) {
++
++ assert_different_registers(index, recv, flags, T2);
++
++ // Test for an invoke of a final method
++ Label notFinal;
++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift));
++ __ andr(AT, flags, AT);
++ __ beq(AT, R0, notFinal);
++ __ delayed()->nop();
++
++ Register method = index; // method must be Rmethod
++ assert(method == Rmethod, "methodOop must be Rmethod for interpreter calling convention");
++
++ // do the call - the index is actually the method to call
++ // the index is indeed methodOop, for this is vfinal,
++ // see ConstantPoolCacheEntry::set_method for more info
++
++
++ // It's final, need a null check here!
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(T2); ++ ++ // T2: tmp, used for mdp ++ // method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T2, method, T9, true); ++ ++ __ jump_from_interpreted(method, T2); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T2, recv); ++ ++ // profile this call ++ __ profile_virtual_call(T2, T0, T1); ++ ++ // get target methodOop & entry point ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ __ dsll(AT, index, Address::times_ptr); ++ // T2: receiver ++ __ daddu(AT, T2, AT); ++ //this is a ualign read ++ __ ld(method, AT, base + vtableEntry::method_offset_in_bytes()); ++ __ profile_arguments_type(T2, method, T9, true); ++ __ jump_from_interpreted(method, T2); ++ ++} ++ ++void TemplateTable::invokevirtual(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3, T1); ++ // now recv & flags in T3, T1 ++ invokevirtual_helper(Rmethod, T3, T1); ++} ++ ++// T9 : entry ++// Rmethod : method ++void TemplateTable::invokespecial(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG, T3); ++ // now recv & flags in T3, T1 ++ __ verify_oop(T3); ++ __ null_check(T3); ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ __ move(T0, T3); ++} ++ ++void TemplateTable::invokestatic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, Rmethod, NOREG); ++ ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++} ++ ++// i have no idea what to do here, now. for future change. FIXME. ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on mips64"); ++} ++ ++// used registers : T0, T1, T2, T3, T1, A7 ++// T0 : itable, vtable, entry ++// T1 : interface ++// T3 : receiver ++// T1 : flags, klass ++// Rmethod : index, method, this is required by interpreter_entry ++void TemplateTable::invokeinterface(int byte_no) { ++ transition(vtos, vtos); ++ //this method will use T1-T4 and T0 ++ assert(byte_no == f1_byte, "use this argument"); ++ prepare_invoke(byte_no, T2, Rmethod, T3, T1); ++ // T2: reference klass (from f1) if interface method ++ // Rmethod: method (from f2) ++ // T3: receiver ++ // T1: flags ++ ++ // First check for Object case, then private interface method, ++ // then regular interface method. ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details. 
++ Label notObjectMethod; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notObjectMethod); ++ __ delayed()->nop(); ++ ++ invokevirtual_helper(Rmethod, T3, T1); ++ // no return from above ++ __ bind(notObjectMethod); ++ ++ Label no_such_interface; // for receiver subtype check ++ Register recvKlass; // used for exception processing ++ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ move(AT, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ andr(AT, T1, AT); ++ __ beq(AT, R0, notVFinal); ++ __ delayed()->nop(); ++ ++ // Get receiver klass into FSR - also a null check ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(FSR, T3); ++ ++ Label subtype; ++ __ check_klass_subtype(FSR, T2, T0, subtype); ++ // If we get here the typecheck failed ++ recvKlass = T1; ++ __ move(recvKlass, FSR); ++ __ b(no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bind(subtype); ++ ++ // do the call - rbx is actually the method to call ++ ++ __ profile_final_call(T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ __ jump_from_interpreted(Rmethod, T1); ++ // no return from above ++ __ bind(notVFinal); ++ ++ // Get receiver klass into T1 - also a null check ++ __ restore_locals(); ++ __ null_check(T3, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(T1, T3); ++ ++ Label no_such_method; ++ ++ // Preserve method for throw_AbstractMethodErrorVerbose. ++ __ move(T3, Rmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in T2. Subklass in T1. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ T1, T2, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ T0, FSR, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ ++ // profile this call ++ __ restore_bcp(); ++ __ profile_virtual_call(T1, T0, FSR); ++ ++ // Get declaring interface class from method, and itable index ++ __ ld_ptr(T2, Rmethod, in_bytes(Method::const_offset())); ++ __ ld_ptr(T2, T2, in_bytes(ConstMethod::constants_offset())); ++ __ ld_ptr(T2, T2, ConstantPool::pool_holder_offset_in_bytes()); ++ __ lw(Rmethod, Rmethod, in_bytes(Method::itable_index_offset())); ++ __ addiu(Rmethod, Rmethod, (-1) * Method::itable_index_max); ++ __ subu32(Rmethod, R0, Rmethod); ++ ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose. ++ __ move(FSR, T1); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ FSR, T2, Rmethod, ++ // outputs: method, scan temp. reg ++ Rmethod, T0, ++ no_such_interface); ++ ++ // Rmethod: Method* to call ++ // T3: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ beq(Rmethod, R0, no_such_method); ++ __ delayed()->nop(); ++ ++ __ profile_called_method(Rmethod, T0, T1); ++ __ profile_arguments_type(T1, Rmethod, T0, true); ++ ++ // do the call ++ // T3: receiver ++ // Rmethod: Method* ++ __ jump_from_interpreted(Rmethod, T1); ++ __ should_not_reach_here(); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! ++ ++ __ bind(no_such_method); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. 
++ recvKlass = A1; ++ Register method = A2; ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ if (method != T3) { __ move(method, T3); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), recvKlass, method); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ __ pop(Rmethod); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); ++ __ restore_locals(); ++ // Pass arguments for generating a verbose error message. ++ if (recvKlass != T1) { __ move(recvKlass, T1); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), recvKlass, T2); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here(); ++} ++ ++ ++void TemplateTable::invokehandle(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register T2_method = Rmethod; ++ const Register FSR_mtype = FSR; ++ const Register T3_recv = T3; ++ ++ prepare_invoke(byte_no, T2_method, FSR_mtype, T3_recv); ++ //??__ verify_method_ptr(T2_method); ++ __ verify_oop(T3_recv); ++ __ null_check(T3_recv); ++ ++ // T9: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // T2_method: MH.invokeExact_MT method (from f2) ++ ++ // Note: T9 is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(T9); ++ ++ // T8: tmp, used for mdp ++ // T2_method: callee ++ // T9: tmp ++ // is_virtual: true ++ __ profile_arguments_type(T8, T2_method, T9, true); ++ ++ __ jump_from_interpreted(T2_method, T9); ++} ++ ++ void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ //const Register Rmethod = T2; ++ const Register T2_callsite = T2; ++ ++ prepare_invoke(byte_no, Rmethod, T2_callsite); ++ ++ // T2: CallSite object (from cpool->resolved_references[f1]) ++ // Rmethod: MH.linkToCallSite method (from f2) ++ ++ // Note: T2_callsite is already pushed by prepare_invoke ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(T9); ++ ++ // T8: tmp, used for mdp ++ // Rmethod: callee ++ // T9: tmp ++ // is_virtual: false ++ __ profile_arguments_type(T8, Rmethod, T9, false); ++ ++ __ verify_oop(T2_callsite); ++ ++ __ jump_from_interpreted(Rmethod, T9); ++ } ++ ++//----------------------------------------------------------------------------- ++// Allocation ++// T1 : tags & buffer end & thread ++// T2 : object end ++// T3 : klass ++// T1 : object size ++// A1 : cpool ++// A2 : cp index ++// return object in FSR ++void TemplateTable::_new() { ++ transition(vtos, atos); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ ++ Label slow_case; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ Label allocate_shared; ++ ++ __ get_cpool_and_tags(A1, T1); ++ ++ // make sure the class we're about to instantiate has been resolved. 
++ // Note: slow_case does a pop of stack, which is why we loaded class/pushed above ++ const int tags_offset = Array::base_offset_in_bytes(); ++ if (UseLEXT1 && Assembler::is_simm(tags_offset, 8)) { ++ __ gslbx(AT, T1, A2, tags_offset); ++ } else { ++ __ daddu(T1, T1, A2); ++ __ lb(AT, T1, tags_offset); ++ } ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // get InstanceKlass ++ __ load_resolved_klass_at_index(A1, A2, T3); ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ lhu(T1, T3, in_bytes(InstanceKlass::init_state_offset())); ++ __ daddiu(AT, T1, - (int)InstanceKlass::fully_initialized); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // has_finalizer ++ __ lw(T0, T3, in_bytes(Klass::layout_helper_offset()) ); ++ __ andi(AT, T0, Klass::_lh_instance_slow_path_bit); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Allocate the instance ++ // 1) Try to allocate in the TLAB ++ // 2) if fail and the object is large allocate in the shared Eden ++ // 3) if the above fails (or is not applicable), go to a slow case ++ // (creates a new TLAB, etc.) ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc(); ++ ++#ifndef OPT_THREAD ++ const Register thread = T8; ++ if (UseTLAB || allow_shared_alloc) { ++ __ get_thread(thread); ++ } ++#else ++ const Register thread = TREG; ++#endif ++ ++ if (UseTLAB) { ++ // get tlab_top ++ __ ld(FSR, thread, in_bytes(JavaThread::tlab_top_offset())); ++ // get tlab_end ++ __ ld(AT, thread, in_bytes(JavaThread::tlab_end_offset())); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, allow_shared_alloc ? allocate_shared : slow_case); ++ __ delayed()->nop(); ++ __ sd(T2, thread, in_bytes(JavaThread::tlab_top_offset())); ++ ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ beq(R0, R0, initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ beq(R0, R0, initialize_object); ++ } ++ __ delayed()->nop(); ++ } ++ ++ // Allocation in the shared Eden , if allowed ++ // T0 : instance size in words ++ if(allow_shared_alloc){ ++ __ bind(allocate_shared); ++ ++ Label done, retry; ++ Address heap_top(T1); ++ __ set64(T1, (long)Universe::heap()->top_addr()); ++ __ ld(FSR, heap_top); ++ ++ __ bind(retry); ++ __ set64(AT, (long)Universe::heap()->end_addr()); ++ __ ld(AT, AT, 0); ++ __ daddu(T2, FSR, T0); ++ __ slt(AT, AT, T2); ++ __ bne(AT, R0, slow_case); ++ __ delayed()->nop(); ++ ++ // Compare FSR with the top addr, and if still equal, store the new ++ // top addr in T2 at the address of the top addr pointer. Sets AT if was ++ // equal, and clears it otherwise. Use lock prefix for atomicity on MPs. ++ // ++ // FSR: object begin ++ // T2: object end ++ // T0: instance size in words ++ ++ // if someone beat us on the allocation, try again, otherwise continue ++ __ cmpxchg(heap_top, FSR, T2, AT, true, true, done, &retry); ++ ++ __ bind(done); ++ ++ __ incr_allocated_bytes(thread, T0, 0); ++ } ++ ++ if (UseTLAB || Universe::heap()->supports_inline_contig_alloc()) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. 
++ __ bind(initialize_object); ++ __ set64(AT, - sizeof(oopDesc)); ++ __ daddu(T0, T0, AT); ++ __ beq(T0, R0, initialize_header); ++ __ delayed()->nop(); ++ ++ // initialize remaining object fields: T0 is a multiple of 2 ++ { ++ Label loop; ++ __ daddu(T1, FSR, T0); ++ __ daddiu(T1, T1, -oopSize); ++ ++ __ bind(loop); ++ __ sd(R0, T1, sizeof(oopDesc) + 0 * oopSize); ++ __ bne(T1, FSR, loop); //dont clear header ++ __ delayed()->daddiu(T1, T1, -oopSize); ++ } ++ ++ //klass in T3, ++ // initialize object header only. ++ __ bind(initialize_header); ++ if (UseBiasedLocking) { ++ __ ld(AT, T3, in_bytes(Klass::prototype_header_offset())); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes ()); ++ } else { ++ __ set64(AT, (long)markOopDesc::prototype()); ++ __ sd(AT, FSR, oopDesc::mark_offset_in_bytes()); ++ } ++ ++ __ store_klass_gap(FSR, R0); ++ __ store_klass(FSR, T3); ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), FSR); ++ __ pop(atos); ++ ++ } ++ __ b(done); ++ __ delayed()->nop(); ++ } ++ ++ // slow case ++ __ bind(slow_case); ++ __ get_constant_pool(A1); ++ __ get_unsigned_2_byte_index_at_bcp(A2, 1); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), A1, A2); ++ ++ // continue ++ __ bind(done); ++ __ sync(); ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ __ lbu(A1, at_bcp(1)); ++ //type, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), A1, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ __ get_2_byte_integer_at_bcp(A2, AT, 1); ++ __ huswap(A2); ++ __ get_constant_pool(A1); ++ // cp, index, count ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), A1, A2, FSR); ++ __ sync(); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ __ null_check(FSR, arrayOopDesc::length_offset_in_bytes()); ++ __ lw(FSR, FSR, arrayOopDesc::length_offset_in_bytes()); ++} ++ ++// when invoke gen_subtype_check, super in T3, sub in T2, object in FSR(it's always) ++// T2 : sub klass ++// T3 : cpool ++// T3 : super klass ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ __ beq(FSR, R0, is_null); ++ __ delayed()->nop(); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(T3, T1); ++ __ get_2_byte_integer_at_bcp(T2, AT, 1); ++ __ huswap(T2); ++ ++ // See if bytecode has already been quicked ++ __ daddu(AT, T1, T2); ++ __ lb(AT, AT, Array::base_offset_in_bytes()); ++ if(os::is_MP()) { ++ __ sync(); // load acquire ++ } ++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class); ++ __ beq(AT, R0, quicked); ++ __ delayed()->nop(); ++ ++ // In InterpreterRuntime::quicken_io_cc, lots of new classes may be loaded. ++ // Then, GC will move the object in V0 to another places in heap. ++ // Therefore, We should never save such an object in register. ++ // Instead, we should save it in the stack. It can be modified automatically by the GC thread. ++ // After GC, the object address in FSR is changed to a new place. 
++ //
++ __ push(atos);
++ const Register thread = TREG;
++#ifndef OPT_THREAD
++ __ get_thread(thread);
++#endif
++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
++ __ get_vm_result_2(T3, thread);
++ __ pop_ptr(FSR);
++ __ b(resolved);
++ __ delayed()->nop();
++
++ // klass already in cp, get superklass in T3
++ __ bind(quicked);
++ __ load_resolved_klass_at_index(T3, T2, T3);
++
++ __ bind(resolved);
++
++ // get subklass in T2
++ //add for compressedoops
++ __ load_klass(T2, FSR);
++ // Superklass in T3. Subklass in T2.
++ __ gen_subtype_check(T3, T2, ok_is_subtype);
++
++ // Come here on failure
++ // object is at FSR
++ __ jmp(Interpreter::_throw_ClassCastException_entry);
++ __ delayed()->nop();
++
++ // Come here on success
++ __ bind(ok_is_subtype);
++
++ // Collect counts on whether this check-cast sees NULLs a lot or not.
++ if (ProfileInterpreter) {
++ __ b(done);
++ __ delayed()->nop();
++ __ bind(is_null);
++ __ profile_null_seen(T3);
++ } else {
++ __ bind(is_null);
++ }
++ __ bind(done);
++}
++
++// I use T3 as cpool, T1 as tags, T2 as index
++// object always in FSR, superklass in T3, subklass in T2
++void TemplateTable::instanceof() {
++ transition(atos, itos);
++ Label done, is_null, ok_is_subtype, quicked, resolved;
++
++ __ beq(FSR, R0, is_null);
++ __ delayed()->nop();
++
++ // Get cpool & tags index
++ __ get_cpool_and_tags(T3, T1);
++ // get index
++ __ get_2_byte_integer_at_bcp(T2, AT, 1);
++ __ huswap(T2);
++
++ // See if bytecode has already been quicked
++ // quicked
++ __ daddu(AT, T1, T2);
++ __ lb(AT, AT, Array<u1>::base_offset_in_bytes());
++ if(os::is_MP()) {
++ __ sync(); // load acquire
++ }
++ __ daddiu(AT, AT, - (int)JVM_CONSTANT_Class);
++ __ beq(AT, R0, quicked);
++ __ delayed()->nop();
++
++ __ push(atos);
++ const Register thread = TREG;
++#ifndef OPT_THREAD
++ __ get_thread(thread);
++#endif
++ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc));
++ __ get_vm_result_2(T3, thread);
++ __ pop_ptr(FSR);
++ __ b(resolved);
++ __ delayed()->nop();
++
++ // get superklass in T3, subklass in T2
++ __ bind(quicked);
++ __ load_resolved_klass_at_index(T3, T2, T3);
++
++ __ bind(resolved);
++ // get subklass in T2
++ //add for compressedoops
++ __ load_klass(T2, FSR);
++
++ // Superklass in T3. Subklass in T2.
++ __ gen_subtype_check(T3, T2, ok_is_subtype);
++ // Come here on failure
++ __ b(done);
++ __ delayed(); __ move(FSR, R0);
++
++ // Come here on success
++ __ bind(ok_is_subtype);
++ __ move(FSR, 1);
++
++ // Collect counts on whether this test sees NULLs a lot or not.
++ if (ProfileInterpreter) {
++ __ beq(R0, R0, done);
++ __ delayed()->nop();
++ __ bind(is_null);
++ __ profile_null_seen(T3);
++ } else {
++ __ bind(is_null); // same as 'done'
++ }
++ __ bind(done);
++ // FSR = 0: obj == NULL or obj is not an instanceof the specified klass
++ // FSR = 1: obj != NULL and obj is an instanceof the specified klass
++}
++
++//--------------------------------------------------------
++//--------------------------------------------
++// Breakpoints
++void TemplateTable::_breakpoint() {
++ // Note: We get here even if we are single stepping.
++ // jbug insists on setting breakpoints at every bytecode
++ // even if we are in single step mode.
++
++ transition(vtos, vtos);
++
++ // get the unpatched byte code
++ __ get_method(A1);
++ __ call_VM(NOREG,
++ CAST_FROM_FN_PTR(address,
++ InterpreterRuntime::get_original_bytecode_at),
++ A1, BCP);
++ __ move(Rnext, V0); // Rnext will be used in dispatch_only_normal
++
++ // post the breakpoint event
++ __ get_method(A1);
++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), A1, BCP);
++
++ // complete the execution of original bytecode
++ __ dispatch_only_normal(vtos);
++}
++
++//-----------------------------------------------------------------------------
++// Exceptions
++
++void TemplateTable::athrow() {
++ transition(atos, vtos);
++ __ null_check(FSR);
++ __ jmp(Interpreter::throw_exception_entry());
++ __ delayed()->nop();
++}
++
++//-----------------------------------------------------------------------------
++// Synchronization
++//
++// Note: monitorenter & exit are symmetric routines, which is reflected
++// in the assembly code structure as well
++//
++// Stack layout:
++//
++// [expressions ] <--- SP = expression stack top
++// ..
++// [expressions ]
++// [monitor entry] <--- monitor block top = expression stack bot
++// ..
++// [monitor entry]
++// [frame data ] <--- monitor block bot
++// ...
++// [return addr ] <--- FP
++
++// we use T2 as monitor entry pointer, T3 as monitor top pointer, c_rarg0 as free slot pointer
++// object always in FSR
++void TemplateTable::monitorenter() {
++ transition(atos, vtos);
++
++ // check for NULL object
++ __ null_check(FSR);
++
++ const Address monitor_block_top(FP, frame::interpreter_frame_monitor_block_top_offset
++ * wordSize);
++ const int entry_size = (frame::interpreter_frame_monitor_size()* wordSize);
++ Label allocated;
++
++ // initialize entry pointer
++ __ move(c_rarg0, R0);
++
++ // find a free slot in the monitor block (result in c_rarg0)
++ {
++ Label entry, loop, exit;
++ __ ld(T2, monitor_block_top);
++ __ b(entry);
++ __ delayed()->daddiu(T3, FP, frame::interpreter_frame_initial_sp_offset * wordSize);
++
++ // free slot?
++ __ bind(loop);
++ __ ld(AT, T2, BasicObjectLock::obj_offset_in_bytes());
++ __ movz(c_rarg0, T2, AT);
++
++ __ beq(FSR, AT, exit);
++ __ delayed()->nop();
++ __ daddiu(T2, T2, entry_size);
++
++ __ bind(entry);
++ __ bne(T3, T2, loop);
++ __ delayed()->nop();
++ __ bind(exit);
++ }
++
++ __ bne(c_rarg0, R0, allocated);
++ __ delayed()->nop();
++
++ // allocate one if there's no free slot
++ {
++ Label entry, loop;
++ // 1. compute new pointers // SP: old expression stack top
++ __ ld(c_rarg0, monitor_block_top);
++ __ daddiu(SP, SP, - entry_size);
++ __ daddiu(c_rarg0, c_rarg0, - entry_size);
++ __ sd(c_rarg0, monitor_block_top);
++ __ b(entry);
++ __ delayed(); __ move(T3, SP);
++
++ // 2. move expression stack contents
++ __ bind(loop);
++ __ ld(AT, T3, entry_size);
++ __ sd(AT, T3, 0);
++ __ daddiu(T3, T3, wordSize);
++ __ bind(entry);
++ __ bne(T3, c_rarg0, loop);
++ __ delayed()->nop();
++ }
++
++ __ bind(allocated);
++ // Increment bcp to point to the next bytecode,
++ // so exception handling for async. exceptions work correctly.
++ // The object has already been popped from the stack, so the
++ // expression stack looks correct.
++ __ daddiu(BCP, BCP, 1);
++ __ sd(FSR, c_rarg0, BasicObjectLock::obj_offset_in_bytes());
++ __ lock_object(c_rarg0);
++ // check to make sure this monitor doesn't cause stack overflow after locking
++ __ save_bcp(); // in case of exception
++ __ generate_stack_overflow_check(0);
++ // The bcp has already been incremented.
Just need to dispatch to next instruction. ++ ++ __ dispatch_next(vtos); ++} ++ ++// T2 : top ++// c_rarg0 : entry ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ ++ __ null_check(FSR); ++ ++ const int entry_size =(frame::interpreter_frame_monitor_size()* wordSize); ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ld(c_rarg0, FP, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ b(entry); ++ __ delayed()->daddiu(T2, FP, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ __ bind(loop); ++ __ ld(AT, c_rarg0, BasicObjectLock::obj_offset_in_bytes()); ++ __ beq(FSR, AT, found); ++ __ delayed()->nop(); ++ __ daddiu(c_rarg0, c_rarg0, entry_size); ++ __ bind(entry); ++ __ bne(T2, c_rarg0, loop); ++ __ delayed()->nop(); ++ } ++ ++ // error handling. Unlocking was not block-structured ++ Label end; ++ __ call_VM(NOREG, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here(); ++ ++ // call run-time routine ++ // c_rarg0: points to monitor entry ++ __ bind(found); ++ __ move(TSR, FSR); ++ __ unlock_object(c_rarg0); ++ __ move(FSR, TSR); ++ __ bind(end); ++} ++ ++ ++// Wide instructions ++void TemplateTable::wide() { ++ transition(vtos, vtos); ++ __ lbu(Rnext, at_bcp(1)); ++ __ dsll(T9, Rnext, Address::times_8); ++ __ li(AT, (long)Interpreter::_wentry_point); ++ __ daddu(AT, T9, AT); ++ __ ld(T9, AT, 0); ++ __ jr(T9); ++ __ delayed()->nop(); ++} ++ ++ ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * wordSize ++ __ lbu(A1, at_bcp(3)); // dimension ++ __ daddiu(A1, A1, -1); ++ __ dsll(A1, A1, Address::times_8); ++ __ daddu(A1, SP, A1); // now A1 pointer to the count array on the stack ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), A1); ++ __ lbu(AT, at_bcp(3)); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(SP, SP, AT); ++ __ sync(); ++} ++#endif // !CC_INTERP +diff --git a/src/hotspot/cpu/mips/vmStructs_mips.hpp b/src/hotspot/cpu/mips/vmStructs_mips.hpp +new file mode 100644 +index 0000000000..6939914356 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmStructs_mips.hpp +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2001, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++#define CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* JavaCallWrapper */ \ ++ /******************************/ \ ++ /******************************/ \ ++ /* JavaFrameAnchor */ \ ++ /******************************/ \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) \ ++ \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_STRUCTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_TYPES_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_INT_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ ++ /* NOTE that we do not use the last_entry() macro here; it is used */ ++ /* in vmStructs__.hpp's VM_LONG_CONSTANTS_OS_CPU macro (and must */ ++ /* be present there) */ ++ ++#endif // CPU_MIPS_VM_VMSTRUCTS_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vm_version_ext_mips.cpp b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp +new file mode 100644 +index 0000000000..ac2a43edce +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_ext_mips.cpp +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_mips.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ if (is_loongson()) { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Loongson MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Loongson MIPS %s", cpu_features()); ++ } else { ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "MIPS"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "MIPS %s", cpu_features()); ++ } ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/mips/vm_version_ext_mips.hpp b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp +new file mode 100644 +index 0000000000..ffdcff0677 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_ext_mips.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_EXT_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vm_version_mips.cpp b/src/hotspot/cpu/mips/vm_version_mips.cpp +new file mode 100644 +index 0000000000..2e7b61390e +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_mips.cpp +@@ -0,0 +1,516 @@ ++/* ++ * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/vm_version.hpp" ++#ifdef TARGET_OS_FAMILY_linux ++# include "os_linux.inline.hpp" ++#endif ++ ++int VM_Version::_cpuFeatures; ++const char* VM_Version::_features_str = ""; ++VM_Version::CpuidInfo VM_Version::_cpuid_info = { 0, }; ++volatile bool VM_Version::_is_determine_cpucfg_supported_running = false; ++bool VM_Version::_is_cpucfg_instruction_supported = true; ++bool VM_Version::_cpu_info_is_initialized = false; ++ ++static BufferBlob* stub_blob; ++static const int stub_size = 600; ++ ++extern "C" { ++ typedef void (*get_cpu_info_stub_t)(void*); ++} ++static get_cpu_info_stub_t get_cpu_info_stub = NULL; ++ ++ ++class VM_Version_StubGenerator: public StubCodeGenerator { ++ public: ++ ++ VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++ ++ address generate_get_cpu_info() { ++ assert(!VM_Version::cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ StubCodeMark mark(this, "VM_Version", "get_cpu_info_stub"); ++# define __ _masm-> ++ ++ address start = __ pc(); ++ ++ __ enter(); ++ __ push(AT); ++ __ push(V0); ++ ++ __ li(AT, (long)0); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id0_offset())); ++ ++ __ li(AT, 1); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id1_offset())); ++ ++ __ li(AT, 2); ++ __ cpucfg(V0, AT); ++ __ lw(AT, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ __ sw(V0, A0, in_bytes(VM_Version::Loongson_Cpucfg_id2_offset())); ++ ++ __ pop(V0); ++ __ pop(AT); ++ __ leave(); ++ __ jr(RA); ++ __ delayed()->nop(); ++# undef __ ++ ++ return start; ++ }; ++}; ++ ++uint32_t VM_Version::get_feature_flags_by_cpucfg() { ++ uint32_t result = 0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MMI != 0) ++ result |= CPU_MMI; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA1 != 0) ++ result |= CPU_MSA1_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.MSA2 != 0) ++ result |= CPU_MSA2_0; ++ if (_cpuid_info.cpucfg_info_id1.bits.CGP != 0) ++ result |= CPU_CGP; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX1 != 0) ++ result |= CPU_LSX1; ++ if (_cpuid_info.cpucfg_info_id1.bits.LSX2 != 0) ++ result |= CPU_LSX2; ++ if (_cpuid_info.cpucfg_info_id1.bits.LASX != 0) ++ result |= CPU_LASX; ++ if (_cpuid_info.cpucfg_info_id1.bits.LLSYNC != 0) ++ result |= CPU_LLSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.TGTSYNC != 0) ++ result |= CPU_TGTSYNC; ++ if (_cpuid_info.cpucfg_info_id1.bits.MUALP != 0) ++ result |= CPU_MUALP; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT1 != 0) ++ result |= CPU_LEXT1; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT2 != 0) ++ result |= CPU_LEXT2; ++ if (_cpuid_info.cpucfg_info_id2.bits.LEXT3 != 0) ++ result |= CPU_LEXT3; ++ if (_cpuid_info.cpucfg_info_id2.bits.LAMO != 0) ++ result |= CPU_LAMO; ++ if (_cpuid_info.cpucfg_info_id2.bits.LPIXU != 0) ++ result |= CPU_LPIXU; ++ ++ result |= CPU_ULSYNC; ++ ++ return result; ++} ++ ++void read_cpu_info(const char *path, char *result) { ++ FILE *ptr; ++ char buf[1024]; ++ int i = 0; ++ if((ptr=fopen(path, "r")) != NULL) { ++ while(fgets(buf, 1024, ptr)!=NULL) { ++ strcat(result,buf); ++ i++; ++ if (i == 10) break; ++ } ++ fclose(ptr); ++ } else { ++ warning("Can't 
detect CPU info - cannot open %s", path); ++ } ++} ++ ++void strlwr(char *str) { ++ for (; *str!='\0'; str++) ++ *str = tolower(*str); ++} ++ ++int VM_Version::get_feature_flags_by_cpuinfo(int features) { ++ assert(!cpu_info_is_initialized(), "VM_Version should not be initialized"); ++ ++ char res[10240]; ++ int i; ++ memset(res, '\0', 10240 * sizeof(char)); ++ read_cpu_info("/proc/cpuinfo", res); ++ // res is converted to lower case ++ strlwr(res); ++ ++ if (strstr(res, "loongson")) { ++ // Loongson CPU ++ features |= CPU_LOONGSON; ++ ++ const struct Loongson_Cpuinfo loongson_cpuinfo[] = { ++ {L_3A1000, "3a1000"}, ++ {L_3B1500, "3b1500"}, ++ {L_3A2000, "3a2000"}, ++ {L_3B2000, "3b2000"}, ++ {L_3A3000, "3a3000"}, ++ {L_3B3000, "3b3000"}, ++ {L_2K1000, "2k1000"}, ++ {L_UNKNOWN, "unknown"} ++ }; ++ ++ // Loongson Family ++ int detected = 0; ++ for (i = 0; i <= L_UNKNOWN; i++) { ++ switch (i) { ++ // 3A1000 and 3B1500 may use an old kernel and further comparsion is needed ++ // test PRID REV in /proc/cpuinfo ++ // 3A1000: V0.5, model name: ICT Loongson-3A V0.5 FPU V0.1 ++ // 3B1500: V0.7, model name: ICT Loongson-3B V0.7 FPU V0.1 ++ case L_3A1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3a v0.5")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3A1000 platform"); ++ } ++ break; ++ case L_3B1500: ++ if (strstr(res, loongson_cpuinfo[i].match_str) || strstr(res, "loongson-3b v0.7")) { ++ features |= CPU_LOONGSON_GS464; ++ detected++; ++ //tty->print_cr("3B1500 platform"); ++ } ++ break; ++ case L_3A2000: ++ case L_3B2000: ++ case L_3A3000: ++ case L_3B3000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS464E; ++ detected++; ++ //tty->print_cr("3A2000/3A3000/3B2000/3B3000 platform"); ++ } ++ break; ++ case L_2K1000: ++ if (strstr(res, loongson_cpuinfo[i].match_str)) { ++ features |= CPU_LOONGSON_GS264; ++ detected++; ++ //tty->print_cr("2K1000 platform"); ++ } ++ break; ++ case L_UNKNOWN: ++ if (detected == 0) { ++ detected++; ++ //tty->print_cr("unknown Loongson platform"); ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ assert (detected == 1, "one and only one of LOONGSON_CPU_FAMILY should be detected"); ++ } else { // not Loongson ++ // Not Loongson CPU ++ //tty->print_cr("MIPS platform"); ++ } ++ ++ if (features & CPU_LOONGSON_GS264) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ features |= CPU_MSA1_0; ++ features |= CPU_LSX1; ++ } else if (features & CPU_LOONGSON_GS464) { ++ features |= CPU_LEXT1; ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ } else if (features & CPU_LOONGSON_GS464E) { ++ features |= CPU_LEXT1; ++ features |= CPU_LEXT2; ++ features |= CPU_LEXT3; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } else if (features & CPU_LOONGSON) { ++ // unknow loongson ++ features |= CPU_LLSYNC; ++ features |= CPU_TGTSYNC; ++ features |= CPU_ULSYNC; ++ } ++ VM_Version::_cpu_info_is_initialized = true; ++ ++ return features; ++} ++ ++void VM_Version::get_processor_features() { ++ ++ clean_cpuFeatures(); ++ ++ // test if cpucfg instruction is supported ++ VM_Version::_is_determine_cpucfg_supported_running = true; ++ __asm__ __volatile__( ++ ".insn \n\t" ++ ".word (0xc8080118)\n\t" // cpucfg zero, zero ++ : ++ : ++ : ++ ); ++ VM_Version::_is_determine_cpucfg_supported_running = false; ++ ++ if (supports_cpucfg()) { ++ get_cpu_info_stub(&_cpuid_info); ++ _cpuFeatures = get_feature_flags_by_cpucfg(); 
++ // Only Loongson CPUs support cpucfg ++ _cpuFeatures |= CPU_LOONGSON; ++ } else { ++ _cpuFeatures = get_feature_flags_by_cpuinfo(0); ++ } ++ ++ _supports_cx8 = true; ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_CMDLINE(uintx, MaxGCPauseMillis, 650); ++ } ++ ++#ifdef COMPILER2 ++ if (MaxVectorSize > 0) { ++ if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2"); ++ MaxVectorSize = 8; ++ } ++ if (MaxVectorSize > 0 && supports_ps()) { ++ MaxVectorSize = 8; ++ } else { ++ MaxVectorSize = 0; ++ } ++ } ++ // ++ // Vector optimization of MIPS works in most cases, but cannot pass hotspot/test/compiler/6340864/TestFloatVect.java. ++ // Vector optimization was closed by default. ++ // The reasons: ++ // 1. The kernel does not have emulation of PS instructions yet, so the emulation of PS instructions must be done in JVM, see JVM_handle_linux_signal. ++ // 2. It seems the gcc4.4.7 had some bug related to ucontext_t, which is used in signal handler to emulate PS instructions. ++ // ++ if (FLAG_IS_DEFAULT(MaxVectorSize)) { ++ MaxVectorSize = 0; ++ } ++ ++#endif ++ ++ if (needs_llsync() && needs_tgtsync() && !needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 1000); ++ } ++ } else if (!needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 2000); ++ } ++ } else if (!needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 3000); ++ } ++ } else if (needs_llsync() && !needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 4000); ++ } ++ } else if (needs_llsync() && needs_tgtsync() && needs_ulsync()) { ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } else { ++ assert(false, "Should Not Reach Here, what is the cpu type?"); ++ if (FLAG_IS_DEFAULT(UseSyncLevel)) { ++ FLAG_SET_DEFAULT(UseSyncLevel, 10000); ++ } ++ } ++ ++ if (supports_lext1()) { ++ if (FLAG_IS_DEFAULT(UseLEXT1)) { ++ FLAG_SET_DEFAULT(UseLEXT1, true); ++ } ++ } else if (UseLEXT1) { ++ warning("LEXT1 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT1, false); ++ } ++ ++ if (supports_lext2()) { ++ if (FLAG_IS_DEFAULT(UseLEXT2)) { ++ FLAG_SET_DEFAULT(UseLEXT2, true); ++ } ++ } else if (UseLEXT2) { ++ warning("LEXT2 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT2, false); ++ } ++ ++ if (supports_lext3()) { ++ if (FLAG_IS_DEFAULT(UseLEXT3)) { ++ FLAG_SET_DEFAULT(UseLEXT3, true); ++ } ++ } else if (UseLEXT3) { ++ warning("LEXT3 instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseLEXT3, false); ++ } ++ ++ if (UseLEXT2) { ++ if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) { ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 1); ++ } ++ } else if (UseCountTrailingZerosInstructionMIPS64) { ++ if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstructionMIPS64)) ++ warning("ctz/dctz instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstructionMIPS64, 0); ++ } ++ ++ if (TieredCompilation) { ++ if (!FLAG_IS_DEFAULT(TieredCompilation)) ++ warning("TieredCompilation not supported"); ++ FLAG_SET_DEFAULT(TieredCompilation, false); ++ } ++ ++ char buf[256]; ++ bool is_unknown_loongson_cpu = is_loongson() && !is_gs464() && !is_gs464e() && !is_gs264() && !supports_cpucfg(); ++ ++ // A note on the 
_features_string format: ++ // There are jtreg tests checking the _features_string for various properties. ++ // For some strange reason, these tests require the string to contain ++ // only _lowercase_ characters. Keep that in mind when being surprised ++ // about the unusual notation of features - and when adding new ones. ++ // Features may have one comma at the end. ++ // Furthermore, use one, and only one, separator space between features. ++ // Multiple spaces are considered separate tokens, messing up everything. ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s, usesynclevel:%d", ++ (is_loongson() ? "mips-compatible loongson cpu" : "mips cpu"), ++ (is_gs464() ? ", gs464 (3a1000/3b1500)" : ""), ++ (is_gs464e() ? ", gs464e (3a2000/3a3000/3b2000/3b3000)" : ""), ++ (is_gs264() ? ", gs264 (2k1000)" : ""), ++ (is_unknown_loongson_cpu ? ", unknown loongson cpu" : ""), ++ (supports_dsp() ? ", dsp" : ""), ++ (supports_ps() ? ", ps" : ""), ++ (supports_3d() ? ", 3d" : ""), ++ (supports_mmi() ? ", mmi" : ""), ++ (supports_msa1_0() ? ", msa1_0" : ""), ++ (supports_msa2_0() ? ", msa2_0" : ""), ++ (supports_lsx1() ? ", lsx1" : ""), ++ (supports_lsx2() ? ", lsx2" : ""), ++ (supports_lasx() ? ", lasx" : ""), ++ (supports_lext1() ? ", lext1" : ""), ++ (supports_lext2() ? ", lext2" : ""), ++ (supports_lext3() ? ", lext3" : ""), ++ (supports_cgp() ? ", aes, crc, sha1, sha256, sha512" : ""), ++ (supports_lamo() ? ", lamo" : ""), ++ (supports_lpixu() ? ", lpixu" : ""), ++ (needs_llsync() ? ", llsync" : ""), ++ (needs_tgtsync() ? ", tgtsync": ""), ++ (needs_ulsync() ? ", ulsync": ""), ++ (supports_mualp() ? ", mualp" : ""), ++ UseSyncLevel); ++ _features_str = strdup(buf); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { ++ warning("SHA intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ if (UseAES) { ++ if (!FLAG_IS_DEFAULT(UseAES)) { ++ warning("AES instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAES, false); ++ } ++ } ++ ++ if (UseCRC32Intrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32Intrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); ++ } ++ } ++ ++ if (UseCRC32CIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++ warning("CRC32CIntrinsics instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); ++ } ++ } ++ ++ if (UseAESIntrinsics) { ++ if (!FLAG_IS_DEFAULT(UseAESIntrinsics)) { ++ warning("AES intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++ } ++ } ++ ++#ifdef COMPILER2 ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ 
UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++#endif ++ ++ if (FLAG_IS_DEFAULT(UseFMA)) { ++ FLAG_SET_DEFAULT(UseFMA, true); ++ } ++ ++ UNSUPPORTED_OPTION(CriticalJNINatives); ++} ++ ++void VM_Version::initialize() { ++ ResourceMark rm; ++ // Making this stub must be FIRST use of assembler ++ ++ stub_blob = BufferBlob::create("get_cpu_info_stub", stub_size); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate get_cpu_info_stub"); ++ } ++ CodeBuffer c(stub_blob); ++ VM_Version_StubGenerator g(&c); ++ get_cpu_info_stub = CAST_TO_FN_PTR(get_cpu_info_stub_t, ++ g.generate_get_cpu_info()); ++ ++ get_processor_features(); ++} +diff --git a/src/hotspot/cpu/mips/vm_version_mips.hpp b/src/hotspot/cpu/mips/vm_version_mips.hpp +new file mode 100644 +index 0000000000..733a0af295 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vm_version_mips.hpp +@@ -0,0 +1,221 @@ ++/* ++ * Copyright (c) 1997, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++#define CPU_MIPS_VM_VM_VERSION_MIPS_HPP ++ ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" ++ ++class VM_Version: public Abstract_VM_Version { ++public: ++ ++ union Loongson_Cpucfg_Id1 { ++ uint32_t value; ++ struct { ++ uint32_t FP_CFG : 1, ++ FPREV : 3, ++ MMI : 1, ++ MSA1 : 1, ++ MSA2 : 1, ++ CGP : 1, ++ WRP : 1, ++ LSX1 : 1, ++ LSX2 : 1, ++ LASX : 1, ++ R6FXP : 1, ++ R6CRCP : 1, ++ R6FPP : 1, ++ CNT64 : 1, ++ LSLDR0 : 1, ++ LSPREF : 1, ++ LSPREFX : 1, ++ LSSYNCI : 1, ++ LSUCA : 1, ++ LLSYNC : 1, ++ TGTSYNC : 1, ++ LLEXC : 1, ++ SCRAND : 1, ++ MUALP : 1, ++ KMUALEn : 1, ++ ITLBT : 1, ++ LSUPERF : 1, ++ SFBP : 1, ++ CDMAP : 1, ++ : 1; ++ } bits; ++ }; ++ ++ union Loongson_Cpucfg_Id2 { ++ uint32_t value; ++ struct { ++ uint32_t LEXT1 : 1, ++ LEXT2 : 1, ++ LEXT3 : 1, ++ LSPW : 1, ++ LBT1 : 1, ++ LBT2 : 1, ++ LBT3 : 1, ++ LBTMMU : 1, ++ LPMP : 1, ++ LPMRev : 3, ++ LAMO : 1, ++ LPIXU : 1, ++ LPIXNU : 1, ++ LVZP : 1, ++ LVZRev : 3, ++ LGFTP : 1, ++ LGFTRev : 3, ++ LLFTP : 1, ++ LLFTRev : 3, ++ LCSRP : 1, ++ DISBLKLY : 1, ++ : 3; ++ } bits; ++ }; ++ ++protected: ++ ++ enum { ++ CPU_LOONGSON = (1 << 1), ++ CPU_LOONGSON_GS464 = (1 << 2), ++ CPU_LOONGSON_GS464E = (1 << 3), ++ CPU_LOONGSON_GS264 = (1 << 4), ++ CPU_MMI = (1 << 11), ++ CPU_MSA1_0 = (1 << 12), ++ CPU_MSA2_0 = (1 << 13), ++ CPU_CGP = (1 << 14), ++ CPU_LSX1 = (1 << 15), ++ CPU_LSX2 = (1 << 16), ++ CPU_LASX = (1 << 17), ++ CPU_LEXT1 = (1 << 18), ++ CPU_LEXT2 = (1 << 19), ++ CPU_LEXT3 = (1 << 20), ++ CPU_LAMO = (1 << 21), ++ CPU_LPIXU = (1 << 22), ++ CPU_LLSYNC = (1 << 23), ++ CPU_TGTSYNC = (1 << 24), ++ CPU_ULSYNC = (1 << 25), ++ CPU_MUALP = (1 << 26), ++ ++ //////////////////////add some other feature here////////////////// ++ } cpuFeatureFlags; ++ ++ enum Loongson_Family { ++ L_3A1000 = 0, ++ L_3B1500 = 1, ++ L_3A2000 = 2, ++ L_3B2000 = 3, ++ L_3A3000 = 4, ++ L_3B3000 = 5, ++ L_2K1000 = 6, ++ L_UNKNOWN = 7 ++ }; ++ ++ struct Loongson_Cpuinfo { ++ Loongson_Family id; ++ const char* const match_str; ++ }; ++ ++ static int _cpuFeatures; ++ static const char* _features_str; ++ static volatile bool _is_determine_cpucfg_supported_running; ++ static bool _is_cpucfg_instruction_supported; ++ static bool _cpu_info_is_initialized; ++ ++ struct CpuidInfo { ++ uint32_t cpucfg_info_id0; ++ Loongson_Cpucfg_Id1 cpucfg_info_id1; ++ Loongson_Cpucfg_Id2 cpucfg_info_id2; ++ uint32_t cpucfg_info_id3; ++ uint32_t cpucfg_info_id4; ++ uint32_t cpucfg_info_id5; ++ uint32_t cpucfg_info_id6; ++ uint32_t cpucfg_info_id8; ++ }; ++ ++ // The actual cpuid info block ++ static CpuidInfo _cpuid_info; ++ ++ static uint32_t get_feature_flags_by_cpucfg(); ++ static int get_feature_flags_by_cpuinfo(int features); ++ static void get_processor_features(); ++ ++public: ++ // Offsets for cpuid asm stub ++ static ByteSize Loongson_Cpucfg_id0_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id0); } ++ static ByteSize Loongson_Cpucfg_id1_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id1); } ++ static ByteSize Loongson_Cpucfg_id2_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id2); } ++ static ByteSize Loongson_Cpucfg_id3_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id3); } ++ static ByteSize Loongson_Cpucfg_id4_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id4); } ++ static ByteSize Loongson_Cpucfg_id5_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id5); } ++ static ByteSize 
Loongson_Cpucfg_id6_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id6); } ++ static ByteSize Loongson_Cpucfg_id8_offset() { return byte_offset_of(CpuidInfo, cpucfg_info_id8); } ++ ++ static bool is_determine_features_test_running() { return _is_determine_cpucfg_supported_running; } ++ ++ static void clean_cpuFeatures() { _cpuFeatures = 0; } ++ ++ // Initialization ++ static void initialize(); ++ ++ static bool cpu_info_is_initialized() { return _cpu_info_is_initialized; } ++ ++ static bool supports_cpucfg() { return _is_cpucfg_instruction_supported; } ++ static bool set_supports_cpucfg(bool value) { return _is_cpucfg_instruction_supported = value; } ++ ++ static bool is_loongson() { return _cpuFeatures & CPU_LOONGSON; } ++ static bool is_gs264() { return _cpuFeatures & CPU_LOONGSON_GS264; } ++ static bool is_gs464() { return _cpuFeatures & CPU_LOONGSON_GS464; } ++ static bool is_gs464e() { return _cpuFeatures & CPU_LOONGSON_GS464E; } ++ static bool supports_dsp() { return 0; /*not supported yet*/} ++ static bool supports_ps() { return 0; /*not supported yet*/} ++ static bool supports_3d() { return 0; /*not supported yet*/} ++ static bool supports_msa1_0() { return _cpuFeatures & CPU_MSA1_0; } ++ static bool supports_msa2_0() { return _cpuFeatures & CPU_MSA2_0; } ++ static bool supports_cgp() { return _cpuFeatures & CPU_CGP; } ++ static bool supports_mmi() { return _cpuFeatures & CPU_MMI; } ++ static bool supports_lsx1() { return _cpuFeatures & CPU_LSX1; } ++ static bool supports_lsx2() { return _cpuFeatures & CPU_LSX2; } ++ static bool supports_lasx() { return _cpuFeatures & CPU_LASX; } ++ static bool supports_lext1() { return _cpuFeatures & CPU_LEXT1; } ++ static bool supports_lext2() { return _cpuFeatures & CPU_LEXT2; } ++ static bool supports_lext3() { return _cpuFeatures & CPU_LEXT3; } ++ static bool supports_lamo() { return _cpuFeatures & CPU_LAMO; } ++ static bool supports_lpixu() { return _cpuFeatures & CPU_LPIXU; } ++ static bool needs_llsync() { return _cpuFeatures & CPU_LLSYNC; } ++ static bool needs_tgtsync() { return _cpuFeatures & CPU_TGTSYNC; } ++ static bool needs_ulsync() { return _cpuFeatures & CPU_ULSYNC; } ++ static bool supports_mualp() { return _cpuFeatures & CPU_MUALP; } ++ ++ //mips has no such instructions, use ll/sc instead ++ static bool supports_compare_and_exchange() { return false; } ++ ++ static const char* cpu_features() { return _features_str; } ++ ++}; ++ ++#endif // CPU_MIPS_VM_VM_VERSION_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vmreg_mips.cpp b/src/hotspot/cpu/mips/vmreg_mips.cpp +new file mode 100644 +index 0000000000..86bd74d430 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.cpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ regName[i++] = reg->name(); ++ regName[i++] = reg->name(); ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ regName[i++] = freg->name(); ++ regName[i++] = freg->name(); ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} +diff --git a/src/hotspot/cpu/mips/vmreg_mips.hpp b/src/hotspot/cpu/mips/vmreg_mips.hpp +new file mode 100644 +index 0000000000..8ccc8c513c +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.hpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_HPP ++ ++inline Register as_Register() { ++ assert( is_Register(), "must be"); ++ return ::as_Register(value() >> 1); ++} ++ ++inline FloatRegister as_FloatRegister() { ++ assert( is_FloatRegister(), "must be" ); ++ assert( is_even(value()), "must be" ); ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); ++} ++ ++inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++} ++ ++inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++} ++ ++inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if(is_Register()) return true; ++ if(is_FloatRegister()) return true; ++ assert(false, "what register?"); ++ return false; ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_HPP +diff --git a/src/hotspot/cpu/mips/vmreg_mips.inline.hpp b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp +new file mode 100644 +index 0000000000..12ad7361aa +--- /dev/null ++++ b/src/hotspot/cpu/mips/vmreg_mips.inline.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2006, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++#define CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() << 1 ); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); ++} ++ ++#endif // CPU_MIPS_VM_VMREG_MIPS_INLINE_HPP +diff --git a/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp +new file mode 100644 +index 0000000000..75c23e8088 +--- /dev/null ++++ b/src/hotspot/cpu/mips/vtableStubs_mips_64.cpp +@@ -0,0 +1,340 @@ ++/* ++ * Copyright (c) 2003, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.hpp"
++#include "code/vtableStubs.hpp"
++#include "interp_masm_mips.hpp"
++#include "memory/resourceArea.hpp"
++#include "oops/compiledICHolder.hpp"
++#include "oops/klass.inline.hpp"
++#include "oops/klassVtable.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "vmreg_mips.inline.hpp"
++#ifdef COMPILER2
++#include "opto/runtime.hpp"
++#endif
++
++
++// machine-dependent part of VtableStubs: create VtableStub of correct size and
++// initialize its code
++
++#define __ masm->
++
++#define T0 RT0
++#define T1 RT1
++#define T2 RT2
++#define T3 RT3
++#define T8 RT8
++#define T9 RT9
++
++#ifndef PRODUCT
++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index);
++#endif
++
++// used by compiler only; receiver in T0.
++// used registers:
++// Rmethod : receiver klass & method
++// NOTE: If this code is used by the C1, the receiver_location is always 0.
++// when we reach here, receiver is in T0, klass is in T8
++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) {
++  // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing.
++  const int stub_code_length = code_size_limit(true);
++  VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index);
++  // Can be NULL if there is no free space in the code cache.
++  if (s == NULL) {
++    return NULL;
++  }
++
++  // Count unused bytes in instruction sequences of variable size.
++  // We add them to the computed buffer size in order to avoid
++  // overflow in subsequently generated stubs.
++  address start_pc;
++  int slop_bytes = 0;
++  int slop_delta = 0;
++  int load_const_maxLen = 6*BytesPerInstWord;  // load_const generates 6 instructions. Assume that as max size for li
++  // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation.
++ const int index_dependent_slop = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ Register t1 = T8, t2 = Rmethod; ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ lw(t1, AT , 0); ++ __ addiu(t1, t1, 1); ++ __ sw(t1, AT,0); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ //assert(receiver_location == T0->as_VMReg(), "receiver expected in T0"); ++ ++ // get receiver klass ++ address npe_addr = __ pc(); ++ //add for compressedoops ++ __ load_klass(t1, T0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ // check offset vs vtable length ++ __ lw(t2, t1, in_bytes(Klass::vtable_length_offset())); ++ assert(Assembler::is_simm16(vtable_index*vtableEntry::size()), "change this code"); ++ __ move(AT, vtable_index*vtableEntry::size()); ++ __ slt(AT, AT, t2); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ move(A2, vtable_index); ++ __ move(A1, A0); ++ ++ // VTABLE TODO: find upper bound for call_VM length. ++ start_pc = __ pc(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), A1, A2); ++ const ptrdiff_t estimate = 512; ++ const ptrdiff_t codesize = __ pc() - start_pc; ++ slop_delta = estimate - codesize; // call_VM varies in length, depending on data ++ assert(slop_delta >= 0, "vtable #%d: Code size estimate (%d) for DebugVtables too small, required: %d", vtable_index, (int)estimate, (int)codesize); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ const Register method = Rmethod; ++ ++ // load methodOop and target address ++ start_pc = __ pc(); ++ // lookup_virtual_method generates 18 instructions (worst case) ++ __ lookup_virtual_method(t1, vtable_index, method); ++ slop_delta = 18*BytesPerInstWord - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ beq(method, R0, L); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L); ++ __ delayed()->nop(); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ // T8: receiver klass ++ // T0: receiver ++ // Rmethod: methodOop ++ // T9: entry ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ masm->flush(); ++ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); ++ ++ return s; ++} ++ ++ ++// used registers : ++// T1 T2 ++// when reach here, the receiver in T0, klass in T1 ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. 
++ if (s == NULL) { ++ return NULL; ++ } ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ int load_const_maxLen = 6*BytesPerInstWord; // load_const generates 6 instructions. Assume that as max size for li ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler *masm = new MacroAssembler(&cb); ++ ++ // we T8,T9 as temparary register, they are free from register allocator ++ Register t1 = T8, t2 = T2; ++ // Entry arguments: ++ // T1: Interface ++ // T0: Receiver ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ start_pc = __ pc(); ++ __ li(AT, SharedRuntime::nof_megamorphic_calls_addr()); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ lw(T8, AT, 0); ++ __ addiu(T8, T8,1); ++ __ sw(T8, AT, 0); ++ } ++#endif // PRODUCT ++ ++ const Register holder_klass_reg = T1; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = Rmethod; // resolved interface klass (REFC) ++ ++ const Register icholder_reg = T1; ++ __ ld_ptr(resolved_klass_reg, icholder_reg, CompiledICHolder::holder_klass_offset()); ++ __ ld_ptr(holder_klass_reg, icholder_reg, CompiledICHolder::holder_metadata_offset()); ++ ++ Label L_no_such_interface; ++ ++ // get receiver klass (also an implicit null-check) ++ address npe_addr = __ pc(); ++ __ load_klass(t1, T0); ++ { ++ // x86 use lookup_interface_method, but lookup_interface_method does not work on MIPS. ++ // No dynamic code size variance here, so slop_bytes is not needed. ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == 8, "adjust the scaling in the code below"); ++ assert(Assembler::is_simm16(base), "change this code"); ++ __ daddiu(t2, t1, base); ++ __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ __ bind(entry); ++ ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, R0, L_no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bne(AT, resolved_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ } ++ ++ // add for compressedoops ++ __ load_klass(t1, T0); ++ // compute itable entry offset (in words) ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ __ daddiu(t2, t1, base); ++ __ lw(AT, t1, in_bytes(Klass::vtable_length_offset())); ++ __ dsll(AT, AT, Address::times_8); ++ __ daddu(t2, t2, AT); ++ if (HeapWordsPerLong > 1) { ++ __ round_to(t2, BytesPerLong); ++ } ++ ++ Label hit, entry; ++ __ bind(entry); ++ ++ // Check that the entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. 
++ __ ld_ptr(AT, t2, itableOffsetEntry::interface_offset_in_bytes()); ++ __ beq(AT, R0, L_no_such_interface); ++ __ delayed()->nop(); ++ ++ __ bne(AT, holder_klass_reg, entry); ++ __ delayed()->addiu(t2, t2, itableOffsetEntry::size() * wordSize); ++ ++ // We found a hit, move offset into T9 ++ __ ld_ptr(t2, t2, itableOffsetEntry::offset_offset_in_bytes() - itableOffsetEntry::size() * wordSize); ++ ++ // Compute itableMethodEntry. ++ const int method_offset = (itableMethodEntry::size() * wordSize * itable_index) + ++ itableMethodEntry::method_offset_in_bytes(); ++ ++ // Get methodOop and entrypoint for compiler ++ const Register method = Rmethod; ++ __ dsll(AT, t2, Address::times_1); ++ __ addu(AT, AT, t1 ); ++ start_pc = __ pc(); ++ __ set64(t1, method_offset); ++ slop_delta = load_const_maxLen - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ addu(AT, AT, t1 ); ++ __ ld_ptr(method, AT, 0); ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L1; ++ __ beq(method, R0, L1); ++ __ delayed()->nop(); ++ __ ld(AT, method,in_bytes(Method::from_compiled_offset())); ++ __ bne(AT, R0, L1); ++ __ delayed()->nop(); ++ __ stop("methodOop is null"); ++ __ bind(L1); ++ } ++#endif // ASSERT ++ ++ // Rmethod: methodOop ++ // T0: receiver ++ // T9: entry point ++ address ame_addr = __ pc(); ++ __ ld_ptr(T9, method,in_bytes(Method::from_compiled_offset())); ++ __ jr(T9); ++ __ delayed()->nop(); ++ ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. ++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. 
++  start_pc = __ pc();
++  __ set64(T9, (long)SharedRuntime::get_handle_wrong_method_stub());
++  slop_delta  = load_const_maxLen - (__ pc() - start_pc);
++  slop_bytes += slop_delta;
++  assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta);
++  __ jr(T9);
++  __ delayed()->nop();
++
++  masm->flush();
++  bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, 0);
++
++  return s;
++}
++
++// NOTE: whenever you change the code above, don't forget to change the const here
++int VtableStub::pd_code_alignment() {
++  const unsigned int icache_line_size = wordSize;
++  return icache_line_size;
++}
+diff --git a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
+index 847f7d61d2..f570946090 100644
+--- a/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
++++ b/src/hotspot/cpu/ppc/c1_LIRAssembler_ppc.cpp
+@@ -488,6 +488,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) {
+   }
+ }
+ 
++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) {
++  ShouldNotReachHere();
++}
+ 
+ void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) {
+   Bytecodes::Code code = op->bytecode();
+@@ -1608,6 +1611,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L
+   __ bind(skip);
+ }
+ 
++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) {
++  ShouldNotReachHere();
++}
++
+ 
+ void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest,
+                              CodeEmitInfo* info, bool pop_fpu_stack) {
+diff --git a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp
+index d34ea45c0b..f6b6dbdee3 100644
+--- a/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp
++++ b/src/hotspot/cpu/ppc/c1_LIRGenerator_ppc.cpp
+@@ -273,21 +273,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) {
+   __ move(temp, addr);
+ }
+ 
+-
+-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) {
++template <class T>
++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) {
+   LIR_Opr tmp = FrameMap::R0_opr;
+   __ load(new LIR_Address(base, disp, T_INT), tmp, info);
+-  __ cmp(condition, tmp, c);
++  __ cmp_branch(condition, tmp, c, T_INT, tgt);
+ }
+ 
++// Explicit instantiation for all supported types.
++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*);
++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*);
++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*);
+ 
+-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base,
+-                               int disp, BasicType type, CodeEmitInfo* info) {
++template <class T>
++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) {
+   LIR_Opr tmp = FrameMap::R0_opr;
+   __ load(new LIR_Address(base, disp, type), tmp, info);
+-  __ cmp(condition, reg, tmp);
++  __ cmp_branch(condition, reg, tmp, type, tgt);
+ }
+ 
++// Explicit instantiation for all supported types.
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); +diff --git a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp +index ef9b0833d3..c6b25bf10e 100644 +--- a/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp ++++ b/src/hotspot/cpu/ppc/c1_LIR_ppc.cpp +@@ -62,3 +62,24 @@ void LIR_Address::verify() const { + #endif + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +index 897be2209e..0c27cc20f3 100644 +--- a/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp ++++ b/src/hotspot/cpu/s390/c1_LIRAssembler_s390.cpp +@@ -379,6 +379,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); +@@ -1503,6 +1506,10 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + } + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, + CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); +diff --git a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp +index ae297ac635..c786803e0f 100644 +--- a/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp ++++ b/src/hotspot/cpu/s390/c1_LIRGenerator_s390.cpp +@@ -213,16 +213,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, 
int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr scratch = FrameMap::Z_R1_opr; + __ load(new LIR_Address(base, disp, T_INT), scratch, info); +- __ cmp(condition, scratch, c); ++ __ cmp_branch(condition, scratch, c, T_INT, tgt); + } + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); ++ ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); ++ __ branch(condition, type, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); ++ + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (tmp->is_valid()) { + if (is_power_of_2(c + 1)) { +diff --git a/src/hotspot/cpu/s390/c1_LIR_s390.cpp b/src/hotspot/cpu/s390/c1_LIR_s390.cpp +index 9507ca0856..2116e9af2b 100644 +--- a/src/hotspot/cpu/s390/c1_LIR_s390.cpp ++++ b/src/hotspot/cpu/s390/c1_LIR_s390.cpp +@@ -56,3 +56,23 @@ void LIR_Address::verify() const { + } + #endif // PRODUCT + ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp +index e503159eb7..2e5609fec8 100644 +--- a/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp ++++ b/src/hotspot/cpu/sparc/c1_LIRAssembler_sparc.cpp +@@ -599,6 +599,9 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + // The peephole pass fills the delay slot + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + Bytecodes::Code code = op->bytecode(); +@@ -1638,6 +1641,9 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + __ bind(skip); + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "unused on this code path"); +diff --git a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp +index a09a159722..a02ffafc77 100644 +--- a/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp ++++ b/src/hotspot/cpu/sparc/c1_LIRGenerator_sparc.cpp +@@ -267,19 +267,29 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ move(temp, addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + LIR_Opr o7opr = FrameMap::O7_opr; + __ load(new LIR_Address(base, disp, T_INT), o7opr, info); +- __ cmp(condition, o7opr, c); ++ __ cmp_branch(condition, o7opr, c, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + LIR_Opr o7opr = FrameMap::O7_opr; + __ load(new LIR_Address(base, disp, type), o7opr, info); +- __ cmp(condition, reg, o7opr); ++ __ cmp_branch(condition, reg, o7opr, type, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) { + assert(left != result, "should be different registers"); +diff --git a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp +index c21d2c1d9a..9cebb387e2 100644 +--- a/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp ++++ b/src/hotspot/cpu/sparc/c1_LIR_sparc.cpp +@@ -54,3 +54,24 @@ void LIR_Address::verify() const { + "wrong type for addresses"); + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. 
++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +index cee3140f4f..7b76eb0b9e 100644 +--- a/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp ++++ b/src/hotspot/cpu/x86/c1_LIRAssembler_x86.cpp +@@ -1442,6 +1442,10 @@ void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { + } + } + ++void LIR_Assembler::emit_opCmpBranch(LIR_OpCmpBranch* op) { ++ ShouldNotReachHere(); ++} ++ + void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { + LIR_Opr src = op->in_opr(); + LIR_Opr dest = op->result_opr(); +@@ -2030,6 +2034,9 @@ void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, L + } + } + ++void LIR_Assembler::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr result, BasicType type) { ++ ShouldNotReachHere(); ++} + + void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { + assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); +diff --git a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +index 905708a9fa..1c6774e1d6 100644 +--- a/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp ++++ b/src/hotspot/cpu/x86/c1_LIRGenerator_x86.cpp +@@ -255,15 +255,27 @@ void LIRGenerator::increment_counter(LIR_Address* addr, int step) { + __ add((LIR_Opr)addr, LIR_OprFact::intConst(step), (LIR_Opr)addr); + } + +-void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_mem_int_branch(LIR_Condition condition, LIR_Opr base, int disp, int c, T tgt, CodeEmitInfo* info) { + __ cmp_mem_int(condition, base, disp, c, info); ++ __ branch(condition, T_INT, tgt); + } + ++// Explicit instantiation for all supported types. ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_mem_int_branch(LIR_Condition, LIR_Opr, int, int, CodeStub*, CodeEmitInfo*); + +-void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) { ++template ++void LIRGenerator::cmp_reg_mem_branch(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, T tgt, CodeEmitInfo* info) { + __ cmp_reg_mem(condition, reg, new LIR_Address(base, disp, type), info); ++ __ branch(condition, type, tgt); + } + ++// Explicit instantiation for all supported types. 
++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, Label*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, BlockBegin*, CodeEmitInfo*); ++template void LIRGenerator::cmp_reg_mem_branch(LIR_Condition, LIR_Opr, LIR_Opr, int, BasicType, CodeStub*, CodeEmitInfo*); + + bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, jint c, LIR_Opr result, LIR_Opr tmp) { + if (tmp->is_valid() && c > 0 && c < max_jint) { +diff --git a/src/hotspot/cpu/x86/c1_LIR_x86.cpp b/src/hotspot/cpu/x86/c1_LIR_x86.cpp +index 92277ee063..20e283e302 100644 +--- a/src/hotspot/cpu/x86/c1_LIR_x86.cpp ++++ b/src/hotspot/cpu/x86/c1_LIR_x86.cpp +@@ -72,3 +72,24 @@ void LIR_Address::verify() const { + #endif + } + #endif // PRODUCT ++ ++template ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, T tgt, CodeEmitInfo* info) { ++ cmp(condition, left, right, info); ++ branch(condition, type, tgt); ++} ++ ++// Explicit instantiation for all supported types. ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, Label*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, BlockBegin*, CodeEmitInfo*); ++template void LIR_List::cmp_branch(LIR_Condition, LIR_Opr, LIR_Opr, BasicType type, CodeStub*, CodeEmitInfo*); ++ ++void LIR_List::cmp_branch(LIR_Condition condition, LIR_Opr left, LIR_Opr right, BasicType type, BlockBegin* block, BlockBegin* unordered) { ++ cmp(condition, left, right); ++ branch(condition, type, block, unordered); ++} ++ ++void LIR_List::cmp_cmove(LIR_Condition condition, LIR_Opr left, LIR_Opr right, LIR_Opr src1, LIR_Opr src2, LIR_Opr dst, BasicType type) { ++ cmp(condition, left, right); ++ cmove(condition, src1, src2, dst, type); ++} +diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp +index 95d7e51501..8d7b623ee7 100644 +--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp ++++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.cpp +@@ -263,7 +263,8 @@ void ZBarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, + #define __ ce->masm()-> + + void ZBarrierSetAssembler::generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const { ++ LIR_Opr ref, ++ LIR_Opr res) const { + __ testptr(ref->as_register(), address_bad_mask_from_thread(r15_thread)); + } + +diff --git a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +index 3687754e71..791e4ed43f 100644 +--- a/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp ++++ b/src/hotspot/cpu/x86/gc/z/zBarrierSetAssembler_x86.hpp +@@ -77,7 +77,8 @@ public: + + #ifdef COMPILER1 + void generate_c1_load_barrier_test(LIR_Assembler* ce, +- LIR_Opr ref) const; ++ LIR_Opr ref, ++ LIR_Opr res) const; + + void generate_c1_load_barrier_stub(LIR_Assembler* ce, + ZLoadBarrierStubC1* stub) const; +diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index f89a8c360b..68a2a3e4fa 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -23,6 +23,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2021 Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + // no precompiled headers + #include "jvm.h" + #include "classfile/classLoader.hpp" +@@ -3977,6 +3983,8 @@ size_t os::Linux::find_large_page_size() { + IA64_ONLY(256 * M) + PPC_ONLY(4 * M) + S390_ONLY(1 * M) ++ MIPS64_ONLY(4 * M) ++ LOONGARCH64_ONLY(4 * M); //In MIPS _large_page_size is seted 4*M. // TODO: LA + SPARC_ONLY(4 * M); + #endif // ZERO + +diff --git a/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp +new file mode 100644 +index 0000000000..30719a0340 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/assembler_linux_loongarch.cpp +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58006,12 +104733,1566 @@ index 0000000000..30719a0340 + */ diff --git a/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp new file mode 100644 -index 0000000000..6c71de772e +index 0000000000..86f8c963f5 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP ++ ++#include "runtime/vm_version.hpp" ++ ++// Implementation of class atomic ++ ++template ++struct Atomic::PlatformAdd ++ : Atomic::AddAndFetch > ++{ ++ template ++ D add_and_fetch(I add_value, D volatile* dest, atomic_memory_order order) const { ++ //Unimplemented(); ++ return __sync_add_and_fetch(dest, add_value); ++ } ++}; ++ ++template<> ++template ++inline T Atomic::PlatformXchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ T __ret, __tmp; ++ ++ STATIC_ASSERT(4 == sizeof(T)); ++ __asm__ __volatile__ ( ++ "1: ll.w %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc.w %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++template<> ++template ++inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __ret; ++ jlong __tmp; ++ __asm__ __volatile__ ( ++ "1: ll.d %[__ret], %[__dest] \n\t" ++ " move %[__tmp], %[__val] \n\t" ++ " sc.d %[__tmp], %[__dest] \n\t" ++ " beqz %[__tmp], 1b \n\t" ++ ++ : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) ++ : [__dest] "ZC" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __ret; ++} ++ ++#if 0 ++template<> ++template ++inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(1 == sizeof(T)); ++} ++ ++#else ++// No direct support for cmpxchg of bytes; emulate using int. 
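++// CmpxchgByteUsingInt is the shared fallback from atomic.hpp: it performs the
++// exchange on the aligned 4-byte word containing the byte, splicing the new
++// byte into that word and retrying if a neighbouring byte changes underneath,
++// so the LL.W/SC.W sequence above only ever sees naturally aligned words.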
++template<> ++struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {}; ++#endif ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(4 == sizeof(T)); ++ T __prev; ++ jint __cmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.w %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.w %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ ++ return __prev; ++} ++ ++template<> ++template ++inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value, ++ T volatile* dest, ++ T compare_value, ++ atomic_memory_order order) const { ++ STATIC_ASSERT(8 == sizeof(T)); ++ T __prev; ++ jlong __cmp; ++ ++ __asm__ __volatile__ ( ++ "1: ll.d %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $r0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc.d %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ "2: \n\t" ++ " dbar 0 \n\t" ++ ++ : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) ++ : [__dest] "ZC" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : "memory" ++ ); ++ return __prev; ++} ++ ++ ++#endif // OS_CPU_LINUX_LOONGARCH_ATOMIC_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..c9f675baca +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP ++ ++#include ++ ++// Efficient swapping of data bytes from Java byte ++// ordering to native byte ordering and vice versa. 
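++// LoongArch64 is little-endian while Java class-file data is stored
++// big-endian, so these helpers just wrap the glibc bswap_16/32/64 macros
++// from byteswap.h.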
++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_LOONGARCH_BYTES_LINUX_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..826c1fe39a +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp +@@ -0,0 +1,125 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void 
pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_COPY_LINUX_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp +new file mode 100644 +index 0000000000..0b5247aa0b +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. 
++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 2048); ++ ++define_pd_global(intx, CompilerThreadStackSize, 2048); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_GLOBALS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s +new file mode 100644 +index 0000000000..ebd73af0c5 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s +@@ -0,0 +1,25 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ +diff --git a/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp +new file mode 100644 +index 0000000000..295d20e19e +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP ++ ++#include "runtime/os.hpp" ++ ++// Included in orderAccess.hpp header file. ++ ++// Implementation of class OrderAccess. ++#define inlasm_sync() if (os::is_ActiveCoresMP()) \ ++ __asm__ __volatile__ ("nop" : : : "memory"); \ ++ else \ ++ __asm__ __volatile__ ("dbar 0" : : : "memory"); ++ ++inline void OrderAccess::loadload() { inlasm_sync(); } ++inline void OrderAccess::storestore() { inlasm_sync(); } ++inline void OrderAccess::loadstore() { inlasm_sync(); } ++inline void OrderAccess::storeload() { inlasm_sync(); } ++ ++inline void OrderAccess::acquire() { inlasm_sync(); } ++inline void OrderAccess::release() { inlasm_sync(); } ++inline void OrderAccess::fence() { inlasm_sync(); } ++ ++ ++#undef inlasm_sync ++ ++#endif // OS_CPU_LINUX_LOONGARCH_ORDERACCESS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp +new file mode 100644 +index 0000000000..cf5fff0d04 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp +@@ -0,0 +1,710 @@ ++/* ++ * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++// no precompiled headers ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++#include "compiler/disassembler.hpp" ++ ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++#define REG_SP 3 ++#define REG_FP 22 ++ ++NOINLINE address os::current_stack_pointer() { ++ register void *sp __asm__ ("$r3"); ++ return (address) sp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.__pc; ++} ++ ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { ++ uc->uc_mcontext.__pc = (intptr_t)pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). 
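++// sp and fp are recovered from uc_mcontext via the ucontext_get_sp()/
++// ucontext_get_fp() accessors above ($r3 and $r22 on LoongArch64).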
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; ++ ucontext_t* uc = (ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ return frame(sp, fp, epc.pc()); ++} ++ ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_context(uc); ++ if (!fr->is_first_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // In compiled code, the stack banging is performed before LR ++ // has been saved in the frame. RA is live, and SP and FP ++ // belong to the caller. ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)(uc->uc_mcontext.__gregs[1]); ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(fr->safe_for_sender(thread), "Safety check"); ++ assert(!fr->is_first_frame(), "Safety check"); ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; ++} ++ ++// By default, gcc always save frame pointer on stack. 
It may get ++// turned off by -fomit-frame-pointer, ++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); ++} ++ ++frame os::current_frame() { ++ intptr_t *fp = ((intptr_t **)__builtin_frame_address(0))[frame::native_frame_link_offset]; ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++extern "C" int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ info->si_addr); ++#endif ++ ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. ++ ++ if (sig == SIGPIPE/* || sig == SIGXFSZ*/) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ if (PrintMiscellaneous && (WizardMode || Verbose)) { ++ warning("Ignoring SIGPIPE - see bug 4229104"); ++ } ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ handle_assert_poison_fault(ucVoid, info->si_addr); ++ return 1; ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a java thread"); ++#endif ++ thread = (JavaThread*)t; ++ } ++ else if(t->is_VM_thread()){ ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("this thread is a VM thread\n"); ++#endif ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults: ++ if (uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } ++ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ address pc = NULL; ++ ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("pc=%lx", pc); ++ os::print_context(tty, uc); ++#endif ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++ ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++#endif ++ ++ // check if fault address is within thread stack ++ if (thread->on_local_stack(addr)) { ++ 
// stack overflow ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("stack exception check \n"); ++#endif ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in yellow zone\n"); ++#endif ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in java\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("this thread is in vm or native codes and return\n"); ++#endif ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is in red zone\n"); ++#endif ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. ++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("exception addr is neither in yellow zone nor in the red one\n"); ++#endif ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } ++ } // sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("java thread running in java code\n"); ++#endif ++ ++ // Handle signal from NativeJump::patch_verified_entry(). 
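++      // When a method is made not entrant its verified entry point is patched
++      // with an illegal instruction, so the resulting SIGILL is redirected to
++      // the handle-wrong-method stub and the caller re-resolves the call.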
++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ } else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("polling address = %lx, sig=%d", os::get_polling_page(), sig); ++#endif ++ stub = SharedRuntime::get_poll_stub(pc); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? cb->as_compiled_method_or_null() : NULL; ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("cb = %lx, nm = %lx\n", cb, nm); ++#endif ++ if (nm != NULL && nm->has_unsafe_access()) { ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE /* && info->si_code == FPE_INTDIV */) { ++ // HACK: si_code does not work on linux 2.2.12-20!!! ++ int op = pc[0] & 0x3f; ++ int op1 = pc[3] & 0x3f; ++ //FIXME, Must port to LA code!! ++ switch (op) { ++ case 0x1e: //ddiv ++ case 0x1f: //ddivu ++ case 0x1a: //div ++ case 0x1b: //divu ++ case 0x34: //trap ++ // In LA, div_by_zero exception can only be triggered by explicit 'trap'. ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ break; ++ default: ++ // TODO: handle more cases if we are using other x86 instructions ++ // that can generate SIGFPE signal on linux. ++ tty->print_cr("unknown opcode 0x%X -0x%X with SIGFPE.", op, op1); ++ //fatal("please update this code."); ++ } ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("continuation for implicit exception\n"); ++#endif ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); ++#endif ++ } ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("SIGBUS in vm thread \n"); ++#endif ++ address next_pc = (address)((unsigned long)pc + sizeof(unsigned int)); ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("jni fast get trap: "); ++#endif ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("addr = %d, stub = %lx", addr, stub); ++#endif ++ } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. 
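++    // Legacy UseMembar=false support: the VM forces a global memory barrier
++    // by write-protecting the shared serialization page; the faulting store
++    // is simply retried once block_on_serialize_page_trap() returns.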
++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print("write protecting the memory serialiazation page\n"); ++#endif ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } ++ } ++ ++ if (stub != NULL) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("resolved stub=%lx\n",stub); ++#endif ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ os::Linux::ucontext_set_pc(uc, stub); ++ return true; ++ } ++ ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("signal chaining\n"); ++#endif ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("abort becauce of unrecognized\n"); ++#endif ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("VMError in signal handler\n"); ++#endif ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler ++} ++ ++void os::Linux::init_thread_fpu_state(void) { ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ return 0; // mute compiler ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes, NULL); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++// Minimum usable stack sizes required to get to user code. Space for ++// HotSpot guard pages is added later. ++size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 64 * K; ++ ++// Return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // Default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 
2 * M : 512 * K); ++ return s; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ ucontext_t *uc = (ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("ZERO=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print("TP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print("T4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print("T5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print("T6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print("T7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print("RX=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print("S7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print("S8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ ++} ++ ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ st->print_cr("Registers:"); ++ st->print( "ZERO=" 
INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[0]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[1]); ++ st->print(", TP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[2]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->cr(); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->cr(); ++ st->print( "T4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[16]); ++ st->print(", T5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[17]); ++ st->print(", T6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[18]); ++ st->print(", T7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->cr(); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[20]); ++ st->print(", RX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[21]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[22]); ++ st->print(", S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->cr(); ++ st->print( "S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[24]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[25]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[26]); ++ st->print(", S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->cr(); ++ st->print( "S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[28]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[29]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[30]); ++ st->print(", S8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->cr(); ++ st->cr(); ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ print_hex_dump(st, (address)(sp - 32), (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::Linux::ucontext_get_pc(uc); ++ st->print_cr("Instructions: (pc=" PTR_FORMAT ")", p2i(pc)); ++ print_hex_dump(st, pc - 64, pc + 64, sizeof(char)); ++ Disassembler::decode(pc - 80, pc + 80, st); ++} ++ ++void os::setup_fpu() { ++ // no use for LA ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ // LA does not require the additional stack bang. 
++ return 0; ++} ++ ++bool os::is_ActiveCoresMP() { ++ return UseActiveCoresMP && _initial_active_processor_count == 1; ++} +diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp +new file mode 100644 +index 0000000000..fa02f8ba2f --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/atomic_linux_loongarch.hpp -@@ -0,0 +1,160 @@ ++++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp +@@ -0,0 +1,38 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP ++ ++ static void setup_fpu(); ++ static bool is_allocatable(size_t bytes); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++ static bool is_ActiveCoresMP(); ++ ++#endif // OS_CPU_LINUX_LOONGARCH_OS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp +new file mode 100644 +index 0000000000..cf3a596387 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++#define OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++// According to previous and present SPECjbb2015 score, ++// comment prefetch is better than if (interval >= 0) prefetch branch. ++// So choose comment prefetch as the base line. ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 0, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++// Ditto ++#if 0 ++ __asm__ __volatile__ ( ++ " preld 8, %[__loc] \n" ++ : ++ : [__loc] "m"( *((address)loc + interval) ) ++ : "memory" ++ ); ++#endif ++} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_PREFETCH_LINUX_LOONGARCH_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp +new file mode 100644 +index 0000000000..a1a9f181bd +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp +@@ -0,0 +1,116 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "memory/metaspaceShared.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++} ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(jt)) { ++#ifdef COMPILER2 ++ // C2 and JVMCI use ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif // COMPILER2_OR_JVMCI ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } +diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp +new file mode 100644 +index 0000000000..a3ac28ebd3 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp +@@ -0,0 +1,66 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame(); ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp +new file mode 100644 +index 0000000000..a39cb79bb1 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP ++#define OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, pid_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(pid_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_LOONGARCH_VMSTRUCTS_LINUX_LOONGARCH_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp +new file mode 100644 +index 0000000000..30719a0340 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/assembler_linux_mips.cpp +@@ -0,0 +1,24 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * @@ -58034,9 +106315,39 @@ index 0000000000..6c71de772e + * questions. + * + */ +diff --git a/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp +new file mode 100644 +index 0000000000..cd7cecad63 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/atomic_linux_mips.hpp +@@ -0,0 +1,191 @@ ++/* ++ * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP + +#include "runtime/vm_version.hpp" + @@ -58062,13 +106373,21 @@ index 0000000000..6c71de772e + + STATIC_ASSERT(4 == sizeof(T)); + __asm__ __volatile__ ( -+ "1: ll.w %[__ret], %[__dest] \n\t" ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " ll %[__ret], %[__dest] \n\t" + " move %[__tmp], %[__val] \n\t" -+ " sc.w %[__tmp], %[__dest] \n\t" ++ " sc %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) -+ : [__dest] "ZC" (*(volatile jint*)dest), [__val] "r" (exchange_value) ++ : [__dest] "m" (*(volatile jint*)dest), [__val] "r" (exchange_value) + : "memory" + ); + @@ -58084,16 +106403,23 @@ index 0000000000..6c71de772e + T __ret; + jlong __tmp; + __asm__ __volatile__ ( -+ "1: ll.d %[__ret], %[__dest] \n\t" ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1: sync\n\t" ++ " lld %[__ret], %[__dest] \n\t" + " move %[__tmp], %[__val] \n\t" -+ " sc.d %[__tmp], %[__dest] \n\t" ++ " scd %[__tmp], %[__dest] \n\t" + " beqz %[__tmp], 1b \n\t" ++ " nop \n\t" ++ ++ " .set pop\n\t" + + : [__ret] "=&r" (__ret), [__tmp] "=&r" (__tmp) -+ : [__dest] "ZC" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) ++ : [__dest] "m" (*(volatile intptr_t*)dest), [__val] "r" (exchange_value) + : "memory" + ); -+ + return __ret; +} + @@ -58124,17 +106450,25 @@ index 0000000000..6c71de772e + jint __cmp; + + __asm__ __volatile__ ( -+ "1: ll.w %[__prev], %[__dest] \n\t" -+ " bne %[__prev], %[__old], 2f \n\t" -+ " move %[__cmp], $r0 \n\t" -+ " move %[__cmp], %[__new] \n\t" -+ " sc.w %[__cmp], %[__dest] \n\t" -+ " beqz %[__cmp], 1b \n\t" ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " ll %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " sc %[__cmp], %[__dest] \n\t" ++ " beqz %[__cmp], 1b \n\t" ++ " nop \n\t" + "2: \n\t" -+ " dbar 0 \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) -+ : [__dest] "ZC" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : [__dest] "m" (*(volatile jint*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) + : "memory" + ); + @@ -58152,33 +106486,41 @@ index 0000000000..6c71de772e + jlong __cmp; + + __asm__ __volatile__ ( -+ "1: ll.d %[__prev], %[__dest] \n\t" -+ " bne %[__prev], %[__old], 2f \n\t" -+ " move %[__cmp], $r0 \n\t" -+ " move %[__cmp], %[__new] \n\t" -+ " sc.d %[__cmp], %[__dest] \n\t" -+ " beqz %[__cmp], 1b \n\t" ++ " .set push\n\t" ++ " .set mips64\n\t" ++ " .set noreorder\n\t" ++ ++ "1:sync \n\t" ++ " lld %[__prev], %[__dest] \n\t" ++ " bne %[__prev], %[__old], 2f \n\t" ++ " move %[__cmp], $0 \n\t" ++ " move %[__cmp], %[__new] \n\t" ++ " scd %[__cmp], %[__dest] \n\t" ++ " 
beqz %[__cmp], 1b \n\t" ++ " nop \n\t" + "2: \n\t" -+ " dbar 0 \n\t" ++ " sync \n\t" ++ ++ " .set pop\n\t" + + : [__prev] "=&r" (__prev), [__cmp] "=&r" (__cmp) -+ : [__dest] "ZC" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) ++ : [__dest] "m" (*(volatile jlong*)dest), [__old] "r" (compare_value), [__new] "r" (exchange_value) + : "memory" + ); + return __prev; +} + + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_ATOMIC_LINUX_LOONGARCH_HPP -diff --git a/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp ++#endif // OS_CPU_LINUX_MIPS_VM_ATOMIC_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp new file mode 100644 -index 0000000000..874ef835ea +index 0000000000..5b5cd10aa5 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/bytes_linux_loongarch.inline.hpp ++++ b/src/hotspot/os_cpu/linux_mips/bytes_linux_mips.inline.hpp @@ -0,0 +1,37 @@ +/* + * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58201,8 +106543,8 @@ index 0000000000..874ef835ea + * + */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP + +#include + @@ -58212,16 +106554,16 @@ index 0000000000..874ef835ea +inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } +inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_BYTES_LINUX_LOONGARCH_INLINE_HPP -diff --git a/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp ++#endif // OS_CPU_LINUX_MIPS_VM_BYTES_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp new file mode 100644 -index 0000000000..b1a2de2587 +index 0000000000..3fd6ef7b36 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/copy_linux_loongarch.inline.hpp ++++ b/src/hotspot/os_cpu/linux_mips/copy_linux_mips.inline.hpp @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
+ * + * This code is free software; you can redistribute it and/or modify it @@ -58244,8 +106586,8 @@ index 0000000000..b1a2de2587 + * + */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP + +static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { + (void)memmove(to, from, count * HeapWordSize); @@ -58343,16 +106685,16 @@ index 0000000000..b1a2de2587 + pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); +} + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_COPY_LINUX_LOONGARCH_INLINE_HPP -diff --git a/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp ++#endif // OS_CPU_LINUX_MIPS_VM_COPY_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp new file mode 100644 -index 0000000000..f8546270e6 +index 0000000000..f1599ac5f1 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/globals_linux_loongarch.hpp -@@ -0,0 +1,43 @@ ++++ b/src/hotspot/os_cpu/linux_mips/globals_linux_mips.hpp +@@ -0,0 +1,51 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58375,33 +106717,41 @@ index 0000000000..f8546270e6 + * + */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP + +// Sets the default values for platform dependent flags used by the runtime system. +// (see globals.hpp) + +define_pd_global(bool, DontYieldALot, false); -+define_pd_global(intx, ThreadStackSize, 2048); // 0 => use system default -+define_pd_global(intx, VMThreadStackSize, 2048); ++#ifdef MIPS64 ++define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 1024); ++#else ++// ThreadStackSize 320 allows a couple of test cases to run while ++// keeping the number of threads that can be created high. System ++// default ThreadStackSize appears to be 512 which is too big. 
++define_pd_global(intx, ThreadStackSize, 320); ++define_pd_global(intx, VMThreadStackSize, 512); ++#endif // MIPS64 + -+define_pd_global(intx, CompilerThreadStackSize, 2048); ++define_pd_global(intx, CompilerThreadStackSize, 0); + +define_pd_global(uintx,JVMInvokeMethodSlack, 8192); + +// Used on 64 bit platforms for UseCompressedOops base address +define_pd_global(uintx,HeapBaseMinAddress, 2*G); + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_GLOBALS_LINUX_LOONGARCH_HPP -diff --git a/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s ++#endif // OS_CPU_LINUX_MIPS_VM_GLOBALS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/linux_mips.s b/src/hotspot/os_cpu/linux_mips/linux_mips.s new file mode 100644 -index 0000000000..ebd73af0c5 +index 0000000000..36c8d810c3 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/linux_loongarch.s ++++ b/src/hotspot/os_cpu/linux_mips/linux_mips.s @@ -0,0 +1,25 @@ +# +# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. -+# Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++# Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. +# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. +# +# This code is free software; you can redistribute it and/or modify it @@ -58424,11 +106774,11 @@ index 0000000000..ebd73af0c5 +# + + -diff --git a/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp +diff --git a/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp new file mode 100644 -index 0000000000..c7becbb96f +index 0000000000..bf9d679730 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/orderAccess_linux_loongarch.hpp ++++ b/src/hotspot/os_cpu/linux_mips/orderAccess_linux_mips.hpp @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. @@ -58455,8 +106805,8 @@ index 0000000000..c7becbb96f + * + */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP + +#include "runtime/os.hpp" + @@ -58466,7 +106816,7 @@ index 0000000000..c7becbb96f +#define inlasm_sync() if (os::is_ActiveCoresMP()) \ + __asm__ __volatile__ ("nop" : : : "memory"); \ + else \ -+ __asm__ __volatile__ ("dbar 0" : : : "memory"); ++ __asm__ __volatile__ ("sync" : : : "memory"); + +inline void OrderAccess::loadload() { inlasm_sync(); } +inline void OrderAccess::storestore() { inlasm_sync(); } @@ -58480,16 +106830,16 @@ index 0000000000..c7becbb96f + +#undef inlasm_sync + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_ORDERACCESS_LINUX_LOONGARCH_HPP -diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp ++#endif // OS_CPU_LINUX_MIPS_VM_ORDERACCESS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp new file mode 100644 -index 0000000000..2eee2eb549 +index 0000000000..d035d8edbb --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.cpp -@@ -0,0 +1,886 @@ ++++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.cpp +@@ -0,0 +1,1020 @@ +/* + * Copyright (c) 1999, 2014, Oracle and/or its affiliates. All rights reserved. 
-+ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2023, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -58562,11 +106912,11 @@ index 0000000000..2eee2eb549 +# include +# include + -+#define REG_SP 3 -+#define REG_FP 22 ++#define REG_SP 29 ++#define REG_FP 30 + -+NOINLINE address os::current_stack_pointer() { -+ register void *sp __asm__ ("$r3"); ++address os::current_stack_pointer() { ++ register void *sp __asm__ ("$29"); + return (address) sp; +} + @@ -58579,20 +106929,19 @@ index 0000000000..2eee2eb549 +} + +address os::Linux::ucontext_get_pc(const ucontext_t * uc) { -+ //return (address)uc->uc_mcontext.pc; -+ return (address)uc->uc_mcontext.__pc; ++ return (address)uc->uc_mcontext.pc; +} + +void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { -+ uc->uc_mcontext.__pc = (intptr_t)pc; ++ uc->uc_mcontext.pc = (intptr_t)pc; +} + +intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { -+ return (intptr_t*)uc->uc_mcontext.__gregs[REG_SP]; ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_SP]; +} + +intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { -+ return (intptr_t*)uc->uc_mcontext.__gregs[REG_FP]; ++ return (intptr_t*)uc->uc_mcontext.gregs[REG_FP]; +} + +// For Forte Analyzer AsyncGetCallTrace profiling support - thread @@ -58663,7 +107012,7 @@ index 0000000000..2eee2eb549 + // belong to the caller. + intptr_t* fp = os::Linux::ucontext_get_fp(uc); + intptr_t* sp = os::Linux::ucontext_get_sp(uc); -+ address pc = (address)(uc->uc_mcontext.__gregs[1]); ++ address pc = (address)(uc->uc_mcontext.gregs[31]); + *fr = frame(sp, fp, pc); + if (!fr->is_java_frame()) { + assert(fr->safe_for_sender(thread), "Safety check"); @@ -58682,8 +107031,38 @@ index 0000000000..2eee2eb549 + return frame(fr->sender_sp(), fr->link(), fr->sender_pc()); +} + ++//intptr_t* _get_previous_fp() { ++intptr_t* __attribute__((noinline)) os::get_previous_fp() { ++ int *pc; ++ intptr_t sp; ++ int *pc_limit = (int*)(void*)&os::get_previous_fp; ++ int insn; ++ ++ { ++ l_pc:; ++ pc = (int*)&&l_pc; ++ __asm__ __volatile__ ("move %0, $sp" : "=r" (sp)); ++ } ++ ++ do { ++ insn = *pc; ++ switch(bitfield(insn, 16, 16)) { ++ case 0x27bd: /* addiu $sp,$sp,-i */ ++ case 0x67bd: /* daddiu $sp,$sp,-i */ ++ assert ((short)bitfield(insn, 0, 16)<0, "bad frame"); ++ sp -= (short)bitfield(insn, 0, 16); ++ return (intptr_t*)sp; ++ } ++ --pc; ++ } while (pc>=pc_limit); // The initial value of pc may be equal to pc_limit, because of GCC optimization. ++ ++ ShouldNotReachHere(); ++ return NULL; // mute compiler ++} ++ ++ +frame os::current_frame() { -+ intptr_t *fp = ((intptr_t **)__builtin_frame_address(0))[frame::native_frame_link_offset]; ++ intptr_t* fp = (intptr_t*)get_previous_fp(); + frame myframe((intptr_t*)os::current_stack_pointer(), + (intptr_t*)fp, + CAST_FROM_FN_PTR(address, os::current_frame)); @@ -58881,7 +107260,7 @@ index 0000000000..2eee2eb549 +#endif + + // Handle signal from NativeJump::patch_verified_entry(). -+ if (sig == SIGILL & nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { ++ if (sig == SIGILL && nativeInstruction_at(pc)->is_sigill_zombie_not_entrant()) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); +#endif @@ -58908,14 +107287,16 @@ index 0000000000..2eee2eb549 + // HACK: si_code does not work on linux 2.2.12-20!!! 
+ int op = pc[0] & 0x3f; + int op1 = pc[3] & 0x3f; -+ //FIXME, Must port to LA code!! ++ //FIXME, Must port to mips code!! + switch (op) { + case 0x1e: //ddiv + case 0x1f: //ddivu + case 0x1a: //div + case 0x1b: //divu + case 0x34: //trap -+ // In LA, div_by_zero exception can only be triggered by explicit 'trap'. ++ /* In MIPS, div_by_zero exception can only be triggered by explicit 'trap'. ++ * Ref: [c1_LIRAssembler_mips.cpp] arithmetic_idiv() ++ */ + stub = SharedRuntime::continuation_for_implicit_exception(thread, + pc, + SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); @@ -58936,7 +107317,110 @@ index 0000000000..2eee2eb549 +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("continuation_for_implicit_exception stub: %lx", stub); +#endif -+ } ++ } else if (/*thread->thread_state() == _thread_in_Java && */sig == SIGILL) { ++ //Since kernel does not have emulation of PS instructions yet, the emulation must be handled here. ++ //The method is to trigger kernel emulation of float emulation. ++ int inst = *(int*)pc; ++ int ops = (inst >> 26) & 0x3f; ++ int ops_fmt = (inst >> 21) & 0x1f; ++ int op = inst & 0x3f; ++ if (ops == Assembler::cop1_op && ops_fmt == Assembler::ps_fmt) { ++ int ft, fs, fd; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = (inst >> 6) & 0x1f; ++ float ft_upper, ft_lower, fs_upper, fs_lower, fd_upper, fd_lower; ++ double ft_value, fs_value, fd_value; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pl %0, %4\n\t" ++ "cvt.s.pu %1, %4\n\t" ++ "cvt.s.pl %2, %5\n\t" ++ "cvt.s.pu %3, %5\n\t" ++ : "=f" (fs_lower), "=f" (fs_upper), "=f" (ft_lower), "=f" (ft_upper) ++ : "f" (fs_value), "f" (ft_value) ++ ); ++ ++ switch (op) { ++ case Assembler::fadd_op: ++ __asm__ __volatile__ ( ++ "add.s %1, %3, %5\n\t" ++ "add.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fsub_op: ++ //fd = fs - ft ++ __asm__ __volatile__ ( ++ "sub.s %1, %3, %5\n\t" ++ "sub.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ case Assembler::fmul_op: ++ __asm__ __volatile__ ( ++ "mul.s %1, %3, %5\n\t" ++ "mul.s %2, %4, %6\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower) ++ : "f" (fs_upper), "f" (fs_lower), "f" (ft_upper), "f" (ft_lower) ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1 opcode 0x%x with SIGILL.", op); ++ } ++ } else if (ops == Assembler::cop1x_op /*&& op == Assembler::nmadd_ps_op*/) { ++ // madd.ps is not used, the code below were not tested ++ int fr, ft, fs, fd; ++ float fr_upper, fr_lower, fs_upper, fs_lower, ft_upper, ft_lower, fd_upper, fd_lower; ++ double fr_value, ft_value, fs_value, fd_value; ++ switch (op) { ++ case Assembler::madd_ps_op: ++ // fd = (fs * ft) + fr ++ fr = (inst >> 21) & 0x1f; ++ ft = (inst >> 16) & 0x1f; ++ fs = (inst >> 11) & 0x1f; ++ fd = (inst >> 6) & 0x1f; ++ fr_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[fr]; ++ ft_value = uc->uc_mcontext.fpregs.fp_r.fp_dregs[ft]; ++ fs_value = 
uc->uc_mcontext.fpregs.fp_r.fp_dregs[fs]; ++ __asm__ __volatile__ ( ++ "cvt.s.pu %3, %9\n\t" ++ "cvt.s.pl %4, %9\n\t" ++ "cvt.s.pu %5, %10\n\t" ++ "cvt.s.pl %6, %10\n\t" ++ "cvt.s.pu %7, %11\n\t" ++ "cvt.s.pl %8, %11\n\t" ++ "madd.s %1, %3, %5, %7\n\t" ++ "madd.s %2, %4, %6, %8\n\t" ++ "pll.ps %0, %1, %2\n\t" ++ : "=f" (fd_value), "=f" (fd_upper), "=f" (fd_lower), "=f" (fr_upper), "=f" (fr_lower), "=f" (fs_upper), "=f" (fs_lower), "=f" (ft_upper), "=f" (ft_lower) ++ : "f" (fr_value)/*9*/, "f" (fs_value)/*10*/, "f" (ft_value)/*11*/ ++ ); ++ uc->uc_mcontext.fpregs.fp_r.fp_dregs[fd] = fd_value; ++ stub = pc + 4; ++ break; ++ default: ++ tty->print_cr("unknown cop1x opcode 0x%x with SIGILL.", op); ++ } ++ } ++ } //SIGILL ++ } else if (sig == SIGILL && VM_Version::is_determine_features_test_running()) { ++ // thread->thread_state() != _thread_in_Java ++ // SIGILL must be caused by VM_Version::determine_features(). ++ VM_Version::set_supports_cpucfg(false); ++ stub = pc + 4; // continue with next instruction. + } else if (thread->thread_state() == _thread_in_vm && + sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ + thread->doing_unsafe_access()) { @@ -58994,19 +107478,13 @@ index 0000000000..2eee2eb549 +#ifdef OPT_RANGECHECK + || sig == SIGSYS +#endif -+ ) -+#if 0 -+ // LoongArch doesn't have hi1 -+ && ++ ) && + //(uc->uc_mcontext.cause == 2 || uc->uc_mcontext.cause == 3)) { + (uc->uc_mcontext.hi1 == 2 || uc->uc_mcontext.hi1 == 3)) { -+#endif -+ ) { +#ifdef PRINT_SIGNAL_HANDLE + tty->print_cr("execution protection violation\n"); +#endif + -+ guarantee(0, "LA not implemented yet"); + int page_size = os::vm_page_size(); + address addr = (address) info->si_addr; + address pc = os::Linux::ucontext_get_pc(uc); @@ -59145,30 +107623,23 @@ index 0000000000..2eee2eb549 +} + +int os::Linux::get_fpu_control_word(void) { -+ guarantee(0, "LA not implemented yet"); -+/* + int fcsr; + __asm__ __volatile__ ( + ".set noat;" -+ "addi.d %0, $r0, 0;" -+ "movfcsr2gr %0, $r1;" ++ "daddiu %0, $0, 0;" ++ "cfc1 %0, $31;" + : "=r" (fcsr) + ); + return fcsr; -+*/ -+ return 0; // mute compiler +} + +void os::Linux::set_fpu_control_word(int fpu_control) { -+ guarantee(0, "LA not implemented yet"); -+/* + __asm__ __volatile__ ( + ".set noat;" -+ "movgr2fcsr %0, $r1;" ++ "ctc1 %0, $31;" + : + : "r" (fpu_control) + ); -+*/ +} + +bool os::is_allocatable(size_t bytes) { @@ -59251,45 +107722,45 @@ index 0000000000..2eee2eb549 + // // we can't just iterate through the gregs area + // + // // this is only for the "general purpose" registers -+ st->print("ZERO=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[0]); -+ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[1]); -+ st->print("TP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[2]); -+ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->print("R0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print("AT=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print("V0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print("V1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[3]); + st->cr(); -+ st->print("A0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[4]); -+ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[5]); -+ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[6]); -+ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->print("A0=" ); 
print_location(st, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print("A1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print("A2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print("A3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[7]); + st->cr(); -+ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[8]); -+ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[9]); -+ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[10]); -+ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->print("A4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print("A5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print("A6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print("A7=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[11]); + st->cr(); -+ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[12]); -+ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[13]); -+ st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[14]); -+ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->print("T0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print("T1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print("T2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print("T3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[15]); + st->cr(); -+ st->print("T4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[16]); -+ st->print("T5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[17]); -+ st->print("T6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[18]); -+ st->print("T7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[19]); + st->cr(); -+ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[20]); -+ st->print("RX=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[21]); -+ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[22]); -+ st->print("S0=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print("S7=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[23]); + st->cr(); -+ st->print("S1=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[24]); -+ st->print("S2=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[25]); -+ st->print("S3=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[26]); -+ st->print("S4=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->print("T8=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print("T9=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print("K0=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print("K1=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[27]); + 
st->cr(); -+ st->print("S5=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[28]); -+ st->print("S6=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[29]); -+ st->print("S7=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[30]); -+ st->print("S8=" ); print_location(st, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->print("GP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print("SP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print("FP=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print("RA=" ); print_location(st, (intptr_t)uc->uc_mcontext.gregs[31]); + st->cr(); + +} @@ -59299,45 +107770,45 @@ index 0000000000..2eee2eb549 + + const ucontext_t *uc = (const ucontext_t*)context; + st->print_cr("Registers:"); -+ st->print( "ZERO=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[0]); -+ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[1]); -+ st->print(", TP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[2]); -+ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[3]); ++ st->print( "R0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[0]); ++ st->print(", AT=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[1]); ++ st->print(", V0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[2]); ++ st->print(", V1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[3]); + st->cr(); -+ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[4]); -+ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[5]); -+ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[6]); -+ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[7]); ++ st->print( "A0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[4]); ++ st->print(", A1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[5]); ++ st->print(", A2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[6]); ++ st->print(", A3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[7]); + st->cr(); -+ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[8]); -+ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[9]); -+ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[10]); -+ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[11]); ++ st->print( "A4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[8]); ++ st->print(", A5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[9]); ++ st->print(", A6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[10]); ++ st->print(", A7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[11]); + st->cr(); -+ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[12]); -+ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[13]); -+ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[14]); -+ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[15]); ++ st->print( "T0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[12]); ++ st->print(", T1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[13]); ++ st->print(", T2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[14]); ++ st->print(", T3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[15]); + st->cr(); -+ st->print( "T4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[16]); -+ st->print(", T5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[17]); -+ st->print(", T6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[18]); -+ st->print(", T7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[19]); ++ st->print( "S0=" 
INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[16]); ++ st->print(", S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[17]); ++ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[18]); ++ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[19]); + st->cr(); -+ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[20]); -+ st->print(", RX=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[21]); -+ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[22]); -+ st->print(", S0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[23]); ++ st->print( "S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[20]); ++ st->print(", S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[21]); ++ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[22]); ++ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[23]); + st->cr(); -+ st->print( "S1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[24]); -+ st->print(", S2=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[25]); -+ st->print(", S3=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[26]); -+ st->print(", S4=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[27]); ++ st->print( "T8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[24]); ++ st->print(", T9=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[25]); ++ st->print(", K0=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[26]); ++ st->print(", K1=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[27]); + st->cr(); -+ st->print( "S5=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[28]); -+ st->print(", S6=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[29]); -+ st->print(", S7=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[30]); -+ st->print(", S8=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.__gregs[31]); ++ st->print( "GP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[28]); ++ st->print(", SP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[29]); ++ st->print(", FP=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[30]); ++ st->print(", RA=" INTPTR_FORMAT, (intptr_t)uc->uc_mcontext.gregs[31]); + st->cr(); + st->cr(); + @@ -59356,7 +107827,20 @@ index 0000000000..2eee2eb549 +} + +void os::setup_fpu() { -+ // no use for LA ++ /* ++ //no use for MIPS ++ int fcsr; ++ address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std(); ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "cfc1 %0, $31;" ++ "sw %0, 0(%1);" ++ : "=r" (fcsr) ++ : "r" (fpu_cntrl) ++ : "memory" ++ ); ++ printf("fpu_cntrl: %lx\n", fpu_cntrl); ++ */ +} + +#ifndef PRODUCT @@ -59366,19 +107850,19 @@ index 0000000000..2eee2eb549 +#endif + +int os::extra_bang_size_in_bytes() { -+ // LA does not require the additional stack bang. ++ // MIPS does not require the additional stack bang. + return 0; +} + +bool os::is_ActiveCoresMP() { + return UseActiveCoresMP && _initial_active_processor_count == 1; +} -diff --git a/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp +diff --git a/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp new file mode 100644 -index 0000000000..93ed1ae033 +index 0000000000..c07d08156f --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/os_linux_loongarch.hpp -@@ -0,0 +1,38 @@ ++++ b/src/hotspot/os_cpu/linux_mips/os_linux_mips.hpp +@@ -0,0 +1,39 @@ +/* + * Copyright (c) 1999, 2013, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. 
@@ -59404,11 +107888,12 @@ index 0000000000..93ed1ae033 + * + */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP + + static void setup_fpu(); + static bool is_allocatable(size_t bytes); ++ static intptr_t *get_previous_fp(); + + // Used to register dynamic code cache area with the OS + // Note: Currently only used in 64 bit Windows implementations @@ -59416,16 +107901,16 @@ index 0000000000..93ed1ae033 + + static bool is_ActiveCoresMP(); + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_OS_LINUX_LOONGARCH_HPP -diff --git a/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp ++#endif // OS_CPU_LINUX_MIPS_VM_OS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp new file mode 100644 -index 0000000000..a1cedcd8cf +index 0000000000..93490345f0 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/prefetch_linux_loongarch.inline.hpp -@@ -0,0 +1,56 @@ ++++ b/src/hotspot/os_cpu/linux_mips/prefetch_linux_mips.inline.hpp +@@ -0,0 +1,58 @@ +/* + * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -59448,46 +107933,48 @@ index 0000000000..a1cedcd8cf + * + */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP ++#define OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP + + +inline void Prefetch::read (void *loc, intx interval) { -+// According to previous and present SPECjbb2015 score, -+// comment prefetch is better than if (interval >= 0) prefetch branch. -+// So choose comment prefetch as the base line. 
-+#if 0 -+ __asm__ __volatile__ ( -+ " preld 0, %[__loc] \n" ++ // 'pref' is implemented as NOP in Loongson 3A ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 0, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) + : -+ : [__loc] "m"( *((address)loc + interval) ) + : "memory" + ); -+#endif +} + +inline void Prefetch::write(void *loc, intx interval) { -+// Ditto -+#if 0 -+ __asm__ __volatile__ ( -+ " preld 8, %[__loc] \n" ++ __asm__ __volatile__ ( ++ " .set push\n" ++ " .set mips32\n" ++ " .set noreorder\n" ++ " pref 1, 0(%[__loc]) \n" ++ " .set pop\n" ++ : [__loc] "=&r"(loc) + : -+ : [__loc] "m"( *((address)loc + interval) ) + : "memory" + ); -+#endif ++ +} + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_PREFETCH_LINUX_LOONGARCH_INLINE_HPP -diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp ++#endif // OS_CPU_LINUX_MIPS_VM_PREFETCH_LINUX_MIPS_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp new file mode 100644 -index 0000000000..0f8992735a +index 0000000000..dbe8efe164 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.cpp -@@ -0,0 +1,106 @@ ++++ b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.cpp +@@ -0,0 +1,117 @@ +/* + * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -59511,6 +107998,7 @@ index 0000000000..0f8992735a + */ + +#include "precompiled.hpp" ++#include "compiler/compileBroker.hpp" +#include "memory/metaspaceShared.hpp" +#include "runtime/frame.inline.hpp" +#include "runtime/thread.inline.hpp" @@ -59521,6 +108009,16 @@ index 0000000000..0f8992735a + _anchor.clear(); +} + ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ if (_anchor.last_Java_pc() != NULL) { ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++ } else { ++ // This will pick up pc from sp ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); ++ } ++} ++ +// For Forte Analyzer AsyncGetCallTrace profiling support - thread is +// currently interrupted by SIGPROF +bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, @@ -59569,7 +108067,7 @@ index 0000000000..0f8992735a + + frame ret_frame(ret_sp, ret_fp, addr.pc()); + if (!ret_frame.safe_for_sender(jt)) { -+#if COMPILER2 ++#ifdef COMPILER2 + // C2 and JVMCI use ebp as a general register see if NULL fp helps + frame ret_frame2(ret_sp, NULL, addr.pc()); + if (!ret_frame2.safe_for_sender(jt)) { @@ -59591,15 +108089,15 @@ index 0000000000..0f8992735a +} + +void JavaThread::cache_global_variables() { } -diff --git a/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp +diff --git a/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp new file mode 100644 -index 0000000000..c67e0f80d8 +index 0000000000..8b8dbe219c --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/thread_linux_loongarch.hpp -@@ -0,0 +1,74 @@ ++++ 
b/src/hotspot/os_cpu/linux_mips/thread_linux_mips.hpp +@@ -0,0 +1,66 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -59622,21 +108120,13 @@ index 0000000000..c67e0f80d8 + * + */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP + + private: + void pd_initialize(); + -+ frame pd_last_frame() { -+ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); -+ if (_anchor.last_Java_pc() != NULL) { -+ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); -+ } else { -+ // This will pick up pc from sp -+ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp()); -+ } -+ } ++ frame pd_last_frame(); + + public: + // Mutators are highly dangerous.... @@ -59670,16 +108160,16 @@ index 0000000000..c67e0f80d8 + static void enable_register_stack_guard() {} + static void disable_register_stack_guard() {} + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_THREAD_LINUX_LOONGARCH_HPP -diff --git a/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp ++#endif // OS_CPU_LINUX_MIPS_VM_THREAD_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp new file mode 100644 -index 0000000000..5ff935c1b7 +index 0000000000..b7454bf045 --- /dev/null -+++ b/src/hotspot/os_cpu/linux_loongarch/vmStructs_linux_loongarch.hpp ++++ b/src/hotspot/os_cpu/linux_mips/vmStructs_linux_mips.hpp @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2000, 2013, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2015, 2021, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2016, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -59702,8 +108192,8 @@ index 0000000000..5ff935c1b7 + * + */ + -+#ifndef OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP -+#define OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP ++#ifndef OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP ++#define OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP + +// These are the OS and CPU-specific fields, types and integer +// constants required by the Serviceability Agent. This file is @@ -59731,7 +108221,41 @@ index 0000000000..5ff935c1b7 + +#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) + -+#endif // OS_CPU_LINUX_LOONGARCH_VM_VMSTRUCTS_LINUX_LOONGARCH_HPP ++#endif // OS_CPU_LINUX_MIPS_VM_VMSTRUCTS_LINUX_MIPS_HPP +diff --git a/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp +new file mode 100644 +index 0000000000..93e4bea04c +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_mips/vm_version_linux_mips.cpp +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, 2019, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" diff --git a/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp b/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp index 2b0fa83c1a..270e0bc180 100644 --- a/src/hotspot/os_cpu/linux_x86/gc/z/zGlobals_linux_x86.hpp @@ -59744,38 +108268,48 @@ index 2b0fa83c1a..270e0bc180 100644 + #endif // OS_CPU_LINUX_X86_ZGLOBALS_LINUX_X86_HPP diff --git a/src/hotspot/share/asm/codeBuffer.cpp b/src/hotspot/share/asm/codeBuffer.cpp -index 22704aa7ea..69258dfcdc 100644 +index 4912f88056..a420f7807b 100644 --- a/src/hotspot/share/asm/codeBuffer.cpp +++ b/src/hotspot/share/asm/codeBuffer.cpp -@@ -352,6 +352,9 @@ void CodeSection::relocate(address at, RelocationHolder const& spec, int format) +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2023. These ++ * modifications are Copyright (c) 2018, 2023, Loongson Technology, and are made ++ * available on the same license terms set forth above. 
++ */ ++ + #include "precompiled.hpp" + #include "asm/codeBuffer.hpp" + #include "compiler/disassembler.hpp" +@@ -351,6 +357,7 @@ void CodeSection::relocate(address at, RelocationHolder const& spec, int format) assert(rtype == relocInfo::none || rtype == relocInfo::runtime_call_type || rtype == relocInfo::internal_word_type|| -+#ifdef MIPS -+ rtype == relocInfo::internal_pc_type || -+#endif ++ NOT_ZERO(MIPS64_ONLY(rtype == relocInfo::internal_pc_type ||)) rtype == relocInfo::section_word_type || rtype == relocInfo::external_word_type, "code needs relocation information"); -diff --git a/src/hotspot/share/asm/codeBuffer.hpp b/src/hotspot/share/asm/codeBuffer.hpp -index 2f6b2ed4f1..f672acc52f 100644 ---- a/src/hotspot/share/asm/codeBuffer.hpp -+++ b/src/hotspot/share/asm/codeBuffer.hpp -@@ -402,6 +402,9 @@ class CodeBuffer: public StackObj { - _last_insn = NULL; - #if INCLUDE_AOT - _immutable_PIC = false; -+#endif -+#if defined(MIPS) && !defined(ZERO) -+ _continuous_load_instuctions_count = 0; - #endif - } - diff --git a/src/hotspot/share/c1/c1_Compiler.cpp b/src/hotspot/share/c1/c1_Compiler.cpp -index aff12954b3..d2d09238e7 100644 +index aff12954b3..caa93fc804 100644 --- a/src/hotspot/share/c1/c1_Compiler.cpp +++ b/src/hotspot/share/c1/c1_Compiler.cpp -@@ -211,7 +211,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) { +@@ -44,6 +44,12 @@ + #include "utilities/bitMap.inline.hpp" + #include "utilities/macros.hpp" + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + + Compiler::Compiler() : AbstractCompiler(compiler_c1) { + } +@@ -211,7 +217,7 @@ bool Compiler::is_intrinsic_supported(const methodHandle& method) { case vmIntrinsics::_updateCRC32: case vmIntrinsics::_updateBytesCRC32: case vmIntrinsics::_updateByteBufferCRC32: @@ -59785,7 +108319,7 @@ index aff12954b3..d2d09238e7 100644 case vmIntrinsics::_updateDirectByteBufferCRC32C: #endif diff --git a/src/hotspot/share/c1/c1_LIR.cpp b/src/hotspot/share/c1/c1_LIR.cpp -index e30d39f73d..417771d535 100644 +index e30d39f73d..7461b7449a 100644 --- a/src/hotspot/share/c1/c1_LIR.cpp +++ b/src/hotspot/share/c1/c1_LIR.cpp @@ -250,6 +250,18 @@ void LIR_Op2::verify() const { @@ -60012,7 +108546,7 @@ index e30d39f73d..417771d535 100644 void LIR_Op::print_condition(outputStream* out, LIR_Condition cond) { switch(cond) { case lir_cond_equal: out->print("[EQ]"); break; -@@ -1876,12 +2011,7 @@ void LIR_OpConvert::print_instr(outputStream* out) const { +@@ -1876,12 +2011,9 @@ void LIR_OpConvert::print_instr(outputStream* out) const { print_bytecode(out, bytecode()); in_opr()->print(out); out->print(" "); result_opr()->print(out); out->print(" "); @@ -60020,13 +108554,14 @@ index e30d39f73d..417771d535 100644 - if(tmp1()->is_valid()) { - tmp1()->print(out); out->print(" "); - tmp2()->print(out); out->print(" "); -- } ++ if(tmp()->is_valid()) { ++ tmp()->print(out); out->print(" "); + } -#endif -+ if(tmp()->is_valid()) tmp()->print(out); out->print(" "); } void LIR_OpConvert::print_bytecode(outputStream* out, Bytecodes::Code code) { -@@ -1979,6 +2109,19 @@ void LIR_Op3::print_instr(outputStream* out) const { +@@ -1979,6 +2111,19 @@ void LIR_Op3::print_instr(outputStream* out) const { } @@ -60047,7 +108582,7 @@ index e30d39f73d..417771d535 100644 hdr_opr()->print(out); out->print(" "); obj_opr()->print(out); out->print(" "); diff --git a/src/hotspot/share/c1/c1_LIR.hpp 
b/src/hotspot/share/c1/c1_LIR.hpp -index 3912b41d3f..fe7cfefe4e 100644 +index 3234ca018b..1f46e44c77 100644 --- a/src/hotspot/share/c1/c1_LIR.hpp +++ b/src/hotspot/share/c1/c1_LIR.hpp @@ -864,9 +864,11 @@ class LIR_OpConvert; @@ -60667,10 +109202,23 @@ index 3ad325d759..f377b27859 100644 ciMethod *method, LIR_Opr step, int frequency, int bci, bool backedge, bool notify); diff --git a/src/hotspot/share/c1/c1_LinearScan.cpp b/src/hotspot/share/c1/c1_LinearScan.cpp -index c28055fd99..b6f7685779 100644 +index c28055fd99..4e7df88102 100644 --- a/src/hotspot/share/c1/c1_LinearScan.cpp +++ b/src/hotspot/share/c1/c1_LinearScan.cpp -@@ -1258,6 +1258,23 @@ void LinearScan::add_register_hints(LIR_Op* op) { +@@ -35,6 +35,12 @@ + #include "runtime/timerTrace.hpp" + #include "utilities/bitMap.inline.hpp" + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef PRODUCT + + static LinearScanStatistic _stat_before_alloc; +@@ -1258,6 +1264,23 @@ void LinearScan::add_register_hints(LIR_Op* op) { } break; } @@ -60694,7 +109242,7 @@ index c28055fd99..b6f7685779 100644 default: break; } -@@ -3342,7 +3359,9 @@ void LinearScan::verify_no_oops_in_fixed_intervals() { +@@ -3342,7 +3365,9 @@ void LinearScan::verify_no_oops_in_fixed_intervals() { check_live = (move->patch_code() == lir_patch_none); } LIR_OpBranch* branch = op->as_OpBranch(); @@ -60705,7 +109253,7 @@ index c28055fd99..b6f7685779 100644 // Don't bother checking the stub in this case since the // exception stub will never return to normal control flow. check_live = false; -@@ -6192,6 +6211,16 @@ void ControlFlowOptimizer::substitute_branch_target(BlockBegin* block, BlockBegi +@@ -6192,6 +6217,16 @@ void ControlFlowOptimizer::substitute_branch_target(BlockBegin* block, BlockBegi assert(op->as_OpBranch() != NULL, "branch must be of type LIR_OpBranch"); LIR_OpBranch* branch = (LIR_OpBranch*)op; @@ -60722,7 +109270,7 @@ index c28055fd99..b6f7685779 100644 if (branch->block() == target_from) { branch->change_block(target_to); } -@@ -6320,6 +6349,20 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { +@@ -6320,6 +6355,20 @@ void ControlFlowOptimizer::delete_unnecessary_jumps(BlockList* code) { } } } @@ -60743,7 +109291,7 @@ index c28055fd99..b6f7685779 100644 } } } -@@ -6395,6 +6438,13 @@ void ControlFlowOptimizer::verify(BlockList* code) { +@@ -6395,6 +6444,13 @@ void ControlFlowOptimizer::verify(BlockList* code) { assert(op_branch->block() == NULL || code->find(op_branch->block()) != -1, "branch target not valid"); assert(op_branch->ublock() == NULL || code->find(op_branch->ublock()) != -1, "branch target not valid"); } @@ -60757,7 +109305,7 @@ index c28055fd99..b6f7685779 100644 } for (j = 0; j < block->number_of_sux() - 1; j++) { -@@ -6639,6 +6689,24 @@ void LinearScanStatistic::collect(LinearScan* allocator) { +@@ -6639,6 +6695,24 @@ void LinearScanStatistic::collect(LinearScan* allocator) { break; } @@ -60975,6 +109523,31 @@ index 4289e5e5c4..9502463bd5 100644 __ branch_destination(slow->continuation()); } +diff --git a/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp b/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp +index 98a2fe7f1c..b43a441066 100644 +--- a/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp ++++ b/src/hotspot/share/gc/g1/g1FullGCMarker.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 
2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_GC_G1_G1MARKSTACK_INLINE_HPP + #define SHARE_VM_GC_G1_G1MARKSTACK_INLINE_HPP + +@@ -71,6 +77,7 @@ template inline void G1FullGCMarker::mark_and_push(T* p) { + _oop_stack.push(obj); + assert(_bitmap->is_marked(obj), "Must be marked now - map self"); + } else { ++ DEBUG_ONLY(OrderAccess::loadload()); + assert(_bitmap->is_marked(obj) || G1ArchiveAllocator::is_closed_archive_object(obj), + "Must be marked by other or closed archive object"); + } diff --git a/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp b/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp index 1ef900783d..b30456429d 100644 --- a/src/hotspot/share/gc/parallel/psPromotionManager.inline.hpp @@ -61274,11 +109847,64 @@ index e01a242a57..0661f3b9d1 100644 return false; #else #warning "Unconfigured platform" +diff --git a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +index 8927063330..b5bb5c2887 100644 +--- a/src/hotspot/share/jvmci/vmStructs_jvmci.cpp ++++ b/src/hotspot/share/jvmci/vmStructs_jvmci.cpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #include "precompiled.hpp" + #include "code/codeBlob.hpp" + #include "compiler/abstractCompiler.hpp" +@@ -715,6 +721,35 @@ + #endif + + ++#ifdef LOONGARCH64 ++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) \ ++ declare_constant(VM_Version::CPU_LA32) \ ++ declare_constant(VM_Version::CPU_LA64) \ ++ declare_constant(VM_Version::CPU_LLEXC) \ ++ declare_constant(VM_Version::CPU_SCDLY) \ ++ declare_constant(VM_Version::CPU_LLDBAR) \ ++ declare_constant(VM_Version::CPU_LBT_X86) \ ++ declare_constant(VM_Version::CPU_LBT_ARM) \ ++ declare_constant(VM_Version::CPU_LBT_MIPS) \ ++ declare_constant(VM_Version::CPU_CCDMA) \ ++ declare_constant(VM_Version::CPU_COMPLEX) \ ++ declare_constant(VM_Version::CPU_FP) \ ++ declare_constant(VM_Version::CPU_CRYPTO) \ ++ declare_constant(VM_Version::CPU_LSX) \ ++ declare_constant(VM_Version::CPU_LASX) \ ++ declare_constant(VM_Version::CPU_LAM) \ ++ declare_constant(VM_Version::CPU_LLSYNC) \ ++ declare_constant(VM_Version::CPU_TGTSYNC) \ ++ declare_constant(VM_Version::CPU_ULSYNC) \ ++ declare_constant(VM_Version::CPU_UAL) ++ ++#endif ++ ++ + #ifdef X86 + + #define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ diff --git a/src/hotspot/share/memory/metaspace.cpp b/src/hotspot/share/memory/metaspace.cpp -index b37fec5829..707521abfa 100644 +index 80958b0469..08d13a4189 100644 --- a/src/hotspot/share/memory/metaspace.cpp +++ b/src/hotspot/share/memory/metaspace.cpp -@@ -1055,12 +1055,12 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a +@@ -1082,12 
+1082,12 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a // Don't use large pages for the class space. bool large_pages = false; @@ -61293,7 +109919,7 @@ index b37fec5829..707521abfa 100644 ReservedSpace metaspace_rs; -@@ -1086,7 +1086,8 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a +@@ -1113,7 +1113,8 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a // below 32g to get a zerobased CCS. For simplicity we reuse the search // strategy for AARCH64. @@ -61303,7 +109929,7 @@ index b37fec5829..707521abfa 100644 for (char *a = align_up(requested_addr, increment); a < (char*)(1024*G); a += increment) { -@@ -1117,7 +1118,7 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a +@@ -1144,7 +1145,7 @@ void Metaspace::allocate_metaspace_compressed_klass_ptrs(char* requested_addr, a } } @@ -61312,11 +109938,37 @@ index b37fec5829..707521abfa 100644 if (!metaspace_rs.is_reserved()) { #if INCLUDE_CDS +diff --git a/src/hotspot/share/oops/oop.inline.hpp b/src/hotspot/share/oops/oop.inline.hpp +index 6c631f5458..9865106720 100644 +--- a/src/hotspot/share/oops/oop.inline.hpp ++++ b/src/hotspot/share/oops/oop.inline.hpp +@@ -22,6 +22,12 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ */ ++ + #ifndef SHARE_VM_OOPS_OOP_INLINE_HPP + #define SHARE_VM_OOPS_OOP_INLINE_HPP + +@@ -389,7 +395,7 @@ oop oopDesc::forward_to_atomic(oop p, atomic_memory_order order) { + // forwarding pointer. + oldMark = curMark; + } +- return forwardee(); ++ return (oop)oldMark->decode_pointer(); + } + + // Note that the forwardee is not the same thing as the displaced_mark. diff --git a/src/hotspot/share/opto/compile.hpp b/src/hotspot/share/opto/compile.hpp -index 69e210b66b..b796c07d8f 100644 +index 569fbc6d69..c1f1b82ffa 100644 --- a/src/hotspot/share/opto/compile.hpp +++ b/src/hotspot/share/opto/compile.hpp -@@ -1186,7 +1186,7 @@ class Compile : public Phase { +@@ -1204,7 +1204,7 @@ class Compile : public Phase { bool in_scratch_emit_size() const { return _in_scratch_emit_size; } enum ScratchBufferBlob { @@ -61326,7 +109978,7 @@ index 69e210b66b..b796c07d8f 100644 #else MAX_inst_size = 1024, diff --git a/src/hotspot/share/opto/output.cpp b/src/hotspot/share/opto/output.cpp -index b6540e06a3..b1578a4442 100644 +index b6540e06a3..52d1fc9fb9 100644 --- a/src/hotspot/share/opto/output.cpp +++ b/src/hotspot/share/opto/output.cpp @@ -22,6 +22,12 @@ @@ -61370,7 +110022,7 @@ index b6540e06a3..b1578a4442 100644 debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map); } else { mcall = mach->as_MachCall(); -@@ -1393,6 +1420,31 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { +@@ -1393,6 +1420,22 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { DEBUG_ONLY(uint instr_offset = cb->insts_size()); n->emit(*cb, _regalloc); current_offset = cb->insts_size(); @@ -61387,49 +110039,14 @@ index b6540e06a3..b1578a4442 100644 + adjust += 4; + inst = (NativeInstruction*) (cb->insts()->end() - 8); + } -+#ifdef MIPS64 -+ if (PatchContinuousLoad) { -+ // if PatchContinuousLoad is true, a nop may be inserted after a load instruction and -+ // the adjust would be 2 instructions. 
-+ if (inst->is_nop()) { -+ adjust += 4; -+ } -+ } -+#endif + previous_offset = current_offset - adjust; + } +#endif // Above we only verified that there is enough space in the instruction section. // However, the instruction may emit stubs that cause code buffer expansion. -@@ -1402,7 +1454,9 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { - } - - #ifdef ASSERT -- uint n_size = n->size(_regalloc); -+ // adjust: one node may be inserted one and only one nop. -+ int adjust = MIPS64_ONLY(PatchContinuousLoad ? block->number_of_nodes() * 4 :) 0; -+ uint n_size = n->size(regalloc()) + adjust; - if (n_size < (current_offset-instr_offset)) { - MachNode* mach = n->as_Mach(); - n->dump(); -@@ -1488,7 +1542,13 @@ void Compile::fill_buffer(CodeBuffer* cb, uint* blk_starts) { - } - // Verify that the distance for generated before forward - // short branches is still valid. -- guarantee((int)(blk_starts[i+1] - blk_starts[i]) >= (current_offset - blk_offset), "shouldn't increase block size"); -+ // adjust: one node may be inserted one and only one nop. -+ int adjust = MIPS64_ONLY(PatchContinuousLoad ? block->number_of_nodes() * 4 :) 0; -+#ifndef PRODUCT -+ if ((int)(blk_starts[i+1] - blk_starts[i] + adjust) < (current_offset - blk_offset)) -+ tty->print_cr("%s:%d blk_starts[i+1]:%d, blk_starts[i]:%d, adjust: %d, current_offset:%d, blk_offset:%d", __func__, __LINE__, blk_starts[i+1], blk_starts[i], adjust, current_offset, blk_offset); -+#endif -+ guarantee((int)(blk_starts[i+1] - blk_starts[i] + adjust) >= (current_offset - blk_offset), "shouldn't increase block size"); - - // Save new block start offset - blk_starts[i] = blk_offset; diff --git a/src/hotspot/share/opto/type.cpp b/src/hotspot/share/opto/type.cpp -index 223b7a1c66..01ca28e7ab 100644 +index 7d767c47c9..23ec34e5e2 100644 --- a/src/hotspot/share/opto/type.cpp +++ b/src/hotspot/share/opto/type.cpp @@ -22,6 +22,12 @@ @@ -61458,8 +110075,20 @@ index 223b7a1c66..01ca28e7ab 100644 #else // all other { Bad, T_ILLEGAL, "vectors:", false, Op_VecS, relocInfo::none }, // VectorS { Bad, T_ILLEGAL, "vectord:", false, Op_VecD, relocInfo::none }, // VectorD +diff --git a/src/hotspot/share/runtime/java.cpp b/src/hotspot/share/runtime/java.cpp +index 84123b29ec..77fbacf2d8 100644 +--- a/src/hotspot/share/runtime/java.cpp ++++ b/src/hotspot/share/runtime/java.cpp +@@ -68,6 +68,7 @@ + #include "runtime/thread.inline.hpp" + #include "runtime/timer.hpp" + #include "runtime/vmOperations.hpp" ++#include "runtime/vmThread.hpp" + #include "services/memTracker.hpp" + #include "utilities/dtrace.hpp" + #include "utilities/globalDefinitions.hpp" diff --git a/src/hotspot/share/runtime/os.cpp b/src/hotspot/share/runtime/os.cpp -index 389a1e95f8..2f9b990456 100644 +index e0f4a2af1f..09cc4b1ba5 100644 --- a/src/hotspot/share/runtime/os.cpp +++ b/src/hotspot/share/runtime/os.cpp @@ -22,6 +22,12 @@ @@ -61467,104 +110096,24 @@ index 389a1e95f8..2f9b990456 100644 */ +/* -+ * This file has been modified by Loongson Technology in 2021, These -+ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ */ + #include "precompiled.hpp" #include "jvm.h" #include "classfile/classLoader.hpp" -@@ -1237,7 +1243,8 @@ bool os::is_first_C_frame(frame* fr) { +@@ -1242,7 +1248,8 @@ bool os::is_first_C_frame(frame* fr) { + if ((uintptr_t)fr->sender_sp() == (uintptr_t)-1 || is_pointer_bad(fr->sender_sp())) return true; - uintptr_t old_fp = (uintptr_t)fr->link(); - if ((old_fp & fp_align_mask) != 0) return true; -- if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp) return true; + uintptr_t old_fp = (uintptr_t)fr->link_or_null(); +- if (old_fp == 0 || old_fp == (uintptr_t)-1 || old_fp == ufp || + // The check for old_fp and ufp is harmful on LoongArch and MIPS due to their special ABIs. -+ if (old_fp == 0 || old_fp == (uintptr_t)-1 NOT_LOONGARCH64_AND_MIPS64(|| old_fp == ufp)) return true; ++ if (old_fp == 0 || old_fp == (uintptr_t)-1 NOT_LOONGARCH64_AND_MIPS64(|| old_fp == ufp) || + is_pointer_bad(fr->link_or_null())) return true; // stack grows downwards; if old_fp is below current fp or if the stack - // frame is too large, either the stack is corrupted or fp is not saved -diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp -index 595ff7495a..2d6d63b00b 100644 ---- a/src/hotspot/share/runtime/sharedRuntime.cpp -+++ b/src/hotspot/share/runtime/sharedRuntime.cpp -@@ -22,6 +22,12 @@ - * - */ - -+/* -+ * This file has been modified by Loongson Technology in 2018, These -+ * modifications are Copyright (c) 2018, Loongson Technology, and are made -+ * available on the same license terms set forth above. -+ */ -+ - #include "precompiled.hpp" - #include "jvm.h" - #include "aot/aotLoader.hpp" -@@ -3215,3 +3221,31 @@ void SharedRuntime::on_slowpath_allocation_exit(JavaThread* thread) { - BarrierSet *bs = BarrierSet::barrier_set(); - bs->on_slowpath_allocation_exit(thread, new_obj); - } -+ -+void SharedRuntime::print_long(long long i) { -+ tty->print("%llx", i); -+} -+ -+void SharedRuntime::print_int(int i) { -+ tty->print("%x", i); -+} -+ -+void SharedRuntime::print_float(float f) { -+ //tty->print("ld:%ld ", f); -+ //tty->print("lx:%lx ", f); -+ tty->print("lf:%g ", f); -+} -+ -+void SharedRuntime::print_double(double f) { -+ //tty->print("%ld ", f); -+ //tty->print("0x%lx ", f); -+ tty->print("%g ", f); -+} -+ -+void SharedRuntime::print_str(char *str) { -+ tty->print("%s", str); -+} -+ -+void SharedRuntime::print_reg_with_pc(char *reg_name, long i, long pc) { -+ tty->print_cr("%s: %lx pc: %lx", reg_name, i, pc); -+} -diff --git a/src/hotspot/share/runtime/sharedRuntime.hpp b/src/hotspot/share/runtime/sharedRuntime.hpp -index a110098376..c05f7411f7 100644 ---- a/src/hotspot/share/runtime/sharedRuntime.hpp -+++ b/src/hotspot/share/runtime/sharedRuntime.hpp -@@ -22,6 +22,12 @@ - * - */ - -+/* -+ * This file has been modified by Loongson Technology in 2018, These -+ * modifications are Copyright (c) 2018, Loongson Technology, and are made -+ * available on the same license terms set forth above. 
-+ */ -+ - #ifndef SHARE_VM_RUNTIME_SHAREDRUNTIME_HPP - #define SHARE_VM_RUNTIME_SHAREDRUNTIME_HPP - -@@ -596,6 +602,13 @@ class SharedRuntime: AllStatic { - static void print_ic_miss_histogram(); - - #endif // PRODUCT -+ static void print_long(long long i); -+ static void print_int(int i); -+ static void print_float(float i); -+ static void print_double(double i); -+ static void print_str(char *str); -+ -+ static void print_reg_with_pc(char *reg_name, long i, long pc); - }; - - diff --git a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp index e086f794cd..f480195775 100644 --- a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp @@ -61599,10 +110148,10 @@ index e086f794cd..f480195775 100644 S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ diff --git a/src/hotspot/share/utilities/globalDefinitions.hpp b/src/hotspot/share/utilities/globalDefinitions.hpp -index aef662ec15..9a907de8b1 100644 +index c758fc5743..a8c4638f6a 100644 --- a/src/hotspot/share/utilities/globalDefinitions.hpp +++ b/src/hotspot/share/utilities/globalDefinitions.hpp -@@ -1154,6 +1154,15 @@ inline int exact_log2_long(jlong x) { +@@ -1161,6 +1161,15 @@ inline int exact_log2_long(jlong x) { return log2_long(x); } @@ -61709,28 +110258,8 @@ index cf80253868..f611daf36d 100644 // basename.hpp / basename.inline.hpp #define COMPILER_HEADER(basename) XSTR(COMPILER_HEADER_STEM(basename).hpp) #define COMPILER_HEADER_INLINE(basename) XSTR(COMPILER_HEADER_STEM(basename).inline.hpp) -diff --git a/src/java.base/unix/native/libnio/fs/UnixNativeDispatcher.c b/src/java.base/unix/native/libnio/fs/UnixNativeDispatcher.c -index 5a83e747f7..bf4bb4bc70 100644 ---- a/src/java.base/unix/native/libnio/fs/UnixNativeDispatcher.c -+++ b/src/java.base/unix/native/libnio/fs/UnixNativeDispatcher.c -@@ -613,7 +613,15 @@ Java_sun_nio_fs_UnixNativeDispatcher_fstatat0(JNIEnv* env, jclass this, jint dfd - JNU_ThrowInternalError(env, "should not reach here"); - return; - } -+ -+#ifdef __mips__ -+ // __NR_newfstatat is incorrect on Loongnix -+ // workaround it using glibc's fstatat64 -+ RESTARTABLE(fstatat64((int)dfd, path, &buf, (int)flag), err); -+#else - RESTARTABLE((*my_fstatat64_func)((int)dfd, path, &buf, (int)flag), err); -+#endif -+ - if (err == -1) { - throwUnixException(env, errno); - } else { diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c -index 0d834302c5..38de59100a 100644 +index 0d834302c5..6afafea095 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c @@ -22,6 +22,13 @@ @@ -61738,8 +110267,8 @@ index 0d834302c5..38de59100a 100644 */ +/* -+ * This file has been modified by Loongson Technology in 2021. These -+ * modifications are Copyright (c) 2021, Loongson Technology, and are made ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2021, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
+ * + */ @@ -61801,8 +110330,8 @@ index 0d834302c5..38de59100a 100644 + { + int i; + for (i = 0; i < 31; i++) -+ regs[i] = gregs.gpr[i]; -+ regs[REG_INDEX(PC)] = gregs.pc; ++ regs[i] = gregs.regs[i]; ++ regs[REG_INDEX(PC)] = gregs.csr_era; + } +#endif /* loongarch64 */ + @@ -61856,10 +110385,32 @@ index 0d834302c5..38de59100a 100644 return array; } diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -index 8318e8e021..e4546370f8 100644 +index 8318e8e021..07064e76ee 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h -@@ -44,6 +44,10 @@ +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2022. These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + #ifndef _LIBPROC_H_ + #define _LIBPROC_H_ + +@@ -37,13 +44,17 @@ + #include + #define user_regs_struct pt_regs + #endif +-#if defined(aarch64) || defined(arm64) ++#if defined(aarch64) || defined(arm64) || defined(loongarch64) + #include + #define user_regs_struct user_pt_regs + #elif defined(arm) #include #define user_regs_struct pt_regs #endif @@ -61871,7 +110422,7 @@ index 8318e8e021..e4546370f8 100644 // This C bool type must be int for compatibility with Linux calls and // it would be a mistake to equivalence it to C++ bool on many platforms diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c -index 0b4e8e4e35..c4a67d7a95 100644 +index de5254d859..eefe55959c 100644 --- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c @@ -22,6 +22,12 @@ @@ -62028,7 +110579,7 @@ index 0000000000..1b49efd201 + } +} diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java -index efde22ef76..025c4af761 100644 +index 5e5a6bb714..7d7f6424e6 100644 --- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java @@ -23,6 +23,12 @@ @@ -65507,17 +114058,1077 @@ index 0000000000..65d88016ea + System.out.println(addr + ": " + addr.getAddressAt(0)); + } + } -+ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +new file mode 100644 +index 0000000000..dfe3066af0 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public MIPS64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +new file mode 100644 +index 0000000000..f2da760af4 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.mips64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class MIPS64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public MIPS64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected MIPS64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ MIPS64RegisterMap retval = new MIPS64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +index 7d7a6107ca..06d79318d9 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -22,6 +22,13 @@ + * + */ + ++/* ++ * This file has been modified by Loongson Technology in 2021. These ++ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * available on the same license terms set forth above. ++ * ++ */ ++ + package sun.jvm.hotspot.utilities; + + /** Provides canonicalized OS and CPU information for the rest of the +@@ -54,7 +61,7 @@ public class PlatformInfo { + + public static boolean knownCPU(String cpu) { + final String[] KNOWN = +- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; ++ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "mips64", "mips64el", "loongarch64"}; + + for(String s : KNOWN) { + if(s.equals(cpu)) +@@ -101,6 +108,12 @@ public class PlatformInfo { + if (cpu.equals("ppc64le")) + return "ppc64"; + ++ if (cpu.equals("mips64el")) ++ return "mips64"; ++ ++ if (cpu.equals("loongarch64")) ++ return "loongarch64"; ++ + return cpu; + + } +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java +new file mode 100644 +index 0000000000..0d3953ddff +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotJVMCIBackendFactory.java +@@ -0,0 +1,220 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import static java.util.Collections.emptyMap; ++import static jdk.vm.ci.common.InitTimer.timer; ++ ++import java.util.EnumSet; ++import java.util.Map; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.loongarch64.LoongArch64.CPUFeature; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.stack.StackIntrospection; ++import jdk.vm.ci.common.InitTimer; ++import jdk.vm.ci.hotspot.HotSpotCodeCacheProvider; ++import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider; ++import jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory; ++import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; ++import jdk.vm.ci.hotspot.HotSpotMetaAccessProvider; ++import jdk.vm.ci.hotspot.HotSpotStackIntrospection; ++import jdk.vm.ci.meta.ConstantReflectionProvider; ++import jdk.vm.ci.runtime.JVMCIBackend; ++ ++public class LoongArch64HotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFactory { ++ ++ protected EnumSet computeFeatures(@SuppressWarnings("unused") LoongArch64HotSpotVMConfig config) { ++ // Configure the feature set using the HotSpot flag settings. ++ EnumSet features = EnumSet.noneOf(LoongArch64.CPUFeature.class); ++ ++ if ((config.vmVersionFeatures & config.loongarch64LA32) != 0) { ++ features.add(LoongArch64.CPUFeature.LA32); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LA64) != 0) { ++ features.add(LoongArch64.CPUFeature.LA64); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LLEXC) != 0) { ++ features.add(LoongArch64.CPUFeature.LLEXC); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64SCDLY) != 0) { ++ features.add(LoongArch64.CPUFeature.SCDLY); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LLDBAR) != 0) { ++ features.add(LoongArch64.CPUFeature.LLDBAR); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LBT_X86) != 0) { ++ features.add(LoongArch64.CPUFeature.LBT_X86); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LBT_ARM) != 0) { ++ features.add(LoongArch64.CPUFeature.LBT_ARM); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LBT_MIPS) != 0) { ++ features.add(LoongArch64.CPUFeature.LBT_MIPS); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64CCDMA) != 0) { ++ features.add(LoongArch64.CPUFeature.CCDMA); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64COMPLEX) != 0) { ++ features.add(LoongArch64.CPUFeature.COMPLEX); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64FP) != 0) { ++ features.add(LoongArch64.CPUFeature.FP); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64CRYPTO) != 0) { ++ features.add(LoongArch64.CPUFeature.CRYPTO); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LSX) != 0) { ++ features.add(LoongArch64.CPUFeature.LSX); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LASX) != 0) { ++ features.add(LoongArch64.CPUFeature.LASX); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64LAM) != 0) { ++ features.add(LoongArch64.CPUFeature.LAM); ++ } ++ ++ if ((config.vmVersionFeatures & 
config.loongarch64LLSYNC) != 0) { ++ features.add(LoongArch64.CPUFeature.LLSYNC); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64TGTSYNC) != 0) { ++ features.add(LoongArch64.CPUFeature.TGTSYNC); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64ULSYNC) != 0) { ++ features.add(LoongArch64.CPUFeature.ULSYNC); ++ } ++ ++ if ((config.vmVersionFeatures & config.loongarch64UAL) != 0) { ++ features.add(LoongArch64.CPUFeature.UAL); ++ } ++ ++ return features; ++ } ++ ++ protected EnumSet computeFlags(@SuppressWarnings("unused") LoongArch64HotSpotVMConfig config) { ++ EnumSet flags = EnumSet.noneOf(LoongArch64.Flag.class); ++ ++ if (config.useLSX) { ++ flags.add(LoongArch64.Flag.useLSX); ++ } ++ ++ if (config.useLASX) { ++ flags.add(LoongArch64.Flag.useLASX); ++ } ++ ++ return flags; ++ } ++ ++ protected TargetDescription createTarget(LoongArch64HotSpotVMConfig config) { ++ final int stackFrameAlignment = 16; ++ final int implicitNullCheckLimit = 4096; ++ final boolean inlineObjects = true; ++ Architecture arch = new LoongArch64(computeFeatures(config), computeFlags(config)); ++ return new TargetDescription(arch, true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects); ++ } ++ ++ protected HotSpotConstantReflectionProvider createConstantReflection(HotSpotJVMCIRuntime runtime) { ++ return new HotSpotConstantReflectionProvider(runtime); ++ } ++ ++ protected RegisterConfig createRegisterConfig(LoongArch64HotSpotVMConfig config, TargetDescription target) { ++ return new LoongArch64HotSpotRegisterConfig(target, config.useCompressedOops); ++ } ++ ++ protected HotSpotCodeCacheProvider createCodeCache(HotSpotJVMCIRuntime runtime, TargetDescription target, RegisterConfig regConfig) { ++ return new HotSpotCodeCacheProvider(runtime, runtime.getConfig(), target, regConfig); ++ } ++ ++ protected HotSpotMetaAccessProvider createMetaAccess(HotSpotJVMCIRuntime runtime) { ++ return new HotSpotMetaAccessProvider(runtime); ++ } ++ ++ @Override ++ public String getArchitecture() { ++ return "loongarch64"; ++ } ++ ++ @Override ++ public String toString() { ++ return "JVMCIBackend:" + getArchitecture(); ++ } ++ ++ @Override ++ @SuppressWarnings("try") ++ public JVMCIBackend createJVMCIBackend(HotSpotJVMCIRuntime runtime, JVMCIBackend host) { ++ ++ assert host == null; ++ LoongArch64HotSpotVMConfig config = new LoongArch64HotSpotVMConfig(runtime.getConfigStore()); ++ TargetDescription target = createTarget(config); ++ ++ RegisterConfig regConfig; ++ HotSpotCodeCacheProvider codeCache; ++ ConstantReflectionProvider constantReflection; ++ HotSpotMetaAccessProvider metaAccess; ++ StackIntrospection stackIntrospection; ++ try (InitTimer t = timer("create providers")) { ++ try (InitTimer rt = timer("create MetaAccess provider")) { ++ metaAccess = createMetaAccess(runtime); ++ } ++ try (InitTimer rt = timer("create RegisterConfig")) { ++ regConfig = createRegisterConfig(config, target); ++ } ++ try (InitTimer rt = timer("create CodeCache provider")) { ++ codeCache = createCodeCache(runtime, target, regConfig); ++ } ++ try (InitTimer rt = timer("create ConstantReflection provider")) { ++ constantReflection = createConstantReflection(runtime); ++ } ++ try (InitTimer rt = timer("create StackIntrospection provider")) { ++ stackIntrospection = new HotSpotStackIntrospection(runtime); ++ } ++ } ++ try (InitTimer rt = timer("instantiate backend")) { ++ return createBackend(metaAccess, codeCache, constantReflection, stackIntrospection); ++ } ++ } ++ ++ protected JVMCIBackend 
createBackend(HotSpotMetaAccessProvider metaAccess, HotSpotCodeCacheProvider codeCache, ConstantReflectionProvider constantReflection, ++ StackIntrospection stackIntrospection) { ++ return new JVMCIBackend(metaAccess, codeCache, constantReflection, stackIntrospection); ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java +new file mode 100644 +index 0000000000..2ee6a4b847 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotRegisterConfig.java +@@ -0,0 +1,297 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import static jdk.vm.ci.loongarch64.LoongArch64.ra; ++import static jdk.vm.ci.loongarch64.LoongArch64.a0; ++import static jdk.vm.ci.loongarch64.LoongArch64.a1; ++import static jdk.vm.ci.loongarch64.LoongArch64.a2; ++import static jdk.vm.ci.loongarch64.LoongArch64.a3; ++import static jdk.vm.ci.loongarch64.LoongArch64.a4; ++import static jdk.vm.ci.loongarch64.LoongArch64.a5; ++import static jdk.vm.ci.loongarch64.LoongArch64.a6; ++import static jdk.vm.ci.loongarch64.LoongArch64.a7; ++import static jdk.vm.ci.loongarch64.LoongArch64.SCR1; ++import static jdk.vm.ci.loongarch64.LoongArch64.SCR2; ++import static jdk.vm.ci.loongarch64.LoongArch64.t0; ++import static jdk.vm.ci.loongarch64.LoongArch64.v0; ++import static jdk.vm.ci.loongarch64.LoongArch64.s5; ++import static jdk.vm.ci.loongarch64.LoongArch64.s6; ++import static jdk.vm.ci.loongarch64.LoongArch64.sp; ++import static jdk.vm.ci.loongarch64.LoongArch64.fp; ++import static jdk.vm.ci.loongarch64.LoongArch64.tp; ++import static jdk.vm.ci.loongarch64.LoongArch64.rx; ++import static jdk.vm.ci.loongarch64.LoongArch64.f0; ++import static jdk.vm.ci.loongarch64.LoongArch64.f1; ++import static jdk.vm.ci.loongarch64.LoongArch64.f2; ++import static jdk.vm.ci.loongarch64.LoongArch64.f3; ++import static jdk.vm.ci.loongarch64.LoongArch64.f4; ++import static jdk.vm.ci.loongarch64.LoongArch64.f5; ++import static jdk.vm.ci.loongarch64.LoongArch64.f6; ++import static jdk.vm.ci.loongarch64.LoongArch64.f7; ++import static jdk.vm.ci.loongarch64.LoongArch64.fv0; ++import static jdk.vm.ci.loongarch64.LoongArch64.zero; ++ ++import java.util.ArrayList; ++import java.util.HashSet; ++import java.util.List; ++import java.util.Set; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CallingConvention.Type; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterAttributes; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.ValueKindFactory; ++import jdk.vm.ci.common.JVMCIError; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.JavaType; ++import jdk.vm.ci.meta.PlatformKind; ++import jdk.vm.ci.meta.Value; ++import jdk.vm.ci.meta.ValueKind; ++ ++public class LoongArch64HotSpotRegisterConfig implements RegisterConfig { ++ ++ private final TargetDescription target; ++ ++ private final RegisterArray allocatable; ++ ++ /** ++ * The caller saved registers always include all parameter registers. 
++ */ ++ private final RegisterArray callerSaved; ++ ++ private final boolean allAllocatableAreCallerSaved; ++ ++ private final RegisterAttributes[] attributesMap; ++ ++ @Override ++ public RegisterArray getAllocatableRegisters() { ++ return allocatable; ++ } ++ ++ @Override ++ public RegisterArray filterAllocatableRegisters(PlatformKind kind, RegisterArray registers) { ++ ArrayList list = new ArrayList<>(); ++ for (Register reg : registers) { ++ if (target.arch.canStoreValue(reg.getRegisterCategory(), kind)) { ++ list.add(reg); ++ } ++ } ++ ++ return new RegisterArray(list); ++ } ++ ++ @Override ++ public RegisterAttributes[] getAttributesMap() { ++ return attributesMap.clone(); ++ } ++ ++ private final RegisterArray javaGeneralParameterRegisters = new RegisterArray(t0, a0, a1, a2, a3, a4, a5, a6, a7); ++ private final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(a0, a1, a2, a3, a4, a5, a6, a7); ++ private final RegisterArray floatParameterRegisters = new RegisterArray(f0, f1, f2, f3, f4, f5, f6, f7); ++ ++ public static final Register heapBaseRegister = s5; ++ public static final Register TREG = s6; ++ ++ private static final RegisterArray reservedRegisters = new RegisterArray(fp, ra, zero, sp, tp, rx, SCR1, SCR2, TREG); ++ ++ private static RegisterArray initAllocatable(Architecture arch, boolean reserveForHeapBase) { ++ RegisterArray allRegisters = arch.getAvailableValueRegisters(); ++ Register[] registers = new Register[allRegisters.size() - reservedRegisters.size() - (reserveForHeapBase ? 1 : 0)]; ++ List reservedRegistersList = reservedRegisters.asList(); ++ ++ int idx = 0; ++ for (Register reg : allRegisters) { ++ if (reservedRegistersList.contains(reg)) { ++ // skip reserved registers ++ continue; ++ } ++ if (reserveForHeapBase && reg.equals(heapBaseRegister)) { ++ // skip heap base register ++ continue; ++ } ++ ++ registers[idx++] = reg; ++ } ++ ++ assert idx == registers.length; ++ return new RegisterArray(registers); ++ } ++ ++ public LoongArch64HotSpotRegisterConfig(TargetDescription target, boolean useCompressedOops) { ++ this(target, initAllocatable(target.arch, useCompressedOops)); ++ assert callerSaved.size() >= allocatable.size(); ++ } ++ ++ public LoongArch64HotSpotRegisterConfig(TargetDescription target, RegisterArray allocatable) { ++ this.target = target; ++ ++ this.allocatable = allocatable; ++ Set callerSaveSet = new HashSet<>(); ++ allocatable.addTo(callerSaveSet); ++ floatParameterRegisters.addTo(callerSaveSet); ++ javaGeneralParameterRegisters.addTo(callerSaveSet); ++ nativeGeneralParameterRegisters.addTo(callerSaveSet); ++ callerSaved = new RegisterArray(callerSaveSet); ++ ++ allAllocatableAreCallerSaved = true; ++ attributesMap = RegisterAttributes.createMap(this, LoongArch64.allRegisters); ++ } ++ ++ @Override ++ public RegisterArray getCallerSaveRegisters() { ++ return callerSaved; ++ } ++ ++ @Override ++ public RegisterArray getCalleeSaveRegisters() { ++ return null; ++ } ++ ++ @Override ++ public boolean areAllAllocatableRegistersCallerSaved() { ++ return allAllocatableAreCallerSaved; ++ } ++ ++ @Override ++ public CallingConvention getCallingConvention(Type type, JavaType returnType, JavaType[] parameterTypes, ValueKindFactory valueKindFactory) { ++ HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type; ++ if (type == HotSpotCallingConventionType.NativeCall) { ++ return callingConvention(nativeGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory); ++ } ++ // On x64, parameter 
locations are the same whether viewed ++ // from the caller or callee perspective ++ return callingConvention(javaGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory); ++ } ++ ++ @Override ++ public RegisterArray getCallingConventionRegisters(Type type, JavaKind kind) { ++ HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type; ++ switch (kind) { ++ case Boolean: ++ case Byte: ++ case Short: ++ case Char: ++ case Int: ++ case Long: ++ case Object: ++ return hotspotType == HotSpotCallingConventionType.NativeCall ? nativeGeneralParameterRegisters : javaGeneralParameterRegisters; ++ case Float: ++ case Double: ++ return floatParameterRegisters; ++ default: ++ throw JVMCIError.shouldNotReachHere(); ++ } ++ } ++ ++ private CallingConvention callingConvention(RegisterArray generalParameterRegisters, JavaType returnType, JavaType[] parameterTypes, HotSpotCallingConventionType type, ++ ValueKindFactory valueKindFactory) { ++ AllocatableValue[] locations = new AllocatableValue[parameterTypes.length]; ++ ++ int currentGeneral = 0; ++ int currentFloat = 0; ++ int currentStackOffset = 0; ++ ++ for (int i = 0; i < parameterTypes.length; i++) { ++ final JavaKind kind = parameterTypes[i].getJavaKind().getStackKind(); ++ ++ switch (kind) { ++ case Byte: ++ case Boolean: ++ case Short: ++ case Char: ++ case Int: ++ case Long: ++ case Object: ++ if (currentGeneral < generalParameterRegisters.size()) { ++ Register register = generalParameterRegisters.get(currentGeneral++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } ++ break; ++ case Float: ++ case Double: ++ if (currentFloat < floatParameterRegisters.size()) { ++ Register register = floatParameterRegisters.get(currentFloat++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } else if (currentGeneral < generalParameterRegisters.size()) { ++ Register register = generalParameterRegisters.get(currentGeneral++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } ++ break; ++ default: ++ throw JVMCIError.shouldNotReachHere(); ++ } ++ ++ if (locations[i] == null) { ++ ValueKind valueKind = valueKindFactory.getValueKind(kind); ++ locations[i] = StackSlot.get(valueKind, currentStackOffset, !type.out); ++ currentStackOffset += Math.max(valueKind.getPlatformKind().getSizeInBytes(), target.wordSize); ++ } ++ } ++ ++ JavaKind returnKind = returnType == null ? JavaKind.Void : returnType.getJavaKind(); ++ AllocatableValue returnLocation = returnKind == JavaKind.Void ? 
Value.ILLEGAL : getReturnRegister(returnKind).asValue(valueKindFactory.getValueKind(returnKind.getStackKind())); ++ return new CallingConvention(currentStackOffset, returnLocation, locations); ++ } ++ ++ @Override ++ public Register getReturnRegister(JavaKind kind) { ++ switch (kind) { ++ case Boolean: ++ case Byte: ++ case Char: ++ case Short: ++ case Int: ++ case Long: ++ case Object: ++ return v0; ++ case Float: ++ case Double: ++ return fv0; ++ case Void: ++ case Illegal: ++ return null; ++ default: ++ throw new UnsupportedOperationException("no return register for type " + kind); ++ } ++ } ++ ++ @Override ++ public Register getFrameRegister() { ++ return sp; ++ } ++ ++ @Override ++ public String toString() { ++ return String.format("Allocatable: " + getAllocatableRegisters() + "%n" + "CallerSave: " + getCallerSaveRegisters() + "%n"); ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java +new file mode 100644 +index 0000000000..c8605976a0 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/LoongArch64HotSpotVMConfig.java +@@ -0,0 +1,77 @@ ++/* ++ * Copyright (c) 2016, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.hotspot.loongarch64; ++ ++import jdk.vm.ci.hotspot.HotSpotVMConfigAccess; ++import jdk.vm.ci.hotspot.HotSpotVMConfigStore; ++import jdk.vm.ci.services.Services; ++ ++/** ++ * Used to access native configuration details. ++ * ++ * All non-static, public fields in this class are so that they can be compiled as constants. ++ */ ++class LoongArch64HotSpotVMConfig extends HotSpotVMConfigAccess { ++ ++ LoongArch64HotSpotVMConfig(HotSpotVMConfigStore config) { ++ super(config); ++ } ++ ++ final boolean useCompressedOops = getFlag("UseCompressedOops", Boolean.class); ++ ++ // CPU Capabilities ++ ++ /* ++ * These flags are set based on the corresponding command line flags. ++ */ ++ final boolean useLSX = getFlag("UseLSX", Boolean.class); ++ final boolean useLASX = getFlag("UseLASX", Boolean.class); ++ ++ final long vmVersionFeatures = getFieldValue("Abstract_VM_Version::_features", Long.class, "uint64_t"); ++ ++ /* ++ * These flags are set if the corresponding support is in the hardware. 
++ */ ++ // Checkstyle: stop ++ final long loongarch64LA32 = getConstant("VM_Version::CPU_LA32", Long.class); ++ final long loongarch64LA64 = getConstant("VM_Version::CPU_LA64", Long.class); ++ final long loongarch64LLEXC = getConstant("VM_Version::CPU_LLEXC", Long.class); ++ final long loongarch64SCDLY = getConstant("VM_Version::CPU_SCDLY", Long.class); ++ final long loongarch64LLDBAR = getConstant("VM_Version::CPU_LLDBAR", Long.class); ++ final long loongarch64LBT_X86 = getConstant("VM_Version::CPU_LBT_X86", Long.class); ++ final long loongarch64LBT_ARM = getConstant("VM_Version::CPU_LBT_ARM", Long.class); ++ final long loongarch64LBT_MIPS = getConstant("VM_Version::CPU_LBT_MIPS", Long.class); ++ final long loongarch64CCDMA = getConstant("VM_Version::CPU_CCDMA", Long.class); ++ final long loongarch64COMPLEX = getConstant("VM_Version::CPU_COMPLEX", Long.class); ++ final long loongarch64FP = getConstant("VM_Version::CPU_FP", Long.class); ++ final long loongarch64CRYPTO = getConstant("VM_Version::CPU_CRYPTO", Long.class); ++ final long loongarch64LSX = getConstant("VM_Version::CPU_LSX", Long.class); ++ final long loongarch64LASX = getConstant("VM_Version::CPU_LASX", Long.class); ++ final long loongarch64LAM = getConstant("VM_Version::CPU_LAM", Long.class); ++ final long loongarch64LLSYNC = getConstant("VM_Version::CPU_LLSYNC", Long.class); ++ final long loongarch64TGTSYNC = getConstant("VM_Version::CPU_TGTSYNC", Long.class); ++ final long loongarch64ULSYNC = getConstant("VM_Version::CPU_ULSYNC", Long.class); ++ final long loongarch64UAL = getConstant("VM_Version::CPU_UAL", Long.class); ++ // Checkstyle: resume ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java +new file mode 100644 +index 0000000000..1048ea9d64 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.loongarch64/src/jdk/vm/ci/hotspot/loongarch64/package-info.java +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/** ++ * The LoongArch64 HotSpot specific portions of the JVMCI API. 
++ */ ++package jdk.vm.ci.hotspot.loongarch64; +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java +new file mode 100644 +index 0000000000..1bb12e7a5f +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64.java +@@ -0,0 +1,247 @@ ++/* ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.loongarch64; ++ ++import java.nio.ByteOrder; ++import java.util.EnumSet; ++ ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.Register.RegisterCategory; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.PlatformKind; ++ ++/** ++ * Represents the LoongArch64 architecture. 
++ */ ++public class LoongArch64 extends Architecture { ++ ++ public static final RegisterCategory CPU = new RegisterCategory("CPU"); ++ ++ // General purpose CPU registers ++ public static final Register zero = new Register(0, 0, "r0", CPU); ++ public static final Register ra = new Register(1, 1, "r1", CPU); ++ public static final Register tp = new Register(2, 2, "r2", CPU); ++ public static final Register sp = new Register(3, 3, "r3", CPU); ++ public static final Register a0 = new Register(4, 4, "r4", CPU); ++ public static final Register a1 = new Register(5, 5, "r5", CPU); ++ public static final Register a2 = new Register(6, 6, "r6", CPU); ++ public static final Register a3 = new Register(7, 7, "r7", CPU); ++ public static final Register a4 = new Register(8, 8, "r8", CPU); ++ public static final Register a5 = new Register(9, 9, "r9", CPU); ++ public static final Register a6 = new Register(10, 10, "r10", CPU); ++ public static final Register a7 = new Register(11, 11, "r11", CPU); ++ public static final Register t0 = new Register(12, 12, "r12", CPU); ++ public static final Register t1 = new Register(13, 13, "r13", CPU); ++ public static final Register t2 = new Register(14, 14, "r14", CPU); ++ public static final Register t3 = new Register(15, 15, "r15", CPU); ++ public static final Register t4 = new Register(16, 16, "r16", CPU); ++ public static final Register t5 = new Register(17, 17, "r17", CPU); ++ public static final Register t6 = new Register(18, 18, "r18", CPU); ++ public static final Register t7 = new Register(19, 19, "r19", CPU); ++ public static final Register t8 = new Register(20, 20, "r20", CPU); ++ public static final Register rx = new Register(21, 21, "r21", CPU); ++ public static final Register fp = new Register(22, 22, "r22", CPU); ++ public static final Register s0 = new Register(23, 23, "r23", CPU); ++ public static final Register s1 = new Register(24, 24, "r24", CPU); ++ public static final Register s2 = new Register(25, 25, "r25", CPU); ++ public static final Register s3 = new Register(26, 26, "r26", CPU); ++ public static final Register s4 = new Register(27, 27, "r27", CPU); ++ public static final Register s5 = new Register(28, 28, "r28", CPU); ++ public static final Register s6 = new Register(29, 29, "r29", CPU); ++ public static final Register s7 = new Register(30, 30, "r30", CPU); ++ public static final Register s8 = new Register(31, 31, "r31", CPU); ++ ++ public static final Register SCR1 = t7; ++ public static final Register SCR2 = t4; ++ public static final Register v0 = a0; ++ ++ // @formatter:off ++ public static final RegisterArray cpuRegisters = new RegisterArray( ++ zero, ra, tp, sp, a0, a1, a2, a3, ++ a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, rx, fp, s0, ++ s1, s2, s3, s4, s5, s6, s7, s8 ++ ); ++ // @formatter:on ++ ++ public static final RegisterCategory SIMD = new RegisterCategory("SIMD"); ++ ++ // Simd registers ++ public static final Register f0 = new Register(32, 0, "f0", SIMD); ++ public static final Register f1 = new Register(33, 1, "f1", SIMD); ++ public static final Register f2 = new Register(34, 2, "f2", SIMD); ++ public static final Register f3 = new Register(35, 3, "f3", SIMD); ++ public static final Register f4 = new Register(36, 4, "f4", SIMD); ++ public static final Register f5 = new Register(37, 5, "f5", SIMD); ++ public static final Register f6 = new Register(38, 6, "f6", SIMD); ++ public static final Register f7 = new Register(39, 7, "f7", SIMD); ++ public static final Register f8 = new Register(40, 8, "f8", SIMD); ++ public 
static final Register f9 = new Register(41, 9, "f9", SIMD); ++ public static final Register f10 = new Register(42, 10, "f10", SIMD); ++ public static final Register f11 = new Register(43, 11, "f11", SIMD); ++ public static final Register f12 = new Register(44, 12, "f12", SIMD); ++ public static final Register f13 = new Register(45, 13, "f13", SIMD); ++ public static final Register f14 = new Register(46, 14, "f14", SIMD); ++ public static final Register f15 = new Register(47, 15, "f15", SIMD); ++ public static final Register f16 = new Register(48, 16, "f16", SIMD); ++ public static final Register f17 = new Register(49, 17, "f17", SIMD); ++ public static final Register f18 = new Register(50, 18, "f18", SIMD); ++ public static final Register f19 = new Register(51, 19, "f19", SIMD); ++ public static final Register f20 = new Register(52, 20, "f20", SIMD); ++ public static final Register f21 = new Register(53, 21, "f21", SIMD); ++ public static final Register f22 = new Register(54, 22, "f22", SIMD); ++ public static final Register f23 = new Register(55, 23, "f23", SIMD); ++ public static final Register f24 = new Register(56, 24, "f24", SIMD); ++ public static final Register f25 = new Register(57, 25, "f25", SIMD); ++ public static final Register f26 = new Register(58, 26, "f26", SIMD); ++ public static final Register f27 = new Register(59, 27, "f27", SIMD); ++ public static final Register f28 = new Register(60, 28, "f28", SIMD); ++ public static final Register f29 = new Register(61, 29, "f29", SIMD); ++ public static final Register f30 = new Register(62, 30, "f30", SIMD); ++ public static final Register f31 = new Register(63, 31, "f31", SIMD); ++ ++ public static final Register fv0 = f0; ++ ++ // @formatter:off ++ public static final RegisterArray simdRegisters = new RegisterArray( ++ f0, f1, f2, f3, f4, f5, f6, f7, ++ f8, f9, f10, f11, f12, f13, f14, f15, ++ f16, f17, f18, f19, f20, f21, f22, f23, ++ f24, f25, f26, f27, f28, f29, f30, f31 ++ ); ++ // @formatter:on ++ ++ // @formatter:off ++ public static final RegisterArray allRegisters = new RegisterArray( ++ zero, ra, tp, sp, a0, a1, a2, a3, ++ a4, a5, a6, a7, t0, t1, t2, t3, ++ t4, t5, t6, t7, t8, rx, fp, s0, ++ s1, s2, s3, s4, s5, s6, s7, s8, ++ ++ f0, f1, f2, f3, f4, f5, f6, f7, ++ f8, f9, f10, f11, f12, f13, f14, f15, ++ f16, f17, f18, f19, f20, f21, f22, f23, ++ f24, f25, f26, f27, f28, f29, f30, f31 ++ ); ++ // @formatter:on ++ ++ /** ++ * Basic set of CPU features mirroring what is returned from the cpuid instruction. See: ++ * {@code VM_Version::cpuFeatureFlags}. ++ */ ++ public enum CPUFeature { ++ LA32, ++ LA64, ++ LLEXC, ++ SCDLY, ++ LLDBAR, ++ LBT_X86, ++ LBT_ARM, ++ LBT_MIPS, ++ CCDMA, ++ COMPLEX, ++ FP, ++ CRYPTO, ++ LSX, ++ LASX, ++ LAM, ++ LLSYNC, ++ TGTSYNC, ++ ULSYNC, ++ UAL ++ } ++ ++ private final EnumSet features; ++ ++ /** ++ * Set of flags to control code emission. 
++ */ ++ public enum Flag { ++ useLSX, ++ useLASX ++ } ++ ++ private final EnumSet flags; ++ ++ public LoongArch64(EnumSet features, EnumSet flags) { ++ super("loongarch64", LoongArch64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, 0, 0, 0); ++ this.features = features; ++ this.flags = flags; ++ } ++ ++ public EnumSet getFeatures() { ++ return features; ++ } ++ ++ public EnumSet getFlags() { ++ return flags; ++ } ++ ++ @Override ++ public PlatformKind getPlatformKind(JavaKind javaKind) { ++ switch (javaKind) { ++ case Boolean: ++ case Byte: ++ return LoongArch64Kind.BYTE; ++ case Short: ++ case Char: ++ return LoongArch64Kind.WORD; ++ case Int: ++ return LoongArch64Kind.DWORD; ++ case Long: ++ case Object: ++ return LoongArch64Kind.QWORD; ++ case Float: ++ return LoongArch64Kind.SINGLE; ++ case Double: ++ return LoongArch64Kind.DOUBLE; ++ default: ++ return null; ++ } ++ } ++ ++ @Override ++ public boolean canStoreValue(RegisterCategory category, PlatformKind platformKind) { ++ LoongArch64Kind kind = (LoongArch64Kind) platformKind; ++ if (kind.isInteger()) { ++ return category.equals(CPU); ++ } else if (kind.isSIMD()) { ++ return category.equals(SIMD); ++ } ++ return false; ++ } ++ ++ @Override ++ public LoongArch64Kind getLargestStorableKind(RegisterCategory category) { ++ if (category.equals(CPU)) { ++ return LoongArch64Kind.QWORD; ++ } else if (category.equals(SIMD)) { ++ return LoongArch64Kind.V256_QWORD; ++ } else { ++ return null; ++ } ++ } +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java new file mode 100644 -index 0000000000..dfe3066af0 +index 0000000000..84b7f2027f --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64JavaCallWrapper.java -@@ -0,0 +1,57 @@ ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/LoongArch64Kind.java +@@ -0,0 +1,163 @@ +/* -+ * Copyright (c) 2001, 2002, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * Copyright (c) 2015, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -65537,50 +115148,156 @@ index 0000000000..dfe3066af0 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
-+ * + */ ++package jdk.vm.ci.loongarch64; + -+package sun.jvm.hotspot.runtime.mips64; ++import jdk.vm.ci.meta.PlatformKind; + -+import java.util.*; -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.types.*; -+import sun.jvm.hotspot.runtime.*; ++public enum LoongArch64Kind implements PlatformKind { + -+public class MIPS64JavaCallWrapper extends JavaCallWrapper { -+ private static AddressField lastJavaFPField; ++ // scalar ++ BYTE(1), ++ WORD(2), ++ DWORD(4), ++ QWORD(8), ++ UBYTE(1), ++ UWORD(2), ++ UDWORD(4), ++ SINGLE(4), ++ DOUBLE(8), + -+ static { -+ VM.registerVMInitializedObserver(new Observer() { -+ public void update(Observable o, Object data) { -+ initialize(VM.getVM().getTypeDataBase()); -+ } -+ }); -+ } ++ // SIMD ++ V128_BYTE(16, BYTE), ++ V128_WORD(16, WORD), ++ V128_DWORD(16, DWORD), ++ V128_QWORD(16, QWORD), ++ V128_SINGLE(16, SINGLE), ++ V128_DOUBLE(16, DOUBLE), ++ V256_BYTE(32, BYTE), ++ V256_WORD(32, WORD), ++ V256_DWORD(32, DWORD), ++ V256_QWORD(32, QWORD), ++ V256_SINGLE(32, SINGLE), ++ V256_DOUBLE(32, DOUBLE); + -+ private static synchronized void initialize(TypeDataBase db) { -+ Type type = db.lookupType("JavaFrameAnchor"); ++ private final int size; ++ private final int vectorLength; + -+ lastJavaFPField = type.getAddressField("_last_Java_fp"); -+ } ++ private final LoongArch64Kind scalar; ++ private final EnumKey key = new EnumKey<>(this); + -+ public MIPS64JavaCallWrapper(Address addr) { -+ super(addr); -+ } ++ LoongArch64Kind(int size) { ++ this.size = size; ++ this.scalar = this; ++ this.vectorLength = 1; ++ } + -+ public Address getLastJavaFP() { -+ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); -+ } ++ LoongArch64Kind(int size, LoongArch64Kind scalar) { ++ this.size = size; ++ this.scalar = scalar; ++ ++ assert size % scalar.size == 0; ++ this.vectorLength = size / scalar.size; ++ } ++ ++ public LoongArch64Kind getScalar() { ++ return scalar; ++ } ++ ++ @Override ++ public int getSizeInBytes() { ++ return size; ++ } ++ ++ @Override ++ public int getVectorLength() { ++ return vectorLength; ++ } ++ ++ @Override ++ public Key getKey() { ++ return key; ++ } ++ ++ public boolean isInteger() { ++ switch (this) { ++ case BYTE: ++ case WORD: ++ case DWORD: ++ case QWORD: ++ case UBYTE: ++ case UWORD: ++ case UDWORD: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ public boolean isSIMD() { ++ switch (this) { ++ case SINGLE: ++ case DOUBLE: ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ case V256_BYTE: ++ case V256_WORD: ++ case V256_DWORD: ++ case V256_QWORD: ++ case V256_SINGLE: ++ case V256_DOUBLE: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ @Override ++ public char getTypeChar() { ++ switch (this) { ++ case BYTE: ++ return 'b'; ++ case WORD: ++ return 'w'; ++ case DWORD: ++ return 'd'; ++ case QWORD: ++ return 'q'; ++ case SINGLE: ++ return 'S'; ++ case DOUBLE: ++ return 'D'; ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ case V256_BYTE: ++ case V256_WORD: ++ case V256_DWORD: ++ case V256_QWORD: ++ case V256_SINGLE: ++ case V256_DOUBLE: ++ return 'v'; ++ default: ++ return '-'; ++ } ++ } +} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java +diff --git 
a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java new file mode 100644 -index 0000000000..f2da760af4 +index 0000000000..9d020833ea --- /dev/null -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/mips64/MIPS64RegisterMap.java -@@ -0,0 +1,52 @@ ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.loongarch64/src/jdk/vm/ci/loongarch64/package-info.java +@@ -0,0 +1,28 @@ +/* -+ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. -+ * Copyright (c) 2018, Loongson Technology. All rights reserved. ++ * Copyright (c) 2018, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it @@ -65600,76 +115317,37 @@ index 0000000000..f2da760af4 + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. -+ * + */ + -+package sun.jvm.hotspot.runtime.mips64; -+ -+import sun.jvm.hotspot.debugger.*; -+import sun.jvm.hotspot.runtime.*; -+ -+public class MIPS64RegisterMap extends RegisterMap { -+ -+ /** This is the only public constructor */ -+ public MIPS64RegisterMap(JavaThread thread, boolean updateMap) { -+ super(thread, updateMap); -+ } -+ -+ protected MIPS64RegisterMap(RegisterMap map) { -+ super(map); -+ } -+ -+ public Object clone() { -+ MIPS64RegisterMap retval = new MIPS64RegisterMap(this); -+ return retval; -+ } -+ -+ // no PD state to clear or copy: -+ protected void clearPD() {} -+ protected void initializePD() {} -+ protected void initializeFromPD(RegisterMap map) {} -+ protected Address getLocationPD(VMReg reg) { return null; } -+} -diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -index 7d7a6107ca..06d79318d9 100644 ---- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -+++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java -@@ -22,6 +22,13 @@ - * ++/** ++ * The LoongArch64 platform independent portions of the JVMCI API. ++ */ ++package jdk.vm.ci.loongarch64; +diff --git a/src/jdk.internal.vm.ci/share/classes/module-info.java b/src/jdk.internal.vm.ci/share/classes/module-info.java +index fed310d386..661f106d30 100644 +--- a/src/jdk.internal.vm.ci/share/classes/module-info.java ++++ b/src/jdk.internal.vm.ci/share/classes/module-info.java +@@ -23,6 +23,12 @@ + * questions. */ +/* -+ * This file has been modified by Loongson Technology in 2021. These -+ * modifications are Copyright (c) 2018, 2021, Loongson Technology, and are made ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2022, Loongson Technology, and are made + * available on the same license terms set forth above. 
-+ * + */ + - package sun.jvm.hotspot.utilities; - - /** Provides canonicalized OS and CPU information for the rest of the -@@ -54,7 +61,7 @@ public class PlatformInfo { - - public static boolean knownCPU(String cpu) { - final String[] KNOWN = -- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; -+ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "mips64", "mips64el", "loongarch64"}; - - for(String s : KNOWN) { - if(s.equals(cpu)) -@@ -101,6 +108,12 @@ public class PlatformInfo { - if (cpu.equals("ppc64le")) - return "ppc64"; - -+ if (cpu.equals("mips64el")) -+ return "mips64"; -+ -+ if (cpu.equals("loongarch64")) -+ return "loongarch64"; -+ - return cpu; + module jdk.internal.vm.ci { + exports jdk.vm.ci.services to jdk.internal.vm.compiler; + exports jdk.vm.ci.runtime to +@@ -37,6 +43,7 @@ module jdk.internal.vm.ci { - } + provides jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory with + jdk.vm.ci.hotspot.aarch64.AArch64HotSpotJVMCIBackendFactory, ++ jdk.vm.ci.hotspot.loongarch64.LoongArch64HotSpotJVMCIBackendFactory, + jdk.vm.ci.hotspot.amd64.AMD64HotSpotJVMCIBackendFactory, + jdk.vm.ci.hotspot.sparc.SPARCHotSpotJVMCIBackendFactory; + } diff --git a/src/utils/hsdis/Makefile b/src/utils/hsdis/Makefile index 2514a895da..08fbe3b953 100644 --- a/src/utils/hsdis/Makefile @@ -65684,75 +115362,6 @@ index 2514a895da..08fbe3b953 100644 LDFLAGS += -ldl OUTFLAGS += -o $@ else -diff --git a/test/hotspot/jtreg/ProblemList-Xcomp.txt b/test/hotspot/jtreg/ProblemList-Xcomp.txt -index 4d6159a22b..2ff512758f 100644 ---- a/test/hotspot/jtreg/ProblemList-Xcomp.txt -+++ b/test/hotspot/jtreg/ProblemList-Xcomp.txt -@@ -29,3 +29,6 @@ - - vmTestbase/vm/mlvm/meth/stress/jni/nativeAndMH/Test.java 8208235 solaris-all - runtime/appcds/cacheObject/DifferentHeapSizes.java 8210102 solaris-all -+ -+# loongson added -+compiler/intrinsics/bigInteger/TestMultiplyToLenReturnProfile.java generic-mips64el -diff --git a/test/hotspot/jtreg/ProblemList.txt b/test/hotspot/jtreg/ProblemList.txt -index 941429356c..f66c98b1fb 100644 ---- a/test/hotspot/jtreg/ProblemList.txt -+++ b/test/hotspot/jtreg/ProblemList.txt -@@ -21,6 +21,12 @@ - # questions. - # - -+# -+# This file has been modified by Loongson Technology in 2022. These -+# modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made -+# available on the same license terms set forth above. 
-+# -+ - ############################################################################# - # - # List of quarantined tests -- tests that should not be run by default, because -@@ -233,3 +239,40 @@ vmTestbase/nsk/jdb/exclude/exclude001/exclude001.java 8197938 windows-all - vmTestbase/nsk/jdwp/ThreadReference/ForceEarlyReturn/forceEarlyReturn001/forceEarlyReturn001.java 7199837 generic-all - - ############################################################################# -+ -+# loongson added -+compiler/loopopts/TestSkeletonPredicateNegation.java #25538 generic-loongarch64 -+compiler/profiling/TestTypeProfiling.java #25171 generic-loongarch64 -+compiler/tiered/Level2RecompilationTest.java #10070 generic-mips64el,generic-loongarch64 -+containers/cgroup/PlainRead.java #20028 generic-mips64el -+gc/cms/TestBubbleUpRef.java #17221 generic-mips64el -+gc/stress/gcbasher/TestGCBasherWithCMS.java #17221 generic-mips64el -+resourcehogs/serviceability/sa/ClhsdbRegionDetailsScanOopsForG1.java #24312 generic-loongarch64 -+resourcehogs/serviceability/sa/TestHeapDumpForLargeArray.java #9797 generic-mips64el -+runtime/classFileParserBug/TestEmptyBootstrapMethodsAttr.java generic-all -+runtime/NMT/CheckForProperDetailStackTrace.java #9499 generic-mips64el,generic-loongarch64 -+serviceability/sa/CDSJMapClstats.java #9797 generic-mips64el -+serviceability/sa/ClhsdbCDSJstackPrintAll.java #9797 generic-mips64el -+serviceability/sa/ClhsdbInspect.java #9797 generic-mips64el -+serviceability/sa/ClhsdbJdis.java #9797 generic-mips64el -+serviceability/sa/ClhsdbJstack.java #9797 generic-mips64el -+serviceability/sa/ClhsdbJstackXcompStress.java #10632 generic-mips64el -+serviceability/sa/ClhsdbPrintAs.java #9797 generic-mips64el -+serviceability/sa/ClhsdbPstack.java #9797 generic-mips64el -+serviceability/sa/ClhsdbSource.java #9797 generic-mips64el -+serviceability/sa/ClhsdbThread.java #9797 generic-mips64el -+serviceability/sa/ClhsdbWhere.java #9797 generic-mips64el -+serviceability/sa/DeadlockDetectionTest.java #9797 generic-mips64el -+serviceability/sa/JhsdbThreadInfoTest.java #9797 generic-mips64el -+serviceability/sa/jmap-hprof/JMapHProfLargeHeapTest.java #9797 generic-mips64el -+serviceability/sa/sadebugd/DebugdConnectTest.java #9797 generic-mips64el -+serviceability/sa/TestClhsdbJstackLock.java #9797 generic-mips64el -+serviceability/sa/TestHeapDumpForInvokeDynamic.java #9797 generic-mips64el -+serviceability/sa/TestHeapDumpForLargeArray.java #9797 generic-mips64el -+serviceability/sa/TestInstanceKlassSize.java #9797 generic-mips64el -+serviceability/sa/TestJhsdbJstackLock.java #9797 generic-mips64el -+serviceability/sa/TestJhsdbJstackMixed.java #9797 generic-mips64el -+serviceability/sa/TestJmapCore.java #9797 generic-mips64el -+serviceability/sa/TestJmapCoreMetaspace.java #9797 generic-mips64el -+serviceability/sa/TestPrintMdo.java #9797,#25534 generic-mips64el,generic-loongarch64 -+vmTestbase/jit/tiered/Test.java generic-mips64el diff --git a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java b/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java index ac17e567b0..9b004a2033 100644 --- a/test/hotspot/jtreg/compiler/cpuflags/TestAESIntrinsicsOnSupportedConfig.java @@ -65844,6 +115453,770 @@ index faa9fdbae6..a635f03d24 100644 } @Override +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java +index 
62d0e99155..c3fa3fb93e 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/CodeInstallationTest.java +@@ -29,6 +29,7 @@ import jdk.vm.ci.code.InstalledCode; + import jdk.vm.ci.code.TargetDescription; + import jdk.vm.ci.code.test.amd64.AMD64TestAssembler; + import jdk.vm.ci.code.test.sparc.SPARCTestAssembler; ++import jdk.vm.ci.code.test.loongarch64.LoongArch64TestAssembler; + import jdk.vm.ci.hotspot.HotSpotCompiledCode; + import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; + import jdk.vm.ci.hotspot.HotSpotResolvedJavaMethod; +@@ -37,6 +38,7 @@ import jdk.vm.ci.meta.MetaAccessProvider; + import jdk.vm.ci.runtime.JVMCI; + import jdk.vm.ci.runtime.JVMCIBackend; + import jdk.vm.ci.sparc.SPARC; ++import jdk.vm.ci.loongarch64.LoongArch64; + import org.junit.Assert; + + import java.lang.reflect.Method; +@@ -72,6 +74,8 @@ public class CodeInstallationTest { + return new AMD64TestAssembler(codeCache, config); + } else if (arch instanceof SPARC) { + return new SPARCTestAssembler(codeCache, config); ++ } else if (arch instanceof LoongArch64) { ++ return new LoongArch64TestAssembler(codeCache, config); + } else { + Assert.fail("unsupported architecture"); + return null; +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java +index 8afc7d7b98..520d7707a2 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/DataPatchTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.DataPatchTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java +index 75d0748da5..a6826e2ffe 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/InterpreterFrameSizeTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @modules 
jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.code + * jdk.internal.vm.ci/jdk.vm.ci.code.site +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.InterpreterFrameSizeTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java +index a67fa2c1df..59cce6454d 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/MaxOopMapStackOffsetTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -33,7 +33,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.MaxOopMapStackOffsetTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java +index d9e1f24c30..259218b305 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/NativeCallTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library /test/lib / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.code +@@ -33,7 +33,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.common + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java TestAssembler.java sparc/SPARCTestAssembler.java amd64/AMD64TestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java TestHotSpotVMConfig.java NativeCallTest.java 
TestAssembler.java sparc/SPARCTestAssembler.java amd64/AMD64TestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm/native -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Xbootclasspath/a:. jdk.vm.ci.code.test.NativeCallTest + */ + package jdk.vm.ci.code.test; +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java +index 9b92114055..00d0f53cdb 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleCodeInstallationTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.SimpleCodeInstallationTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java +index 5b2204868c..ecfcb1cf01 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/SimpleDebugInfoTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.SimpleDebugInfoTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java 
b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java +index a10e90acda..5b1a58c74b 100644 +--- a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/VirtualObjectDebugInfoTest.java +@@ -23,7 +23,7 @@ + + /** + * @test +- * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9") ++ * @requires vm.jvmci & (vm.simpleArch == "x64" | vm.simpleArch == "sparcv9" | vm.simpleArch == "loongarch64") + * @library / + * @modules jdk.internal.vm.ci/jdk.vm.ci.hotspot + * jdk.internal.vm.ci/jdk.vm.ci.meta +@@ -32,7 +32,8 @@ + * jdk.internal.vm.ci/jdk.vm.ci.runtime + * jdk.internal.vm.ci/jdk.vm.ci.amd64 + * jdk.internal.vm.ci/jdk.vm.ci.sparc +- * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java ++ * jdk.internal.vm.ci/jdk.vm.ci.loongarch64 ++ * @compile CodeInstallationTest.java DebugInfoTest.java TestAssembler.java TestHotSpotVMConfig.java amd64/AMD64TestAssembler.java sparc/SPARCTestAssembler.java loongarch64/LoongArch64TestAssembler.java + * @run junit/othervm -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI -Djvmci.Compiler=null jdk.vm.ci.code.test.VirtualObjectDebugInfoTest + */ + +diff --git a/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java +new file mode 100644 +index 0000000000..4c76868453 +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/jvmci/jdk.vm.ci.code.test/src/jdk/vm/ci/code/test/loongarch64/LoongArch64TestAssembler.java +@@ -0,0 +1,568 @@ ++/* ++ * Copyright (c) 2020, 2022, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2022, Loongson Technology. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++package jdk.vm.ci.code.test.loongarch64; ++ ++import jdk.vm.ci.loongarch64.LoongArch64; ++import jdk.vm.ci.loongarch64.LoongArch64Kind; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CodeCacheProvider; ++import jdk.vm.ci.code.DebugInfo; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterValue; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.site.ConstantReference; ++import jdk.vm.ci.code.site.DataSectionReference; ++import jdk.vm.ci.code.test.TestAssembler; ++import jdk.vm.ci.code.test.TestHotSpotVMConfig; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.hotspot.HotSpotConstant; ++import jdk.vm.ci.hotspot.HotSpotForeignCallTarget; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.VMConstant; ++ ++public class LoongArch64TestAssembler extends TestAssembler { ++ ++ private static final Register scratchRegister = LoongArch64.SCR1; ++ private static final Register doubleScratch = LoongArch64.f23; ++ private static final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(LoongArch64.a0, ++ LoongArch64.a1, LoongArch64.a2, ++ LoongArch64.a3, LoongArch64.a4, ++ LoongArch64.a5, LoongArch64.a6, ++ LoongArch64.a7); ++ private static final RegisterArray floatParameterRegisters = new RegisterArray(LoongArch64.f0, ++ LoongArch64.f1, LoongArch64.f2, ++ LoongArch64.f3, LoongArch64.f4, ++ LoongArch64.f5, LoongArch64.f6, ++ LoongArch64.f7); ++ private static int currentGeneral = 0; ++ private static int currentFloat = 0; ++ public LoongArch64TestAssembler(CodeCacheProvider codeCache, TestHotSpotVMConfig config) { ++ super(codeCache, config, ++ 16 /* initialFrameSize */, 16 /* stackAlignment */, ++ LoongArch64Kind.UDWORD /* narrowOopKind */, ++ /* registers */ ++ LoongArch64.a0, LoongArch64.a1, LoongArch64.a2, LoongArch64.a3, ++ LoongArch64.a4, LoongArch64.a5, LoongArch64.a6, LoongArch64.a7); ++ } ++ ++ private static int low(int x, int l) { ++ assert l < 32; ++ return (x >> 0) & ((1 << l)-1); ++ } ++ ++ private static int low16(int x) { ++ return low(x, 16); ++ } ++ ++ private void emitNop() { ++ code.emitInt(0x3400000); ++ } ++ ++ private void emitPcaddu12i(Register rj, int si20) { ++ // pcaddu12i ++ code.emitInt((0b0001110 << 25) ++ | (low(si20, 20) << 5) ++ | rj.encoding); ++ } ++ ++ private void emitAdd(Register rd, Register rj, Register rk) { ++ // add_d ++ code.emitInt((0b00000000000100001 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitAdd(Register rd, Register rj, int si12) { ++ // addi_d ++ code.emitInt((0b0000001011 << 22) ++ | (low(si12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitSub(Register rd, Register rj, Register rk) { ++ // sub_d ++ code.emitInt((0b00000000000100011 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitShiftLeft(Register rd, Register rj, int shift) { ++ // slli_d ++ code.emitInt((0b00000000010000 << 18) ++ | (low(( (0b01 << 6) | shift ), 8) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLu12i_w(Register rj, int imm20) { ++ // lu12i_w ++ code.emitInt((0b0001010 << 25) ++ | (low(imm20, 20)<<5) ++ | rj.encoding); ++ } ++ ++ private void emitOri(Register rd, Register rj, int ui12) { ++ // ori ++ code.emitInt((0b0000001110 << 22) ++ | (low(ui12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void 
emitLu32i_d(Register rj, int imm20) { ++ // lu32i_d ++ code.emitInt((0b0001011 << 25) ++ | (low(imm20, 20)<<5) ++ | rj.encoding); ++ } ++ ++ private void emitLu52i_d(Register rd, Register rj, int imm12) { ++ // lu52i_d ++ code.emitInt((0b0000001100 << 22) ++ | (low(imm12, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLoadImmediate(Register rd, int imm32) { ++ emitLu12i_w(rd, (imm32 >> 12) & 0xfffff); ++ emitOri(rd, rd, imm32 & 0xfff); ++ } ++ ++ private void emitLi52(Register rj, long imm) { ++ emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); ++ emitOri(rj, rj, (int) (imm & 0xfff)); ++ emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); ++ } ++ ++ private void emitLi64(Register rj, long imm) { ++ emitLu12i_w(rj, (int) ((imm >> 12) & 0xfffff)); ++ emitOri(rj, rj, (int) (imm & 0xfff)); ++ emitLu32i_d(rj, (int) ((imm >> 32) & 0xfffff)); ++ emitLu52i_d(rj, rj, (int) ((imm >> 52) & 0xfff)); ++ } ++ ++ private void emitOr(Register rd, Register rj, Register rk) { ++ // orr ++ code.emitInt((0b00000000000101010 << 15) ++ | (rk.encoding << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitMove(Register rd, Register rs) { ++ // move ++ emitOr(rd, rs, LoongArch64.zero); ++ } ++ ++ private void emitMovfr2gr(Register rd, LoongArch64Kind kind, Register rj) { ++ // movfr2gr_s/movfr2gr_d ++ int opc = 0; ++ switch (kind) { ++ case SINGLE: opc = 0b0000000100010100101101; break; ++ case DOUBLE: opc = 0b0000000100010100101110; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitLoadRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { ++ // load ++ assert offset >= 0; ++ int opc = 0; ++ switch (kind) { ++ case BYTE: opc = 0b0010100000; break; ++ case WORD: opc = 0b0010100001; break; ++ case DWORD: opc = 0b0010100010; break; ++ case QWORD: opc = 0b0010100011; break; ++ case UDWORD: opc = 0b0010101010; break; ++ case SINGLE: opc = 0b0010101100; break; ++ case DOUBLE: opc = 0b0010101110; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 22) ++ | (low(offset, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitStoreRegister(Register rd, LoongArch64Kind kind, Register rj, int offset) { ++ // store ++ assert offset >= 0; ++ int opc = 0; ++ switch (kind) { ++ case BYTE: opc = 0b0010100100; break; ++ case WORD: opc = 0b0010100101; break; ++ case DWORD: opc = 0b0010100110; break; ++ case QWORD: opc = 0b0010100111; break; ++ case SINGLE: opc = 0b0010101101; break; ++ case DOUBLE: opc = 0b0010101111; break; ++ default: throw new IllegalArgumentException(); ++ } ++ code.emitInt((opc << 22) ++ | (low(offset, 12) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ private void emitJirl(Register rd, Register rj, int offs) { ++ // jirl ++ code.emitInt((0b010011 << 26) ++ | (low16(offs >> 2) << 10) ++ | (rj.encoding << 5) ++ | rd.encoding); ++ } ++ ++ @Override ++ public void emitGrowStack(int size) { ++ assert size % 16 == 0; ++ if (size > -4096 && size < 0) { ++ emitAdd(LoongArch64.sp, LoongArch64.sp, -size); ++ } else if (size == 0) { ++ // No-op ++ } else if (size < 4096) { ++ emitAdd(LoongArch64.sp, LoongArch64.sp, -size); ++ } else if (size < 65535) { ++ emitLoadImmediate(scratchRegister, size); ++ emitSub(LoongArch64.sp, LoongArch64.sp, scratchRegister); ++ } else { ++ throw new IllegalArgumentException(); ++ } ++ } ++ ++ @Override ++ public void emitPrologue() { ++ 
// Must be patchable by NativeJump::patch_verified_entry ++ emitNop(); ++ emitGrowStack(32); ++ emitStoreRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 24); ++ emitStoreRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 16); ++ emitGrowStack(-16); ++ emitMove(LoongArch64.fp, LoongArch64.sp); ++ setDeoptRescueSlot(newStackSlot(LoongArch64Kind.QWORD)); ++ } ++ ++ @Override ++ public void emitEpilogue() { ++ recordMark(config.MARKID_DEOPT_HANDLER_ENTRY); ++ recordCall(new HotSpotForeignCallTarget(config.handleDeoptStub), 4*4, true, null); ++ emitCall(0xdeaddeaddeadL); ++ } ++ ++ @Override ++ public void emitCallPrologue(CallingConvention cc, Object... prim) { ++ emitGrowStack(cc.getStackSize()); ++ frameSize += cc.getStackSize(); ++ AllocatableValue[] args = cc.getArguments(); ++ for (int i = 0; i < args.length; i++) { ++ emitLoad(args[i], prim[i]); ++ } ++ currentGeneral = 0; ++ currentFloat = 0; ++ } ++ ++ @Override ++ public void emitCallEpilogue(CallingConvention cc) { ++ emitGrowStack(-cc.getStackSize()); ++ frameSize -= cc.getStackSize(); ++ } ++ ++ @Override ++ public void emitCall(long addr) { ++ // long call (absolute) ++ // lu12i_w(T4, split_low20(value >> 12)); ++ // lu32i_d(T4, split_low20(value >> 32)); ++ // jirl(RA, T4, split_low12(value)); ++ emitLu12i_w(LoongArch64.t4, (int) ((addr >> 12) & 0xfffff)); ++ emitLu32i_d(LoongArch64.t4, (int) ((addr >> 32) & 0xfffff)); ++ emitJirl(LoongArch64.ra, LoongArch64.t4, (int) (addr & 0xfff)); ++ } ++ ++ @Override ++ public void emitLoad(AllocatableValue av, Object prim) { ++ if (av instanceof RegisterValue) { ++ Register reg = ((RegisterValue) av).getRegister(); ++ if (prim instanceof Float) { ++ if (currentFloat < floatParameterRegisters.size()) { ++ currentFloat++; ++ emitLoadFloat(reg, (Float) prim); ++ } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { ++ currentGeneral++; ++ emitLoadFloat(doubleScratch, (Float) prim); ++ emitMovfr2gr(reg, LoongArch64Kind.SINGLE, doubleScratch); ++ } ++ } else if (prim instanceof Double) { ++ if (currentFloat < floatParameterRegisters.size()) { ++ currentFloat++; ++ emitLoadDouble(reg, (Double) prim); ++ } else if (currentGeneral < nativeGeneralParameterRegisters.size()) { ++ currentGeneral++; ++ emitLoadDouble(doubleScratch, (Double) prim); ++ emitMovfr2gr(reg, LoongArch64Kind.DOUBLE, doubleScratch); ++ } ++ } else if (prim instanceof Integer) { ++ emitLoadInt(reg, (Integer) prim); ++ } else if (prim instanceof Long) { ++ emitLoadLong(reg, (Long) prim); ++ } ++ } else if (av instanceof StackSlot) { ++ StackSlot slot = (StackSlot) av; ++ if (prim instanceof Float) { ++ emitFloatToStack(slot, emitLoadFloat(doubleScratch, (Float) prim)); ++ } else if (prim instanceof Double) { ++ emitDoubleToStack(slot, emitLoadDouble(doubleScratch, (Double) prim)); ++ } else if (prim instanceof Integer) { ++ emitIntToStack(slot, emitLoadInt(scratchRegister, (Integer) prim)); ++ } else if (prim instanceof Long) { ++ emitLongToStack(slot, emitLoadLong(scratchRegister, (Long) prim)); ++ } else { ++ assert false : "Unimplemented"; ++ } ++ } else { ++ throw new IllegalArgumentException("Unknown value " + av); ++ } ++ } ++ ++ @Override ++ public Register emitLoadPointer(HotSpotConstant c) { ++ recordDataPatchInCode(new ConstantReference((VMConstant) c)); ++ ++ Register ret = newRegister(); ++ // need to match patchable_li52 instruction sequence ++ // lu12i_ori_lu32i ++ emitLi52(ret, 0xdeaddead); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadPointer(Register b, 
int offset) { ++ Register ret = newRegister(); ++ emitLoadRegister(ret, LoongArch64Kind.QWORD, b, offset); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadNarrowPointer(DataSectionReference ref) { ++ recordDataPatchInCode(ref); ++ ++ Register ret = newRegister(); ++ emitPcaddu12i(ret, 0xdead >> 12); ++ emitAdd(ret, ret, 0xdead & 0xfff); ++ emitLoadRegister(ret, LoongArch64Kind.UDWORD, ret, 0); ++ return ret; ++ } ++ ++ @Override ++ public Register emitLoadPointer(DataSectionReference ref) { ++ recordDataPatchInCode(ref); ++ ++ Register ret = newRegister(); ++ emitPcaddu12i(ret, 0xdead >> 12); ++ emitAdd(ret, ret, 0xdead & 0xfff); ++ emitLoadRegister(ret, LoongArch64Kind.QWORD, ret, 0); ++ return ret; ++ } ++ ++ private Register emitLoadDouble(Register reg, double c) { ++ DataSectionReference ref = new DataSectionReference(); ++ ref.setOffset(data.position()); ++ data.emitDouble(c); ++ ++ recordDataPatchInCode(ref); ++ emitPcaddu12i(scratchRegister, 0xdead >> 12); ++ emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); ++ emitLoadRegister(reg, LoongArch64Kind.DOUBLE, scratchRegister, 0); ++ return reg; ++ } ++ ++ private Register emitLoadFloat(Register reg, float c) { ++ DataSectionReference ref = new DataSectionReference(); ++ ref.setOffset(data.position()); ++ data.emitFloat(c); ++ ++ recordDataPatchInCode(ref); ++ emitPcaddu12i(scratchRegister, 0xdead >> 12); ++ emitAdd(scratchRegister, scratchRegister, 0xdead & 0xfff); ++ emitLoadRegister(reg, LoongArch64Kind.SINGLE, scratchRegister, 0); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadFloat(float c) { ++ Register ret = LoongArch64.fv0; ++ return emitLoadFloat(ret, c); ++ } ++ ++ private Register emitLoadLong(Register reg, long c) { ++ emitLi64(reg, c); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadLong(long c) { ++ Register ret = newRegister(); ++ return emitLoadLong(ret, c); ++ } ++ ++ private Register emitLoadInt(Register reg, int c) { ++ emitLoadImmediate(reg, c); ++ return reg; ++ } ++ ++ @Override ++ public Register emitLoadInt(int c) { ++ Register ret = newRegister(); ++ return emitLoadInt(ret, c); ++ } ++ ++ @Override ++ public Register emitIntArg0() { ++ return codeCache.getRegisterConfig() ++ .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) ++ .get(0); ++ } ++ ++ @Override ++ public Register emitIntArg1() { ++ return codeCache.getRegisterConfig() ++ .getCallingConventionRegisters(HotSpotCallingConventionType.JavaCall, JavaKind.Int) ++ .get(1); ++ } ++ ++ @Override ++ public Register emitIntAdd(Register a, Register b) { ++ emitAdd(a, a, b); ++ return a; ++ } ++ ++ @Override ++ public void emitTrap(DebugInfo info) { ++ // Dereference null pointer ++ emitMove(scratchRegister, LoongArch64.zero); ++ recordImplicitException(info); ++ emitLoadRegister(LoongArch64.zero, LoongArch64Kind.QWORD, scratchRegister, 0); ++ } ++ ++ @Override ++ public void emitIntRet(Register a) { ++ emitMove(LoongArch64.v0, a); ++ emitMove(LoongArch64.sp, LoongArch64.fp); ++ emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); ++ emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); ++ emitGrowStack(-16); ++ emitJirl(LoongArch64.zero, LoongArch64.ra, 0); ++ } ++ ++ @Override ++ public void emitFloatRet(Register a) { ++ assert a == LoongArch64.fv0 : "Unimplemented move " + a; ++ emitMove(LoongArch64.sp, LoongArch64.fp); ++ emitLoadRegister(LoongArch64.ra, LoongArch64Kind.QWORD, LoongArch64.sp, 8); ++ 
emitLoadRegister(LoongArch64.fp, LoongArch64Kind.QWORD, LoongArch64.sp, 0); ++ emitGrowStack(-16); ++ emitJirl(LoongArch64.zero, LoongArch64.ra, 0); ++ } ++ ++ @Override ++ public void emitPointerRet(Register a) { ++ emitIntRet(a); ++ } ++ ++ @Override ++ public StackSlot emitPointerToStack(Register a) { ++ return emitLongToStack(a); ++ } ++ ++ @Override ++ public StackSlot emitNarrowPointerToStack(Register a) { ++ return emitIntToStack(a); ++ } ++ ++ @Override ++ public Register emitUncompressPointer(Register compressed, long base, int shift) { ++ if (shift > 0) { ++ emitShiftLeft(compressed, compressed, shift); ++ } ++ ++ if (base != 0) { ++ emitLoadLong(scratchRegister, base); ++ emitAdd(compressed, compressed, scratchRegister); ++ } ++ ++ return compressed; ++ } ++ ++ private StackSlot emitDoubleToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.DOUBLE, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitDoubleToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.DOUBLE); ++ return emitDoubleToStack(ret, a); ++ } ++ ++ private StackSlot emitFloatToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.SINGLE, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitFloatToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.SINGLE); ++ return emitFloatToStack(ret, a); ++ } ++ ++ private StackSlot emitIntToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.DWORD, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitIntToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.DWORD); ++ return emitIntToStack(ret, a); ++ } ++ ++ private StackSlot emitLongToStack(StackSlot slot, Register a) { ++ emitStoreRegister(a, LoongArch64Kind.QWORD, LoongArch64.sp, slot.getOffset(frameSize)); ++ return slot; ++ } ++ ++ @Override ++ public StackSlot emitLongToStack(Register a) { ++ StackSlot ret = newStackSlot(LoongArch64Kind.QWORD); ++ return emitLongToStack(ret, a); ++ } ++ ++} diff --git a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java b/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java index acb86812d2..664ea11d0d 100644 --- a/test/hotspot/jtreg/compiler/runtime/criticalnatives/argumentcorruption/CheckLongArgs.java @@ -65935,139 +116308,8 @@ index 7774dabcb5..c1cb6e00f3 100644 public static final BooleanSupplier SHA512_INSTRUCTION_AVAILABLE = new OrPredicate(new CPUSpecificPredicate("aarch64.*", new String[] { "sha512" }, null), -diff --git a/test/hotspot/jtreg/loongson/25443/Test25443.java b/test/hotspot/jtreg/loongson/25443/Test25443.java -new file mode 100644 -index 0000000000..200485d1fd ---- /dev/null -+++ b/test/hotspot/jtreg/loongson/25443/Test25443.java -@@ -0,0 +1,58 @@ -+/* -+ * Copyright (c) 2015, 2022, Loongson Technology. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. -+ * -+ */ -+ -+/** -+ * @test -+ * @summary test c2 or2s -+ * -+ * @run main/othervm -Xcomp -XX:-TieredCompilation Test25443 -+ */ -+public class Test25443 { -+ static short test_ori2s(int v1) { -+ short t = (short)(v1 | 0x14); -+ return t; -+ } -+ -+ static short test_or2s(int v1, int v2) { -+ short t = (short)(v1 | v2); -+ return t; -+ } -+ -+ static short ret; -+ public static void main(String[] args) { -+ for (int i = 0; i < 12000; i++) { //warmup -+ test_ori2s(0x333300); -+ test_or2s(0x333300, 0x14); -+ } -+ -+ if ( (test_ori2s(0x333300) == 0x3314) -+ && (test_or2s(0x333300, 0x14) == 0x3314) -+ && (test_or2s(0x333300, 0x1000) == 0x3300) -+ && (test_or2s(0x333300, 0x8000) == 0xffffb300)) { -+ System.out.println("TEST PASSED"); -+ } else { -+ throw new AssertionError("Not be expected results"); -+ } -+ } -+} -diff --git a/test/hotspot/jtreg/loongson/7432/Test7423.java b/test/hotspot/jtreg/loongson/7432/Test7423.java -new file mode 100644 -index 0000000000..defa026410 ---- /dev/null -+++ b/test/hotspot/jtreg/loongson/7432/Test7423.java -@@ -0,0 +1,61 @@ -+/* -+ * Copyright (c) 2015, 2018, Loongson Technology. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ * -+ */ -+ -+/** -+ * @test -+ * @summary Divide by zero -+ * -+ * @run main/othervm -Xint Test7423 -+ * @run main/othervm -Xcomp Test7423 -+ */ -+public class Test7423 { -+ -+ private static int divInt(int n) { -+ int a = 1 / n; -+ return a; -+ } -+ -+ private static long divLong(long n) { -+ long a = (long)1 / n; -+ return a; -+ } -+ -+ public static void main(String[] args) throws Exception { -+ -+ try { -+ for (int i = 0; i < 20000; i++) { -+ if (i == 18000) { -+ divInt(0); -+ divLong((long)0); -+ } else { -+ divInt(1); -+ divLong((long)1); -+ } -+ } -+ } catch (java.lang.ArithmeticException exc) { -+ System.out.println("expected-exception " + exc); -+ } -+ } -+ -+} diff --git a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java -index 3386cfeb1f..a548c37d09 100644 +index 127bb6abcd..c9277604ae 100644 --- a/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java +++ b/test/hotspot/jtreg/runtime/ReservedStack/ReservedStackTest.java @@ -21,6 +21,12 @@ @@ -66093,7 +116335,7 @@ index 3386cfeb1f..a548c37d09 100644 Platform.isSolaris(); } diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java -index a016f233e7..a60fc94158 100644 +index 77458554b7..05aee6b84c 100644 --- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java @@ -45,7 +45,7 @@ import java.util.Set; @@ -66105,146 +116347,6 @@ index a016f233e7..a60fc94158 100644 BITNESS("is32bit", "is64bit"), OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), -diff --git a/test/jdk/ProblemList-Xcomp.txt b/test/jdk/ProblemList-Xcomp.txt -index 0758209f2d..f849f73fc6 100644 ---- a/test/jdk/ProblemList-Xcomp.txt -+++ b/test/jdk/ProblemList-Xcomp.txt -@@ -21,6 +21,12 @@ - # questions. - # - -+# -+# This file has been modified by Loongson Technology in 2021. These -+# modifications are Copyright (c) 2020, 2021, Loongson Technology, and are made -+# available on the same license terms set forth above. -+# -+ - ############################################################################# - # - # List of quarantined tests for testing in Xcomp mode. 
-@@ -29,3 +35,55 @@ - - java/lang/invoke/MethodHandles/CatchExceptionTest.java 8146623 generic-all - java/lang/Class/forName/modules/TestDriver.java 8208212 solaris-all -+ -+# loongson added -+com/sun/net/httpserver/bugs/6725892/Test.java #uos generic-mips64el -+com/sun/net/httpserver/bugs/B6393710.java generic-mips64el -+com/sun/net/httpserver/bugs/B6529200.java generic-mips64el -+com/sun/net/httpserver/Test10.java generic-mips64el -+com/sun/net/httpserver/Test1.java generic-mips64el -+java/lang/annotation/loaderLeak/Main.java generic-all -+java/lang/Thread/UncaughtExceptions.sh generic-mips64el -+java/net/httpclient/MaxStreams.java generic-mips64el -+java/net/httpclient/ShortRequestBody.java #uos generic-mips64el -+java/net/httpclient/TimeoutOrdering.java generic-mips64el -+java/net/Socket/DeadlockTest.java #uos generic-mips64el -+java/net/Socket/LingerTest.java #uos generic-mips64el -+java/net/Socket/RejectIPv6.java generic-mips64el -+java/security/Security/ClassLoaderDeadlock/ClassLoaderDeadlock.sh #uos generic-mips64el -+javax/net/ssl/ServerName/SSLSocketSNISensitive.java generic-mips64el -+javax/net/ssl/SSLSession/SessionCacheSizeTests.java #uos generic-mips64el -+javax/net/ssl/SSLSession/SessionTimeOutTests.java generic-mips64el -+javax/net/ssl/Stapling/HttpsUrlConnClient.java generic-mips64el -+javax/net/ssl/Stapling/SSLEngineWithStapling.java generic-mips64el -+javax/net/ssl/Stapling/SSLSocketWithStapling.java generic-mips64el -+javax/net/ssl/Stapling/StapleEnableProps.java generic-mips64el -+javax/net/ssl/TLSCommon/TestSessionLocalPrincipal.java generic-mips64el -+javax/net/ssl/TLS/TestJSSEClientProtocol.java #uos generic-mips64el -+javax/net/ssl/TLS/TestJSSEServerProtocol.java #uos generic-mips64el -+jdk/security/logging/TestTLSHandshakeLog.java #uos generic-mips64el -+sun/net/InetAddress/nameservice/simple/DefaultCaching.java #uos generic-mips64el -+sun/net/www/http/HttpClient/B8209178.java generic-all -+sun/net/www/protocol/https/HttpsURLConnection/CookieHttpsClientTest.java #uos generic-mips64el -+sun/net/www/protocol/https/HttpsURLConnection/PostThruProxy.java #uos generic-mips64el -+sun/net/www/protocol/https/HttpsURLConnection/ReadTimeout.java generic-mips64el -+sun/security/ec/TestEC.java #uos generic-mips64el -+sun/security/krb5/auto/BogusKDC.java #uos generic-mips64el -+sun/security/krb5/auto/NullRenewUntil.java generic-mips64el -+sun/security/krb5/auto/rcache_usemd5.sh #uos generic-mips64el -+sun/security/krb5/auto/RefreshKrb5Config.java #uos generic-mips64el -+sun/security/krb5/auto/ReplayCacheTestProc.java #uos generic-mips64el -+sun/security/krb5/auto/Unreachable.java generic-mips64el -+sun/security/ssl/SSLSocketImpl/SSLSocketCloseHang.java generic-mips64el -+sun/security/ssl/Stapling/StatusResponseManager.java generic-mips64el -+sun/security/tools/jarsigner/certpolicy.sh #uos generic-mips64el -+sun/security/tools/jarsigner/checkusage.sh #uos generic-mips64el -+sun/security/tools/jarsigner/concise_jarsigner.sh generic-mips64el -+sun/security/tools/jarsigner/ec.sh #uos generic-mips64el -+sun/security/tools/keytool/selfissued.sh #uos generic-mips64el -+sun/security/tools/keytool/StorePasswordsByShell.sh #uos generic-mips64el -+sun/security/validator/certreplace.sh #uos generic-mips64el -+sun/security/validator/samedn.sh #uos generic-mips64el -+tools/jar/compat/CLICompatibility.java generic-mips64el -+tools/jar/modularJar/Basic.java generic-mips64el -+tools/jar/multiRelease/Basic.java generic-mips64el -diff --git a/test/jdk/ProblemList.txt b/test/jdk/ProblemList.txt 
-index 3923811fb4..7fa36b8f44 100644 ---- a/test/jdk/ProblemList.txt -+++ b/test/jdk/ProblemList.txt -@@ -21,6 +21,12 @@ - # or visit www.oracle.com if you need additional information or have any - # questions. - # -+ -+# -+# This file has been modified by Loongson Technology in 2022. These -+# modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made -+# available on the same license terms set forth above. -+# - ########################################################################### - # - # List of tests that should not be run by test/Makefile, for various reasons: -@@ -624,7 +630,7 @@ sun/security/pkcs11/sslecc/ClientJSSEServerJSSE.java 8161536 generic- - - sun/security/tools/keytool/ListKeychainStore.sh 8156889 macosx-all - --sun/security/tools/jarsigner/compatibility/SignTwice.java 8217375 windows-all -+sun/security/tools/jarsigner/compatibility/SignTwice.java 8217375,#24291 windows-all,generic-mips64el,generic-loongarch64 - sun/security/tools/jarsigner/warnings/BadKeyUsageTest.java 8026393 generic-all - - javax/net/ssl/ServerName/SSLEngineExplorerMatchedSNI.java 8212096 generic-all -@@ -895,3 +901,40 @@ jdk/jfr/event/oldobject/TestLargeRootSet.java 8205651 gener - - ############################################################################ - -+# loongson added -+java/awt/font/GlyphVector/NLGlyphTest.java #21476 generic-all -+java/lang/System/LoggerFinder/internal/BootstrapLogger/BootstrapLoggerTest.java generic-all -+java/rmi/server/UnicastRemoteObject/exportObject/GcDuringExport.java #10949 generic-mips64el,generic-loongarch64 -+java/util/logging/LocalizedLevelName.java generic-all -+java/util/logging/SimpleFormatterFormat.java generic-all -+jdk/jfr/api/consumer/TestRecordedFrame.java #10010 generic-mips64el,generic-loongarch64 -+jdk/jfr/jcmd/TestJcmdStartWithSettings.java #24259 generic-mips64el,generic-loongarch64 -+jdk/jfr/jvm/TestJFRIntrinsic.java #10011,JDK-8239423 generic-mips64el,generic-loongarch64 -+security/infra/java/security/cert/CertPathValidator/certification/LetsEncryptCA.java #24472 generic-loongarch64 -+sun/tools/jhsdb/BasicLauncherTest.java #9381 generic-mips64el -+sun/tools/jhsdb/HeapDumpTest.java #9381 generic-mips64el -+sun/tools/jhsdb/JShellHeapDumpTest.java #23705 generic-mips64el -+sun/util/logging/SourceClassName.java generic-all -+tools/pack200/DeprecatePack200.java generic-all -+tools/jpackage/linux/AppAboutUrlTest.java#id0 #24942 generic-loongarch64 -+tools/jpackage/linux/AppCategoryTest.java #24942 generic-loongarch64 -+tools/jpackage/linux/jdk/jpackage/tests/UsrTreeTest.java #24942 generic-loongarch64 -+tools/jpackage/linux/LicenseTypeTest.java #24942 generic-loongarch64 -+tools/jpackage/linux/LinuxBundleNameTest.java #24942 generic-loongarch64 -+tools/jpackage/linux/LinuxResourceTest.java #24942 generic-loongarch64 -+tools/jpackage/linux/PackageDepsTest.java #24942 generic-loongarch64 -+tools/jpackage/linux/ReleaseTest.java#id0 #24942 generic-loongarch64 -+tools/jpackage/linux/ShortcutHintTest.java#id0 #24942 generic-loongarch64 -+tools/jpackage/share/AddLauncherTest.java#id1 #24942 generic-loongarch64 -+tools/jpackage/share/AddLShortcutTest.java #24942 generic-loongarch64 -+tools/jpackage/share/AppContentTest.java #24942 generic-loongarch64 -+tools/jpackage/share/AppImagePackageTest.java #24942 generic-loongarch64 -+tools/jpackage/share/EmptyFolderPackageTest.java #24942 generic-loongarch64 -+tools/jpackage/share/FileAssociationsTest.java#id0 #24942 generic-loongarch64 -+tools/jpackage/share/IconTest.java #24942 
generic-loongarch64 -+tools/jpackage/share/InstallDirTest.java#id0 #24942 generic-loongarch64 -+tools/jpackage/share/jdk/jpackage/tests/VendorTest.java#id1 #24942 generic-loongarch64 -+tools/jpackage/share/MultiLauncherTwoPhaseTest.java #24942 generic-loongarch64 -+tools/jpackage/share/MultiNameTwoPhaseTest.java #24942 generic-loongarch64 -+tools/jpackage/share/RuntimePackageTest.java#id0 #24942 generic-loongarch64 -+tools/jpackage/share/SimplePackageTest.java #24942 generic-loongarch64 diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java index 7990c49a1f..025048c6b0 100644 --- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java @@ -66300,71 +116402,8 @@ index 7ae0b68401..19689722d2 100644 osMap.put("Linux-s390x-64", new String[] { "/usr/lib64/" }); osMap.put("Windows-x86-32", new String[] {}); osMap.put("Windows-amd64-64", new String[] {}); -diff --git a/test/langtools/ProblemList-Xcomp.txt b/test/langtools/ProblemList-Xcomp.txt -new file mode 100644 -index 0000000000..60c3cd14ac ---- /dev/null -+++ b/test/langtools/ProblemList-Xcomp.txt -@@ -0,0 +1,35 @@ -+# -+# Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved. -+# Copyright (c) 2021, Loongson Technology. All rights reserved. -+# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+# -+# This code is free software; you can redistribute it and/or modify it -+# under the terms of the GNU General Public License version 2 only, as -+# published by the Free Software Foundation. -+# -+# This code is distributed in the hope that it will be useful, but WITHOUT -+# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+# version 2 for more details (a copy is included in the LICENSE file that -+# accompanied this code). -+# -+# You should have received a copy of the GNU General Public License version -+# 2 along with this work; if not, write to the Free Software Foundation, -+# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+# -+# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+# or visit www.oracle.com if you need additional information or have any -+# questions. -+# -+ -+############################################################################# -+# loongson added -+jdk/jshell/FailOverExecutionControlTest.java generic-mips64el -+jdk/jshell/JdiBadOptionLaunchExecutionControlTest.java generic-mips64el -+jdk/jshell/JdiFailingLaunchExecutionControlTest.java generic-mips64el -+jdk/jshell/JdiFailingListenExecutionControlTest.java generic-mips64el -+jdk/jshell/JdiHangingLaunchExecutionControlTest.java generic-mips64el -+tools/javac/completionDeps/DepsAndAnno.java #error generic-mips64el -+tools/javac/Paths/Class-Path.sh #error generic-mips64el -+tools/javac/Paths/Diagnostics.sh #error generic-mips64el -+tools/javac/Paths/wcMineField.sh #error generic-mips64el -diff --git a/test/langtools/ProblemList.txt b/test/langtools/ProblemList.txt -index 847d4f6939..8e085d9ae7 100644 ---- a/test/langtools/ProblemList.txt -+++ b/test/langtools/ProblemList.txt -@@ -21,6 +21,12 @@ - # or visit www.oracle.com if you need additional information or have any - # questions. - # -+ -+# -+# This file has been modified by Loongson Technology in 2021. These -+# modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made -+# available on the same license terms set forth above. 
-+# - ########################################################################### - - ########################################################################### -@@ -78,3 +84,4 @@ tools/sjavac/ClasspathDependencies.java 8158002 generic-all Requires i - # - # jdeps - -+# loongson added diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java -index e526252fe2..fc8114c965 100644 +index 739c351806..0ade89fff1 100644 --- a/test/lib/jdk/test/lib/Platform.java +++ b/test/lib/jdk/test/lib/Platform.java @@ -21,6 +21,12 @@ @@ -66372,15 +116411,15 @@ index e526252fe2..fc8114c965 100644 */ +/* -+ * This file has been modified by Loongson Technology in 2021, These -+ * modifications are Copyright (c) 2019, 2021, Loongson Technology, and are made ++ * This file has been modified by Loongson Technology in 2022, These ++ * modifications are Copyright (c) 2019, 2022, Loongson Technology, and are made + * available on the same license terms set forth above. + */ + package jdk.test.lib; - + import java.io.FileNotFoundException; -@@ -247,10 +253,18 @@ public class Platform { +@@ -225,6 +231,14 @@ public class Platform { return isArch("(i386)|(x86(?!_64))"); } @@ -66388,16 +116427,12 @@ index e526252fe2..fc8114c965 100644 + return isArch("loongarch64"); + } + - public static String getOsArch() { - return osArch; - } - + public static boolean isMIPS() { + return isArch("mips.*"); + } + - public static boolean isRoot() { - return userName.equals("root"); + public static String getOsArch() { + return osArch; } diff --git a/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java b/test/micro/org/openjdk/bench/java/lang/RotateBenchmark.java new file mode 100644 @@ -66492,79 +116527,6 @@ index 0000000000..81fd956a4e + } + +} -diff --git a/test/micro/org/openjdk/bench/loongarch/C2Memory.java b/test/micro/org/openjdk/bench/loongarch/C2Memory.java -new file mode 100644 -index 0000000000..65cf1773d0 ---- /dev/null -+++ b/test/micro/org/openjdk/bench/loongarch/C2Memory.java -@@ -0,0 +1,67 @@ -+/* -+ * Copyright (c) 2021, Loongson Technology. All rights reserved. -+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. -+ * -+ * This code is free software; you can redistribute it and/or modify it -+ * under the terms of the GNU General Public License version 2 only, as -+ * published by the Free Software Foundation. -+ * -+ * This code is distributed in the hope that it will be useful, but WITHOUT -+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or -+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -+ * version 2 for more details (a copy is included in the LICENSE file that -+ * accompanied this code). -+ * -+ * You should have received a copy of the GNU General Public License version -+ * 2 along with this work; if not, write to the Free Software Foundation, -+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. -+ * -+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA -+ * or visit www.oracle.com if you need additional information or have any -+ * questions. 
-+ */ -+ -+package org.openjdk.bench.loongarch; -+ -+import org.openjdk.jmh.annotations.Benchmark; -+ -+public class C2Memory { -+ public static int sum; -+ public static int array1[] = new int[0x8000]; -+ public static int array2[] = new int[0x8000]; -+ -+ @Benchmark -+ public void testMethod() { -+ for (int i = 0; i<10000;i++) { -+ sum = array1[0x7fff] + array2[0x1f0]; -+ array1[0x7fff] += array2[0x1f0]; -+ } -+ } -+ -+ @Benchmark -+ public void testBasePosIndexOffset() { -+ int xstart = 30000; -+ long carry = 63; -+ -+ for (int j=xstart; j >= 0; j--) { -+ array2[j] = array1[xstart]; -+ } -+ -+ array2[xstart] = (int)carry; -+ } -+ -+ public static byte b_array1[] = new byte[0x8000]; -+ public static byte b_array2[] = new byte[0x8000]; -+ -+ @Benchmark -+ public void testBaseIndexOffset() { -+ int xstart = 10000; -+ byte carry = 63; -+ -+ for (int j=xstart; j >= 0; j--) { -+ b_array2[j] = b_array1[xstart]; -+ } -+ -+ b_array2[xstart] = carry; -+ } -+} diff --git a/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java b/test/micro/org/openjdk/bench/vm/compiler/MacroLogicOpt.java new file mode 100644 index 0000000000..58400cadf6 -- Gitee
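The Platform.java hunk above extends the jdk.test.lib test library with isLoongArch64() and isMIPS() predicates next to the existing isArch()-based helpers. A minimal sketch of how a test might branch on them follows; the class name and printed messages are illustrative assumptions, while Platform.isLoongArch64(), Platform.isMIPS(), and Platform.getOsArch() come from the patched library shown in the hunk.

    import jdk.test.lib.Platform;

    public class LoongsonPlatformExample {
        public static void main(String[] args) {
            // isLoongArch64()/isMIPS() match os.arch through the same
            // isArch(...) regex helper used by the other Platform predicates.
            if (Platform.isLoongArch64() || Platform.isMIPS()) {
                System.out.println("Loongson port detected: " + Platform.getOsArch());
            } else {
                System.out.println("Other architecture: " + Platform.getOsArch());
            }
        }
    }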