diff --git a/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch b/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch index 6272c2651de1a776ff0779ee4d0edac0236c8ecf..12408a808d467951911de08b75b4691227f4aa1f 100755 --- a/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch +++ b/G1-iterate-region-by-bitmap-rather-than-obj-size-in.patch @@ -367,4 +367,4 @@ index 000000000..85b49171c --- /dev/null +++ b/version.txt @@ -0,0 +1 @@ -+11.0.26.0.13 ++11.0.27.0.13 diff --git a/add-jbolt-feature.patch b/add-jbolt-feature.patch new file mode 100644 index 0000000000000000000000000000000000000000..964118798ab06444d6bfe2e1e4bc4db720f12d3e --- /dev/null +++ b/add-jbolt-feature.patch @@ -0,0 +1,6234 @@ +diff --git a/make/hotspot/lib/JvmFlags.gmk b/make/hotspot/lib/JvmFlags.gmk +index 1a91eb007..109f40f15 100644 +--- a/make/hotspot/lib/JvmFlags.gmk ++++ b/make/hotspot/lib/JvmFlags.gmk +@@ -41,6 +41,12 @@ JVM_SRC_DIRS += $(call uniq, $(wildcard $(foreach d, $(JVM_SRC_ROOTS), \ + $(JVM_VARIANT_OUTPUTDIR)/gensrc + # + ++JVM_ACC_PLUGIN_DIR := $(TOPDIR)/src/java.base/share/native/libjplugin ++JVM_ACC_PLUGIN_SRC := $(JVM_ACC_PLUGIN_DIR)/feature ++ifeq ($(wildcard $(JVM_ACC_PLUGIN_SRC)), $(JVM_ACC_PLUGIN_SRC)) ++ JVM_SRC_DIRS += $(JVM_ACC_PLUGIN_SRC) ++endif ++ + JVM_CFLAGS_INCLUDES += \ + $(patsubst %,-I%,$(JVM_SRC_DIRS)) \ + -I$(TOPDIR)/src/hotspot/share/precompiled \ +diff --git a/make/hotspot/lib/JvmMapfile.gmk b/make/hotspot/lib/JvmMapfile.gmk +index ba44e5798..92e76b707 100644 +--- a/make/hotspot/lib/JvmMapfile.gmk ++++ b/make/hotspot/lib/JvmMapfile.gmk +@@ -60,6 +60,12 @@ ifeq ($(call isTargetOs, solaris), true) + endif + endif + ++JVM_ACC_PLUGIN_DIR := $(TOPDIR)/src/java.base/share/native/libjplugin ++JVM_ACC_PLUGIN_SYMBOLS_SRC := $(JVM_ACC_PLUGIN_DIR)/make/hotspot-symbols ++ifeq ($(wildcard $(JVM_ACC_PLUGIN_SYMBOLS_SRC)), $(JVM_ACC_PLUGIN_SYMBOLS_SRC)) ++ SYMBOLS_SRC += $(JVM_ACC_PLUGIN_SYMBOLS_SRC)/symbols-plugin ++endif ++ + ################################################################################ + # Create a dynamic list of symbols from the built object files. This is highly + # platform dependent. 
+diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index 2306a8d60..0acc6a57f 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -5665,6 +5665,46 @@ void os::Linux::numa_init() { + } + } + ++#if INCLUDE_JBOLT ++os::Linux::jboltHeap_init_t os::Linux::_jboltHeap_init; ++os::Linux::jboltLog_precalc_t os::Linux::_jboltLog_precalc; ++os::Linux::jboltLog_do_t os::Linux::_jboltLog_do; ++os::Linux::jboltMerge_judge_t os::Linux::_jboltMerge_judge; ++#endif // INCLUDE_JBOLT ++ ++void os::Linux::load_plugin_library() { ++#if INCLUDE_JBOLT ++ _jboltHeap_init = CAST_TO_FN_PTR(jboltHeap_init_t, dlsym(RTLD_DEFAULT, "JBoltHeap_Init")); ++ _jboltLog_precalc = CAST_TO_FN_PTR(jboltLog_precalc_t, dlsym(RTLD_DEFAULT, "JBoltLog_PreCalc")); ++ _jboltLog_do = CAST_TO_FN_PTR(jboltLog_do_t, dlsym(RTLD_DEFAULT, "JBoltLog_DO")); ++ _jboltMerge_judge = CAST_TO_FN_PTR(jboltMerge_judge_t, dlsym(RTLD_DEFAULT, "JBoltMerge_Judge")); ++#endif // INCLUDE_JBOLT ++ ++ char path[JVM_MAXPATHLEN]; ++ char ebuf[1024]; ++ void* handle = NULL; ++ if (os::dll_locate_lib(path, sizeof(path), Arguments::get_dll_dir(), "jvm11_Acc") || ++ os::dll_locate_lib(path, sizeof(path), "/usr/lib64", "jvm11_Acc")) { ++ handle = dlopen(path, RTLD_LAZY); ++ } ++ if (handle != NULL) { ++#if INCLUDE_JBOLT ++ if (_jboltHeap_init == NULL) { ++ _jboltHeap_init = CAST_TO_FN_PTR(jboltHeap_init_t, dlsym(handle, "JBoltHeap_Init")); ++ } ++ if (_jboltLog_precalc == NULL) { ++ _jboltLog_precalc = CAST_TO_FN_PTR(jboltLog_precalc_t, dlsym(handle, "JBoltLog_PreCalc")); ++ } ++ if (_jboltLog_do == NULL) { ++ _jboltLog_do = CAST_TO_FN_PTR(jboltLog_do_t, dlsym(handle, "JBoltLog_DO")); ++ } ++ if (_jboltMerge_judge == NULL) { ++ _jboltMerge_judge = CAST_TO_FN_PTR(jboltMerge_judge_t, dlsym(handle, "JBoltMerge_Judge")); ++ } ++#endif // INCLUDE_JBOLT ++ } ++} ++ + // this is called _after_ the global arguments have been parsed + jint os::init_2(void) { + +@@ -5712,6 +5752,8 @@ jint os::init_2(void) { + init_adjust_stacksize_for_guard_pages(); + #endif + ++ Linux::load_plugin_library(); ++ + if (UseNUMA) { + Linux::numa_init(); + } +diff --git a/src/hotspot/os/linux/os_linux.hpp b/src/hotspot/os/linux/os_linux.hpp +index 2965fd606..fac136dfe 100644 +--- a/src/hotspot/os/linux/os_linux.hpp ++++ b/src/hotspot/os/linux/os_linux.hpp +@@ -188,6 +188,7 @@ class Linux { + static const char *libc_version() { return _libc_version; } + static const char *libpthread_version() { return _libpthread_version; } + ++ static void load_plugin_library(); + static void libpthread_init(); + static void sched_getcpu_init(); + static bool libnuma_init(); +@@ -271,7 +272,16 @@ class Linux { + typedef void (*numa_set_bind_policy_func_t)(int policy); + typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n); + typedef int (*numa_distance_func_t)(int node1, int node2); +- ++#if INCLUDE_JBOLT ++ typedef void (*jboltHeap_init_t)(uintptr_t related_data[], address rs, address non_nmethod_space, address profiled_space, address non_profiled_space, address jbolt_hot_space, address jbolt_tmp_space); ++ typedef void (*jboltLog_precalc_t)(unsigned int topFrameIndex, unsigned int &max_frames, unsigned int framesCount); ++ typedef bool (*jboltLog_do_t)(uintptr_t related_data[], address stacktrace, unsigned int i, int comp_level, address new_func, address *tempfunc); ++ typedef int (*jboltMerge_judge_t)(uintptr_t related_data[], int candidate, address clusters, address merged, address cluster); ++ static 
jboltHeap_init_t _jboltHeap_init; ++ static jboltLog_precalc_t _jboltLog_precalc; ++ static jboltLog_do_t _jboltLog_do; ++ static jboltMerge_judge_t _jboltMerge_judge; ++#endif + static sched_getcpu_func_t _sched_getcpu; + static numa_node_to_cpus_func_t _numa_node_to_cpus; + static numa_node_to_cpus_v2_func_t _numa_node_to_cpus_v2; +@@ -466,6 +476,33 @@ class Linux { + return false; + } + } ++ ++#if INCLUDE_JBOLT ++ static bool jboltHeap_init(uintptr_t related_data[], address rs, address non_nmethod_space, address profiled_space, address non_profiled_space, address jbolt_hot_space, address jbolt_tmp_space) { ++ if (_jboltHeap_init != NULL) { ++ _jboltHeap_init(related_data, rs, non_nmethod_space, profiled_space, non_profiled_space, jbolt_hot_space, jbolt_tmp_space); ++ return true; ++ } ++ return false; ++ } ++ static void jboltLog_precalc(unsigned int topFrameIndex, unsigned int &max_frames, unsigned int framesCount) { ++ if (_jboltLog_precalc != NULL) { ++ _jboltLog_precalc(topFrameIndex, max_frames, framesCount); ++ } ++ } ++ static bool jboltLog_do(uintptr_t related_data[], address stacktrace, unsigned int i, int comp_level, address new_func, address *tempfunc) { ++ if (_jboltLog_do != NULL) { ++ return _jboltLog_do(related_data, stacktrace, i, comp_level, new_func, tempfunc); ++ } ++ return false; ++ } ++ static int jboltMerge_judge(uintptr_t related_data[], int candidate, address clusters, address merged, address cluster) { ++ if (_jboltMerge_judge != NULL) { ++ return _jboltMerge_judge(related_data, candidate, clusters, merged, cluster); ++ } ++ return -1; ++ } ++#endif // INCLUDE_JBOLT + }; + + #endif // OS_LINUX_VM_OS_LINUX_HPP +diff --git a/src/hotspot/share/ci/ciEnv.cpp b/src/hotspot/share/ci/ciEnv.cpp +index e7e3dc187..f66926600 100644 +--- a/src/hotspot/share/ci/ciEnv.cpp ++++ b/src/hotspot/share/ci/ciEnv.cpp +@@ -69,6 +69,9 @@ + #ifdef COMPILER2 + #include "opto/runtime.hpp" + #endif ++#if INCLUDE_JBOLT ++#include "jbolt/jBoltManager.hpp" ++#endif + + // ciEnv + // +@@ -1033,15 +1036,33 @@ void ciEnv::register_method(ciMethod* target, + assert(offsets->value(CodeOffsets::Deopt) != -1, "must have deopt entry"); + assert(offsets->value(CodeOffsets::Exceptions) != -1, "must have exception entry"); + +- nm = nmethod::new_nmethod(method, +- compile_id(), +- entry_bci, +- offsets, +- orig_pc_offset, +- debug_info(), dependencies(), code_buffer, +- frame_words, oop_map_set, +- handler_table, inc_table, +- compiler, task()->comp_level()); ++#if INCLUDE_JBOLT ++ if (UseJBolt && JBoltManager::reorder_phase_is_collecting_or_reordering()) { ++ int code_blob_type = JBoltManager::calc_code_blob_type(method(), task(), THREAD); ++ nm = nmethod::new_nmethod(method, ++ compile_id(), ++ entry_bci, ++ offsets, ++ orig_pc_offset, ++ debug_info(), dependencies(), code_buffer, ++ frame_words, oop_map_set, ++ handler_table, inc_table, ++ compiler, task()->comp_level(), ++ NULL, NULL, ++ code_blob_type); ++ } else ++#endif // INCLUDE_JBOLT ++ { ++ nm = nmethod::new_nmethod(method, ++ compile_id(), ++ entry_bci, ++ offsets, ++ orig_pc_offset, ++ debug_info(), dependencies(), code_buffer, ++ frame_words, oop_map_set, ++ handler_table, inc_table, ++ compiler, task()->comp_level()); ++ } + + // Free codeBlobs + code_buffer->free_blob(); +diff --git a/src/hotspot/share/code/codeBlob.hpp b/src/hotspot/share/code/codeBlob.hpp +index 82b01d096..a14abe4e4 100644 +--- a/src/hotspot/share/code/codeBlob.hpp ++++ b/src/hotspot/share/code/codeBlob.hpp +@@ -39,10 +39,12 @@ struct CodeBlobType { + enum { + 
MethodNonProfiled = 0, // Execution level 1 and 4 (non-profiled) nmethods (including native nmethods) + MethodProfiled = 1, // Execution level 2 and 3 (profiled) nmethods +- NonNMethod = 2, // Non-nmethods like Buffers, Adapters and Runtime Stubs +- All = 3, // All types (No code cache segmentation) +- AOT = 4, // AOT methods +- NumTypes = 5 // Number of CodeBlobTypes ++ MethodJBoltHot = 2, // Hot methods (determined by JBolt) of level 1 and 4 nmethods ++ MethodJBoltTmp = 3, // Temporary storage of JBolt hot methods ++ NonNMethod = 4, // Non-nmethods like Buffers, Adapters and Runtime Stubs ++ All = 5, // All types (No code cache segmentation) ++ AOT = 6, // AOT methods ++ NumTypes = 7 // Number of CodeBlobTypes + }; + }; + +diff --git a/src/hotspot/share/code/codeCache.cpp b/src/hotspot/share/code/codeCache.cpp +index f95fbcce2..cd3e376d4 100644 +--- a/src/hotspot/share/code/codeCache.cpp ++++ b/src/hotspot/share/code/codeCache.cpp +@@ -66,6 +66,9 @@ + #include "opto/compile.hpp" + #include "opto/node.hpp" + #endif ++#if INCLUDE_JBOLT ++#include "jbolt/jBoltManager.hpp" ++#endif // INCLUDE_JBOLT + + // Helper class for printing in CodeCache + class CodeBlob_sizes { +@@ -292,6 +295,16 @@ void CodeCache::initialize_heaps() { + non_nmethod_size = align_up(non_nmethod_size, alignment); + profiled_size = align_down(profiled_size, alignment); + ++#if INCLUDE_JBOLT ++ if (UseJBolt && !JBoltDumpMode) { ++ // We replace the original add-heap logic with the JBolt one. manual dump mode doesn't need that ++ JBoltManager::init_code_heaps(non_nmethod_size, profiled_size, non_profiled_size, cache_size, alignment); ++ return; ++ } ++ // The following add-heap logic will not be executed if JBolt load mode is on. ++ // If the following logic is modified, remember to modify the JBolt logic accordingly. ++#endif // INCLUDE_JBOLT ++ + // Reserve one continuous chunk of memory for CodeHeaps and split it into + // parts for the individual heaps. The memory layout looks like this: + // ---------- high ----------- +@@ -345,6 +358,12 @@ ReservedCodeSpace CodeCache::reserve_heap_memory(size_t size) { + + // Heaps available for allocation + bool CodeCache::heap_available(int code_blob_type) { ++ if (code_blob_type == CodeBlobType::MethodJBoltHot) { ++ return JBOLT_ONLY(UseJBolt && !JBoltDumpMode) NOT_JBOLT(false); ++ } else if (code_blob_type == CodeBlobType::MethodJBoltTmp) { ++ return JBOLT_ONLY(UseJBolt && !JBoltDumpMode) NOT_JBOLT(false); ++ } ++ + if (!SegmentedCodeCache) { + // No segmentation: use a single code heap + return (code_blob_type == CodeBlobType::All); +@@ -372,6 +391,12 @@ const char* CodeCache::get_code_heap_flag_name(int code_blob_type) { + case CodeBlobType::MethodProfiled: + return "ProfiledCodeHeapSize"; + break; ++ case CodeBlobType::MethodJBoltHot: ++ return "JBoltHotCodeHeapSize"; ++ break; ++ case CodeBlobType::MethodJBoltTmp: ++ return "JBoltTmpCodeHeapSize"; ++ break; + } + ShouldNotReachHere(); + return NULL; +@@ -522,6 +547,17 @@ CodeBlob* CodeCache::allocate(int size, int code_blob_type, int orig_code_blob_t + type = CodeBlobType::MethodNonProfiled; + } + break; ++#if INCLUDE_JBOLT ++ case CodeBlobType::MethodJBoltHot: ++ case CodeBlobType::MethodJBoltTmp: ++ if (JBoltLoadMode) { ++ type = CodeBlobType::MethodNonProfiled; ++ break; ++ } ++ // [jbolt]: JBoltCodeCache is too full to contain all ordered methods, but the hotter ones should have been recompiled. 
++ JBoltManager::handle_full_jbolt_code_cache(); ++ return NULL; ++#endif // INCLUDE_JBOLT + } + if (type != code_blob_type && type != orig_code_blob_type && heap_available(type)) { + if (PrintCodeCacheExtension) { +diff --git a/src/hotspot/share/code/codeCache.hpp b/src/hotspot/share/code/codeCache.hpp +index 3ca988c92..37edfa6e0 100644 +--- a/src/hotspot/share/code/codeCache.hpp ++++ b/src/hotspot/share/code/codeCache.hpp +@@ -47,6 +47,10 @@ + // executed at level 2 or 3 + // - Non-Profiled nmethods: nmethods that are not profiled, i.e., those + // executed at level 1 or 4 and native methods ++// - JBolt nmethods: sorted non-profiled nmethods that are judged to be hot ++// by JBolt ++// - JBolt tmp nmethods: non-profiled nmethods that are judged to be hot by ++// JBolt but not sorted yet + // - All: Used for code of all types if code cache segmentation is disabled. + // + // In the rare case of the non-nmethod code heap getting full, non-nmethod code +@@ -84,6 +88,9 @@ class CodeCache : AllStatic { + #if INCLUDE_SHENANDOAHGC + friend class ShenandoahParallelCodeHeapIterator; + #endif ++#if INCLUDE_JBOLT ++ friend class JBoltManager; ++#endif // INCLUDE_JBOLT + private: + // CodeHeaps of the cache + static GrowableArray* _heaps; +@@ -242,13 +249,17 @@ class CodeCache : AllStatic { + } + + static bool code_blob_type_accepts_compiled(int type) { +- bool result = type == CodeBlobType::All || type <= CodeBlobType::MethodProfiled; ++ // Modified `type <= CodeBlobType::MethodProfiled` to `type < CodeBlobType::NonNMethod` ++ // after adding the JBolt heap. The two logics are still equivalent even without JBolt. ++ bool result = type == CodeBlobType::All || type < CodeBlobType::NonNMethod; + AOT_ONLY( result = result || type == CodeBlobType::AOT; ) + return result; + } + + static bool code_blob_type_accepts_nmethod(int type) { +- return type == CodeBlobType::All || type <= CodeBlobType::MethodProfiled; ++ // Modified `type <= CodeBlobType::MethodProfiled` to `type < CodeBlobType::NonNMethod` ++ // after adding the JBolt heap. The two logics are still equivalent even without JBolt. 
++ return type == CodeBlobType::All || type < CodeBlobType::NonNMethod; + } + + static bool code_blob_type_accepts_allocable(int type) { +diff --git a/src/hotspot/share/code/nmethod.cpp b/src/hotspot/share/code/nmethod.cpp +index 6bc63116b..ae02db085 100644 +--- a/src/hotspot/share/code/nmethod.cpp ++++ b/src/hotspot/share/code/nmethod.cpp +@@ -67,6 +67,9 @@ + #if INCLUDE_JVMCI + #include "jvmci/jvmciJavaClasses.hpp" + #endif ++#if INCLUDE_JBOLT ++#include "jbolt/jBoltManager.hpp" ++#endif + + #ifdef DTRACE_ENABLED + +@@ -481,6 +484,9 @@ nmethod* nmethod::new_nmethod(const methodHandle& method, + , jweak installed_code, + jweak speculationLog + #endif ++#if INCLUDE_JBOLT ++ , int code_blob_type // for jbolt ++#endif // INCLUDE_JBOLT + ) + { + assert(debug_info->oop_recorder() == code_buffer->oop_recorder(), "shared OR"); +@@ -496,7 +502,11 @@ nmethod* nmethod::new_nmethod(const methodHandle& method, + + align_up(nul_chk_table->size_in_bytes() , oopSize) + + align_up(debug_info->data_size() , oopSize); + ++#if INCLUDE_JBOLT ++ nm = new (nmethod_size, comp_level, code_blob_type) ++#else // INCLUDE_JBOLT + nm = new (nmethod_size, comp_level) ++#endif // INCLUDE_JBOLT + nmethod(method(), compiler->type(), nmethod_size, compile_id, entry_bci, offsets, + orig_pc_offset, debug_info, dependencies, code_buffer, frame_size, + oop_maps, +@@ -641,6 +651,15 @@ void* nmethod::operator new(size_t size, int nmethod_size, int comp_level) throw + return CodeCache::allocate(nmethod_size, CodeCache::get_code_blob_type(comp_level)); + } + ++#if INCLUDE_JBOLT ++void* nmethod::operator new(size_t size, int nmethod_size, int comp_level, int code_blob_type) throw () { ++ if (code_blob_type < CodeBlobType::All) { ++ return CodeCache::allocate(nmethod_size, code_blob_type); ++ } ++ return CodeCache::allocate(nmethod_size, CodeCache::get_code_blob_type(comp_level)); ++} ++#endif // INCLUDE_JBOLT ++ + nmethod::nmethod( + Method* method, + CompilerType type, +diff --git a/src/hotspot/share/code/nmethod.hpp b/src/hotspot/share/code/nmethod.hpp +index b5018dcf8..6e33af573 100644 +--- a/src/hotspot/share/code/nmethod.hpp ++++ b/src/hotspot/share/code/nmethod.hpp +@@ -214,6 +214,11 @@ class nmethod : public CompiledMethod { + // helper methods + void* operator new(size_t size, int nmethod_size, int comp_level) throw(); + ++#if INCLUDE_JBOLT ++ // For JBolt. So the code can be allocated in code segments defined by JBolt. ++ void* operator new(size_t size, int nmethod_size, int comp_level, int code_blob_type) throw (); ++#endif // INCLUDE_JBOLT ++ + const char* reloc_string_for(u_char* begin, u_char* end); + // Returns true if this thread changed the state of the nmethod or + // false if another thread performed the transition. 
+@@ -253,6 +258,9 @@ class nmethod : public CompiledMethod { + , jweak installed_code = NULL, + jweak speculation_log = NULL + #endif ++#if INCLUDE_JBOLT ++ , int code_blob_type = CodeBlobType::All // for jbolt ++#endif // INCLUDE_JBOLT + ); + + static nmethod* new_native_nmethod(const methodHandle& method, +diff --git a/src/hotspot/share/compiler/compileBroker.cpp b/src/hotspot/share/compiler/compileBroker.cpp +index c3fae3df2..5d806966d 100644 +--- a/src/hotspot/share/compiler/compileBroker.cpp ++++ b/src/hotspot/share/compiler/compileBroker.cpp +@@ -75,6 +75,9 @@ + #ifdef COMPILER2 + #include "opto/c2compiler.hpp" + #endif ++#if INCLUDE_JBOLT ++#include "jbolt/jBoltManager.hpp" ++#endif // INCLUDE_JBOLT + + #ifdef DTRACE_ENABLED + +@@ -1882,6 +1885,12 @@ void CompileBroker::compiler_thread_loop() { + } + } + ++#if INCLUDE_JBOLT ++ if (UseJBolt && JBoltLoadMode) { ++ JBoltManager::check_start_reordering(thread); ++ } ++#endif // INCLUDE_JBOLT ++ + if (UseDynamicNumberOfCompilerThreads) { + possibly_add_compiler_threads(); + } +diff --git a/src/hotspot/share/compiler/compileBroker.hpp b/src/hotspot/share/compiler/compileBroker.hpp +index 53e496cd3..2c8ecb5ce 100644 +--- a/src/hotspot/share/compiler/compileBroker.hpp ++++ b/src/hotspot/share/compiler/compileBroker.hpp +@@ -139,6 +139,9 @@ public: + class CompileBroker: AllStatic { + friend class Threads; + friend class CompileTaskWrapper; ++#if INCLUDE_JBOLT ++ friend class JBoltManager; ++#endif // INCLUDE_JBOLT + + public: + enum { +diff --git a/src/hotspot/share/compiler/compileTask.hpp b/src/hotspot/share/compiler/compileTask.hpp +index 2029defdc..61f3af955 100644 +--- a/src/hotspot/share/compiler/compileTask.hpp ++++ b/src/hotspot/share/compiler/compileTask.hpp +@@ -55,6 +55,9 @@ class CompileTask : public CHeapObj { + Reason_Whitebox, // Whitebox API + Reason_MustBeCompiled, // Java callHelper, LinkResolver + Reason_Bootstrap, // JVMCI bootstrap ++#if INCLUDE_JBOLT ++ Reason_Reorder, // JBolt reorder ++#endif + Reason_Count + }; + +@@ -69,6 +72,9 @@ class CompileTask : public CHeapObj { + "whitebox", + "must_be_compiled", + "bootstrap" ++#if INCLUDE_JBOLT ++ , "reorder" ++#endif + }; + return reason_names[compile_reason]; + } +@@ -225,6 +231,12 @@ public: + print_inlining_inner(tty, method, inline_level, bci, msg); + } + static void print_inlining_ul(ciMethod* method, int inline_level, int bci, const char* msg = NULL); ++ ++#if INCLUDE_JBOLT ++ CompileReason compile_reason() { return _compile_reason; } ++ int hot_count() { return _hot_count; } ++ const char* failure_reason() { return _failure_reason; } ++#endif // INCLUDE_JBOLT + }; + + #endif // SHARE_VM_COMPILER_COMPILETASK_HPP +diff --git a/src/hotspot/share/compiler/compilerDefinitions.hpp b/src/hotspot/share/compiler/compilerDefinitions.hpp +index 12589e11c..cf2c2b3b7 100644 +--- a/src/hotspot/share/compiler/compilerDefinitions.hpp ++++ b/src/hotspot/share/compiler/compilerDefinitions.hpp +@@ -26,6 +26,11 @@ + #define SHARE_VM_COMPILER_COMPILERDEFINITIONS_HPP + + #include "memory/allocation.hpp" ++#include "runtime/arguments.hpp" ++ ++#if INCLUDE_JVMCI ++#include "jvmci/jvmci_globals.hpp" ++#endif + + // The (closed set) of concrete compiler classes. 
+ enum CompilerType { +@@ -128,6 +133,27 @@ public: + + static void ergo_initialize(); + ++ static bool has_c1() { return COMPILER1_PRESENT(true) NOT_COMPILER1(false); } ++ static bool has_c2() { return COMPILER2_PRESENT(true) NOT_COMPILER2(false); } ++ static bool has_jvmci() { return JVMCI_ONLY(true) NOT_JVMCI(false); } ++ ++ static bool is_jvmci_compiler() { return JVMCI_ONLY(has_jvmci() && UseJVMCICompiler) NOT_JVMCI(false); } ++ static bool is_interpreter_only() { return Arguments::is_interpreter_only() || TieredStopAtLevel == CompLevel_none; } ++ ++ // Is the JVM in a configuration that permits only c1-compiled methods (level 1,2,3)? ++ static bool is_c1_only() { ++ if (!is_interpreter_only() && has_c1()) { ++ const bool c1_only = !has_c2() && !is_jvmci_compiler(); ++ const bool tiered_degraded_to_c1_only = TieredCompilation && TieredStopAtLevel >= CompLevel_simple && TieredStopAtLevel < CompLevel_full_optimization; ++ return c1_only || tiered_degraded_to_c1_only; ++ } ++ return false; ++ } ++ ++ static bool is_c2_enabled() { ++ return has_c2() && !is_interpreter_only() && !is_c1_only() && !is_jvmci_compiler(); ++ } ++ + private: + static void set_tiered_flags(); + }; +diff --git a/src/hotspot/share/jbolt/jBoltCallGraph.cpp b/src/hotspot/share/jbolt/jBoltCallGraph.cpp +new file mode 100644 +index 000000000..c2a3b51f5 +--- /dev/null ++++ b/src/hotspot/share/jbolt/jBoltCallGraph.cpp +@@ -0,0 +1,482 @@ ++/* ++ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#include "precompiled.hpp" ++#include "jbolt/jBoltCallGraph.hpp" ++#include "jfr/utilities/jfrAllocation.hpp" ++#include "logging/log.hpp" ++#include "logging/logStream.hpp" ++#include "oops/method.inline.hpp" ++#include "utilities/defaultStream.hpp" ++ ++#define PAGE_SIZE os::vm_page_size() ++ ++static GrowableArray* _clusters = NULL; ++static GrowableArray* _calls = NULL; ++static GrowableArray* _funcs = NULL; ++ ++// (JBolt hfsort optional)sort final clusters by density ++static const bool _jbolt_density_sort = false; ++// (JBolt hfsort optional)freeze merging while exceeding pagesize ++static const bool _jbolt_merge_frozen = false; ++ ++void JBoltCallGraph::initialize() { ++ ::_clusters = JBoltCallGraph::callgraph_instance().callgraph_clusters(); ++ ::_calls = JBoltCallGraph::callgraph_instance().callgraph_calls(); ++ ::_funcs = JBoltCallGraph::callgraph_instance().callgraph_funcs(); ++} ++ ++void JBoltCallGraph::deinitialize() { ++ ::_clusters = NULL; ++ ::_calls = NULL; ++ ::_funcs = NULL; ++} ++ ++int JBoltCallGraph::clear_instance() { ++ delete _clusters; ++ delete _calls; ++ delete _funcs; ++ ++ // Reinit default cluster start id ++ _init_cluster_id = 0; ++ ++ // Re-allocate ++ _clusters = create_growable_array(); ++ _calls = create_growable_array(); ++ _funcs = create_growable_array(); ++ ++ // Re-initialize ++ initialize(); ++ ++ return 0; ++} ++ ++static GrowableArray* clusters_copy() { ++ GrowableArray* copy = create_growable_array(_clusters->length()); ++ copy->appendAll(_clusters); ++ return copy; ++} ++ ++static GrowableArray* funcs_copy() { ++ GrowableArray* copy = create_growable_array(_funcs->length()); ++ copy->appendAll(_funcs); ++ return copy; ++} ++ ++static int find_func_index(const JBoltFunc* func) { ++ for (int i = 0; i < _funcs->length(); ++i) { ++ JBoltFunc& existing = _funcs->at(i); ++ if (existing == (*func)) { ++ return i; ++ } ++ } ++ return -1; ++} ++ ++// Searching for a cluster with corresponding func or creating a new one if doesn't exist ++static JBoltCluster* find_cluster(JBoltFunc* func) { ++ for (int i = 0; i < _clusters->length(); ++i) { ++ JBoltCluster& cluster = _clusters->at(i); ++ int index = cluster.func_indexes()->at(0); ++ if (_funcs->at(index) == (*func)) { ++ return &cluster; ++ } ++ } ++ _funcs->append(*func); ++ _clusters->append(JBoltCluster(*func)); ++ JBoltCluster& cluster = _clusters->at(_clusters->length() - 1); ++ _funcs->at(_funcs->length() - 1).set_cluster_id(cluster.id()); ++ return &cluster; ++} ++ ++// Creating a new call in graph or updating the weight if exists ++static void add_call_to_calls(GrowableArray* calls, const JBoltCall* call) { ++ for (int i = 0; i < calls->length(); ++i) { ++ JBoltCall& existing_call = calls->at(i); ++ if (existing_call == *call) { ++ if (existing_call.stacktrace_id() == call->stacktrace_id()) { ++ assert(call->call_count() >= existing_call.call_count(), "invariant"); ++ existing_call.callee().add_heat(call->call_count() - existing_call.call_count()); ++ existing_call.set_call_count(call->call_count()); ++ } ++ else { ++ existing_call.callee().add_heat(call->call_count()); ++ existing_call.set_call_count(existing_call.call_count() + call->call_count()); ++ } ++ return; ++ } ++ } ++ ++ calls->append(*call); ++ call->callee().add_heat(call->call_count()); ++ call->callee().append_call_index(calls->length() - 1); ++} ++ ++// Getting final funcs order from an array of processed clusters ++static GrowableArray* clusters_to_funcs_order(GrowableArray* clusters) { ++ log_debug(jbolt)( "sorted 
clusters:\n"); ++ for (int i = 0; i < clusters->length(); ++i) { ++ log_debug(jbolt)( "cluster id: %d heats: %ld size: %dB density: %f\n", clusters->at(i).id(), clusters->at(i).heats(), clusters->at(i).size(), clusters->at(i).density()); ++ for (int j = 0; j < clusters->at(i).get_funcs_count(); ++j) { ++ JBoltFunc& func = _funcs->at(clusters->at(i).func_indexes()->at(j)); ++ const Method* const method = func.method(); ++ if (method != NULL) { ++ log_debug(jbolt)( "%d: method signature:%s heat: %ld size: %dB\n", ++ j, method->external_name(), func.heat(), func.size()); ++ } ++ } ++ } ++ ++ GrowableArray* order = create_growable_array(_funcs->length()); ++ // used to seperator distinct cluster, klass = NULL ++ JBoltFunc seperator_func; ++ order->append(seperator_func); ++ for (int i = 0; i < clusters->length(); ++i) { ++ JBoltCluster& cluster = clusters->at(i); ++ GrowableArray* func_indexes = cluster.func_indexes(); ++ ++ for (int j = 0; j < func_indexes->length(); ++j) { ++ int index = func_indexes->at(j); ++ order->append(_funcs->at(index)); ++ } ++ ++ order->append(seperator_func); ++ } ++ return order; ++} ++ ++template ++static int fast_compare(T val1, T val2) { ++ return (val1 < val2) ? 1 : ((val1 == val2) ? 0 : -1); ++} ++ ++// Comparing function needed to sort an array of funcs by their weights (in decreasing order) ++static int func_comparator(JBoltFunc* func1, JBoltFunc* func2) { ++ return _jbolt_density_sort ? fast_compare(func1->heat() * func2->size(), func2->heat() * func1->size()) : fast_compare(func1->heat(), func2->heat()); ++} ++ ++// Comparing cluster needed to sort an array of clusters by their densities (in decreasing order) ++static int cluster_comparator(JBoltCluster* cluster1, JBoltCluster* cluster2) { ++ return _jbolt_density_sort ? fast_compare(cluster1->density(), cluster2->density()) : fast_compare(cluster1->heats(), cluster2->heats()); ++} ++ ++// Comparing call indexes needed to sort an array of call indexes by their call counts (in decreasing order) ++static int func_call_indexes_comparator(int* index1, int* index2) { ++ return fast_compare(_calls->at(*index1).call_count(), _calls->at(*index2).call_count()); ++} ++ ++JBoltCallGraph& JBoltCallGraph::callgraph_instance() { ++ static JBoltCallGraph _call_graph; ++ return _call_graph; ++} ++ ++void JBoltCallGraph::add_func(JBoltFunc* func) { ++ if (!(UseJBolt && JBoltManager::reorder_phase_is_profiling_or_waiting())) return; ++ JBoltCluster* cluster = find_cluster(func); ++ assert(cluster != NULL, "invariant"); ++} ++ ++void JBoltCallGraph::add_call(JBoltCall* call) { ++ if (!(UseJBolt && JBoltManager::reorder_phase_is_profiling_or_waiting())) return; ++ // Self-recursion is not helpful for the call, skip it directly ++ if (call->caller() == call->callee()) return; ++ add_call_to_calls(_calls, call); ++} ++ ++uintptr_t related_data_jbolt_merge_judge[] = { ++ (uintptr_t)in_bytes(JBoltCluster::id_offset()), ++ (uintptr_t)in_bytes(JBoltCluster::heats_offset()), ++ (uintptr_t)in_bytes(JBoltCluster::frozen_offset()), ++ (uintptr_t)in_bytes(JBoltCluster::size_offset()), ++ (uintptr_t)in_bytes(JBoltCluster::density_offset()), ++ (uintptr_t)in_bytes(JBoltCluster::func_indexes_offset()), ++ ++ (uintptr_t)in_bytes(GrowableArray
::data_offset()), ++ ++ (uintptr_t)JBoltCluster::find_cluster_by_id, ++ (uintptr_t)_jbolt_merge_frozen ++}; ++ ++static void deal_with_each_func(GrowableArray* clusters, GrowableArray* funcs, GrowableArray* merged) { ++ for (int i = 0; i < funcs->length(); ++i) { ++ JBoltFunc& func = funcs->at(i); ++ ++ JBoltCluster* cluster = JBoltCluster::find_cluster_by_id(clusters, func.cluster_id()); ++ ++ // for cluster size larger than page size, should be frozen and don't merge with any cluster ++ if (_jbolt_merge_frozen && cluster->frozen()) continue; ++ ++ // find best predecessor ++ func.call_indexes()->sort(&func_call_indexes_comparator); ++ ++ int bestPred = -1; ++ ++ for (int j = 0; j < func.call_indexes()->length(); ++j) { ++ const JBoltCall& call = _calls->at(func.call_indexes()->at(j)); ++ ++ bestPred = os::Linux::jboltMerge_judge(related_data_jbolt_merge_judge, call.caller().cluster_id(), (address)clusters, (address)merged, (address)cluster); ++ ++ if (bestPred == -1) continue; ++ ++ break; ++ } ++ ++ // not merge -- no suitable caller nodes ++ if (bestPred == -1) { ++ continue; ++ } ++ ++ JBoltCluster* predCluster = JBoltCluster::find_cluster_by_id(clusters, bestPred); ++ ++ // merge callee cluster to caller cluster ++ for (int j = 0; j < cluster->func_indexes()->length(); ++j) { ++ int index = cluster->func_indexes()->at(j); ++ predCluster->append_func_index(index); ++ } ++ predCluster->add_heat(cluster->heats()); ++ predCluster->add_size(cluster->size()); ++ predCluster->update_density(); ++ merged->at(cluster->id()) = bestPred; ++ cluster->clear(); ++ } ++} ++ ++// Every node is a cluster with funcs ++// Initially each cluster has only one func inside ++GrowableArray* JBoltCallGraph::hfsort() { ++ if (!(UseJBolt && (JBoltDumpMode || JBoltManager::auto_mode()))) return NULL; ++ log_debug(jbolt)( "hfsort begin...\n"); ++ // Copies are needed for saving initial graph in memory ++ GrowableArray* clusters = clusters_copy(); ++ GrowableArray* funcs = funcs_copy(); ++ ++ // store a map for finding head of merge chain ++ GrowableArray* merged = create_growable_array(clusters->length()); ++ for (int i = 0; i < clusters->length(); ++i) { ++ merged->append(-1); ++ } ++ ++ // sorted by func(initially a node) weight(now just as 'heat') ++ funcs->sort(&func_comparator); ++ ++ // Process each function, and consider merging its cluster with the ++ // one containing its most likely predecessor. 
++ deal_with_each_func(clusters, funcs, merged); ++ ++ // the set of clusters that are left ++ GrowableArray* sortedClusters = create_growable_array(); ++ for (int i = 0; i < clusters->length(); ++i) { ++ if (clusters->at(i).id() != -1) { ++ sortedClusters->append(clusters->at(i)); ++ } ++ } ++ ++ sortedClusters->sort(&cluster_comparator); ++ ++ GrowableArray* order = clusters_to_funcs_order(sortedClusters); ++ ++ delete clusters; ++ delete funcs; ++ delete merged; ++ delete sortedClusters; ++ log_debug(jbolt)( "hfsort over...\n"); ++ ++ return order; ++} ++ ++JBoltFunc::JBoltFunc() : ++ _method(NULL), ++ _method_id(0), ++ _heat(0), ++ _size(0), ++ _cluster_id(-1), ++ _method_key(), ++ _call_indexes(create_growable_array()) {} ++ ++JBoltFunc::JBoltFunc(const JBoltFunc& func) : ++ _method(func._method), ++ _method_id(func._method_id), ++ _heat(func._heat), ++ _size(func._size), ++ _cluster_id(func._cluster_id), ++ _method_key(func._method_key), ++ _call_indexes(create_growable_array(func.get_calls_count())) { ++ GrowableArray* array = func.call_indexes(); ++ _call_indexes->appendAll(array); ++ } ++ ++JBoltFunc::JBoltFunc(const Method* method, traceid method_id, int size, JBoltMethodKey method_key) : ++ _method(method), ++ _method_id(method_id), ++ _heat(0), ++ _size(size), ++ _cluster_id(-1), ++ _method_key(method_key), ++ _call_indexes(create_growable_array()) { ++ // not new_symbol, need to inc reference cnt ++ _method_key.klass()->increment_refcount(); ++ _method_key.name()->increment_refcount(); ++ _method_key.sig()->increment_refcount(); ++ } ++ ++void JBoltFunc::add_heat(int64_t heat) { ++ _heat += heat; ++ assert(_cluster_id != -1, "invariant"); ++ _clusters->at(_cluster_id).add_heat(heat); ++ _clusters->at(_cluster_id).update_density(); ++} ++ ++void JBoltFunc::set_heat(int64_t heat) { ++ int64_t diff = heat - _heat; ++ _heat = heat; ++ assert(_cluster_id != -1, "invariant"); ++ _clusters->at(_cluster_id).add_heat(diff); ++ _clusters->at(_cluster_id).update_density(); ++} ++ ++void JBoltFunc::set_cluster_id(int cluster_id) { _cluster_id = cluster_id; } ++ ++void JBoltFunc::append_call_index(int index) { _call_indexes->append(index); } ++ ++JBoltFunc* JBoltFunc::constructor(const Method* method, traceid method_id, int size, JBoltMethodKey method_key) { ++ JBoltFunc *ret = new JBoltFunc(method, method_id, size, method_key); ++ return ret; ++} ++ ++JBoltFunc* JBoltFunc::copy_constructor(const JBoltFunc* func) { ++ JBoltFunc *ret = new JBoltFunc(*func); ++ return ret; ++} ++ ++JBoltCluster::JBoltCluster() : ++ _id(-1), ++ _heats(0), ++ _frozen(false), ++ _size(0), ++ _density(0.0), ++ _func_indexes(create_growable_array()) {} ++ ++JBoltCluster::JBoltCluster(const JBoltFunc& func) : ++ _id(_init_cluster_id++), ++ _heats(func.heat()), ++ _frozen(false), ++ _size(func.size()), ++ _density(0.0), ++ _func_indexes(create_growable_array()) { ++ if (_size >= PAGE_SIZE) ++ freeze(); ++ ++ update_density(); ++ ++ int func_idx = find_func_index(&func); ++ assert(func_idx != -1, "invariant"); ++ _func_indexes->append(func_idx); ++ } ++ ++JBoltCluster::JBoltCluster(const JBoltCluster& cluster) : ++ _id(cluster.id()), ++ _heats(cluster.heats()), ++ _frozen(cluster.frozen()), ++ _size(cluster.size()), ++ _density(cluster.density()), ++ _func_indexes(create_growable_array(cluster.get_funcs_count())) { ++ GrowableArray* array = cluster.func_indexes(); ++ _func_indexes->appendAll(array); ++ } ++ ++void JBoltCluster::add_heat(int64_t heat) { _heats += heat; } ++ ++void JBoltCluster::freeze() { _frozen = 
true; } ++ ++void JBoltCluster::add_size(int size) { _size += size; } ++ ++void JBoltCluster::update_density() { _density = (double)_heats / (double)_size; } ++ ++void JBoltCluster::append_func_index(int index) { _func_indexes->append(index); } ++ ++void JBoltCluster::clear() { ++ _id = -1; ++ _heats = 0; ++ _frozen = false; ++ _size = 0; ++ _density = 0.0; ++ _func_indexes->clear(); ++} ++ ++// Searching for a cluster by its id ++JBoltCluster* JBoltCluster::find_cluster_by_id(GrowableArray* clusters, u4 id) { ++ if (id >= (u4)clusters->length()) return NULL; ++ ++ return &(clusters->at(id)); ++} ++ ++JBoltCluster* JBoltCluster::constructor(const JBoltFunc* func) { ++ JBoltCluster *ret = new JBoltCluster(*func); ++ return ret; ++} ++ ++JBoltCluster* JBoltCluster::copy_constructor(const JBoltCluster* cluster) { ++ JBoltCluster *ret = new JBoltCluster(*cluster); ++ return ret; ++} ++ ++JBoltCall::JBoltCall() : ++ _caller_index(-1), ++ _callee_index(-1), ++ _call_count(0), ++ _stacktrace_id(0) {} ++ ++JBoltCall::JBoltCall(const JBoltCall& call) : ++ _caller_index(call._caller_index), ++ _callee_index(call._callee_index), ++ _call_count(call._call_count), ++ _stacktrace_id(call._stacktrace_id) {} ++ ++JBoltCall::JBoltCall(const JBoltFunc& caller_func, const JBoltFunc& callee_func, u4 call_count, traceid stacktrace_id) : ++ _call_count(call_count), ++ _stacktrace_id(stacktrace_id) { ++ _caller_index = find_func_index(&caller_func); ++ _callee_index = find_func_index(&callee_func); ++ assert(_caller_index != -1, "invariant"); ++ assert(_callee_index != -1, "invariant"); ++ } ++ ++JBoltFunc& JBoltCall::caller() const { return _funcs->at(_caller_index); } ++ ++JBoltFunc& JBoltCall::callee() const { return _funcs->at(_callee_index); } ++ ++void JBoltCall::set_caller_index(int index) { _caller_index = index; } ++ ++void JBoltCall::set_callee_index(int index) { _callee_index = index; } ++ ++void JBoltCall::set_call_count(u4 call_count) { _call_count = call_count; } ++ ++JBoltCall* JBoltCall::constructor(const JBoltFunc* caller_func, const JBoltFunc* callee_func, u4 call_count, traceid stacktrace_id) { ++ JBoltCall *ret = new JBoltCall(*caller_func, *callee_func, call_count, stacktrace_id); ++ return ret; ++} ++ ++JBoltCall* JBoltCall::copy_constructor(const JBoltCall* call) { ++ JBoltCall *ret = new JBoltCall(*call); ++ return ret; ++} +\ No newline at end of file +diff --git a/src/hotspot/share/jbolt/jBoltCallGraph.hpp b/src/hotspot/share/jbolt/jBoltCallGraph.hpp +new file mode 100644 +index 000000000..1bfbcabcc +--- /dev/null ++++ b/src/hotspot/share/jbolt/jBoltCallGraph.hpp +@@ -0,0 +1,274 @@ ++/* ++ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#ifndef SHARE_JBOLT_JBOLTCALLGRAPH_HPP ++#define SHARE_JBOLT_JBOLTCALLGRAPH_HPP ++ ++#include "jbolt/jbolt_globals.hpp" ++#include "jbolt/jBoltManager.hpp" ++#include "jfr/utilities/jfrTypes.hpp" ++#include "utilities/growableArray.hpp" ++ ++class JBoltFunc; ++class JBoltCall; ++class JBoltCluster; ++ ++template ++static GrowableArray* create_growable_array(int size = 1) { ++ GrowableArray* array = new (ResourceObj::C_HEAP, mtTracing) GrowableArray(size, mtTracing); ++ assert(array != NULL, "invariant"); ++ return array; ++} ++ ++// initial cluster id ++static u4 _init_cluster_id = 0; ++ ++class JBoltCallGraph : public CHeapObj { ++ private: ++ GrowableArray* _clusters = NULL; ++ GrowableArray* _calls = NULL; ++ GrowableArray* _funcs = NULL; ++ ++ JBoltCallGraph() { ++ _clusters = create_growable_array(); ++ _calls = create_growable_array(); ++ _funcs = create_growable_array(); ++ } ++ ++ // for constructing CG ++ void add_func(JBoltFunc* func); // Node ++ void add_call(JBoltCall* call); // Edge ++ ++ public: ++ static JBoltCallGraph& callgraph_instance(); ++ // these two funcs initialize and deinitialize homonymous static array pointers in global ++ static void initialize(); ++ static void deinitialize(); ++ ++ GrowableArray* callgraph_clusters() { return _clusters; } ++ GrowableArray* callgraph_calls() { return _calls; } ++ GrowableArray* callgraph_funcs() { return _funcs; } ++ ++ static void static_add_func(JBoltFunc* func) { callgraph_instance().add_func(func); } ++ static void static_add_call(JBoltCall* call) { callgraph_instance().add_call(call); } ++ ++ // for dealing with CG ++ GrowableArray* hfsort(); ++ ++ int clear_instance(); ++ ++ virtual ~JBoltCallGraph() { ++ delete _clusters; ++ delete _calls; ++ delete _funcs; ++ ++ _clusters = NULL; ++ _calls = NULL; ++ _funcs = NULL; ++ } ++}; ++ ++class JBoltFunc : public CHeapObj { ++ private: ++ const Method* _method; ++ traceid _method_id; ++ int64_t _heat; ++ int _size; ++ int _cluster_id; ++ JBoltMethodKey _method_key; ++ GrowableArray* _call_indexes; ++ ++ public: ++ JBoltFunc(); ++ JBoltFunc(const JBoltFunc& func); ++ JBoltFunc(const Method* method, traceid method_id, int size, JBoltMethodKey method_key); ++ ++ virtual ~JBoltFunc() { ++ delete _call_indexes; ++ } ++ ++ bool operator==(const JBoltFunc& func) const { return (_method == func._method && _method_id == func._method_id) || (_method_key.equals(func._method_key)); } ++ bool operator!=(const JBoltFunc& func) const { return (_method != func._method || _method_id != func._method_id) && !(_method_key.equals(func._method_key)); } ++ ++ JBoltFunc& operator=(const JBoltFunc& func) { ++ _method = func._method; ++ _method_id = func._method_id; ++ _heat = func._heat; ++ _size = func._size; ++ _cluster_id = func._cluster_id; ++ _method_key = func._method_key; ++ if (_call_indexes != NULL) { ++ delete _call_indexes; ++ } ++ _call_indexes = create_growable_array(func.get_calls_count()); ++ _call_indexes->appendAll(func.call_indexes()); ++ ++ return *this; ++ } ++ ++ const Method* method() const { return _method; } ++ const traceid method_id() const { return _method_id; } ++ const int64_t heat() const { return _heat; } ++ const int size() const { return _size; } ++ const int cluster_id() const { return _cluster_id; } ++ JBoltMethodKey method_key() const { return _method_key; } ++ GrowableArray* 
call_indexes() const { return _call_indexes; } ++ int get_calls_count() const { return _call_indexes->length(); } ++ ++ void add_heat(int64_t heat); ++ void set_heat(int64_t heat); ++ void set_cluster_id(int cluster_id); ++ void append_call_index(int index); ++ ++ static ByteSize method_offset() { return byte_offset_of(JBoltFunc, _method); } ++ static ByteSize method_id_offset() { return byte_offset_of(JBoltFunc, _method_id); } ++ static ByteSize heat_offset() { return byte_offset_of(JBoltFunc, _heat); } ++ static ByteSize size_offset() { return byte_offset_of(JBoltFunc, _size); } ++ static ByteSize cluster_id_offset() { return byte_offset_of(JBoltFunc, _cluster_id); } ++ static ByteSize call_indexes_offset() { return byte_offset_of(JBoltFunc, _call_indexes); } ++ ++ static JBoltFunc* constructor(const Method* method, traceid method_id, int size, JBoltMethodKey method_key); ++ static JBoltFunc* copy_constructor(const JBoltFunc* func); ++}; ++ ++class JBoltCluster : public CHeapObj { ++ private: ++ int _id; ++ int64_t _heats; ++ bool _frozen; ++ int _size; ++ double _density; ++ GrowableArray* _func_indexes; ++ ++ public: ++ JBoltCluster(); ++ JBoltCluster(const JBoltFunc& func); ++ JBoltCluster(const JBoltCluster& cluster); ++ ++ bool operator==(const JBoltCluster& cluster) const { ++ if (_id != cluster.id()) return false; ++ ++ int count = get_funcs_count(); ++ if (count != cluster.get_funcs_count()) ++ return false; ++ ++ for (int i = 0; i < count; ++i) { ++ if (_func_indexes->at(i) != cluster._func_indexes->at(i)) { ++ return false; ++ } ++ } ++ ++ return true; ++ } ++ ++ JBoltCluster& operator=(const JBoltCluster& cluster) { ++ _id = cluster.id(); ++ _heats = cluster.heats(); ++ _frozen = cluster.frozen(); ++ _size = cluster.size(); ++ _density = cluster.density(); ++ if (_func_indexes != NULL) { ++ delete _func_indexes; ++ } ++ _func_indexes = create_growable_array(cluster.get_funcs_count()); ++ _func_indexes->appendAll(cluster.func_indexes()); ++ return *this; ++ } ++ ++ virtual ~JBoltCluster() { delete _func_indexes; } ++ ++ int id() const { return _id; } ++ int64_t heats() const { return _heats; } ++ bool frozen() const { return _frozen; } ++ int size() const { return _size; } ++ double density() const { return _density; } ++ GrowableArray* func_indexes() const { return _func_indexes; } ++ int get_funcs_count() const { return _func_indexes->length(); } ++ ++ void add_heat(int64_t heat); ++ void freeze(); ++ void add_size(int size); ++ void update_density(); ++ void append_func_index(int index); ++ void clear(); ++ ++ static JBoltCluster* find_cluster_by_id(GrowableArray* clusters, u4 id); ++ ++ static ByteSize id_offset() { return byte_offset_of(JBoltCluster, _id); } ++ static ByteSize heats_offset() { return byte_offset_of(JBoltCluster, _heats); } ++ static ByteSize frozen_offset() { return byte_offset_of(JBoltCluster, _frozen); } ++ static ByteSize size_offset() { return byte_offset_of(JBoltCluster, _size); } ++ static ByteSize density_offset() { return byte_offset_of(JBoltCluster, _density); } ++ static ByteSize func_indexes_offset() { return byte_offset_of(JBoltCluster, _func_indexes); } ++ ++ static JBoltCluster* constructor(const JBoltFunc* func); ++ static JBoltCluster* copy_constructor(const JBoltCluster* cluster); ++}; ++ ++class JBoltCall : public CHeapObj { ++ private: ++ int _caller_index; ++ int _callee_index; ++ u4 _call_count; ++ traceid _stacktrace_id; ++ ++ public: ++ JBoltCall(); ++ JBoltCall(const JBoltCall& call); ++ JBoltCall(const JBoltFunc& caller_func, const 
JBoltFunc& callee_func, u4 call_count, traceid stacktrace_id);
++
++  bool operator==(const JBoltCall& call) const {
++    return _caller_index == call._caller_index && _callee_index == call._callee_index;
++  }
++
++  JBoltCall& operator=(const JBoltCall& call) {
++    _caller_index = call._caller_index;
++    _callee_index = call._callee_index;
++    _call_count = call._call_count;
++    _stacktrace_id = call._stacktrace_id;
++    return *this;
++  }
++
++  virtual ~JBoltCall() {}
++
++  int caller_index() const { return _caller_index; }
++  int callee_index() const { return _callee_index; }
++  u4 call_count() const { return _call_count; }
++  traceid stacktrace_id() const { return _stacktrace_id; }
++
++  JBoltFunc& caller() const;
++  JBoltFunc& callee() const;
++  void set_caller_index(int index);
++  void set_callee_index(int index);
++  void set_call_count(u4 count);
++
++  static ByteSize caller_offset() { return byte_offset_of(JBoltCall, _caller_index); }
++  static ByteSize callee_offset() { return byte_offset_of(JBoltCall, _callee_index); }
++  static ByteSize call_count_offset() { return byte_offset_of(JBoltCall, _call_count); }
++  static ByteSize stacktrace_id_offset() { return byte_offset_of(JBoltCall, _stacktrace_id); }
++
++  static JBoltCall* constructor(const JBoltFunc* caller_func, const JBoltFunc* callee_func, u4 call_count, traceid stacktrace_id);
++  static JBoltCall* copy_constructor(const JBoltCall* call);
++};
++
++#endif // SHARE_JBOLT_JBOLTCALLGRAPH_HPP
+diff --git a/src/hotspot/share/jbolt/jBoltControlThread.cpp b/src/hotspot/share/jbolt/jBoltControlThread.cpp
+new file mode 100644
+index 000000000..d813a94f9
+--- /dev/null
++++ b/src/hotspot/share/jbolt/jBoltControlThread.cpp
+@@ -0,0 +1,290 @@
++/*
++ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ */ ++#include ++ ++#include "classfile/javaClasses.inline.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "jbolt/jBoltControlThread.hpp" ++#include "jbolt/jBoltManager.hpp" ++#include "logging/log.hpp" ++#include "logging/logStream.hpp" ++#include "runtime/atomic.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/jniHandles.inline.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/sweeper.hpp" ++ ++JavaThread* volatile JBoltControlThread::_the_java_thread = NULL; ++Monitor* JBoltControlThread::_control_wait_monitor = NULL; ++Monitor* JBoltControlThread::_sample_wait_monitor = NULL; ++jobject JBoltControlThread::_thread_obj = NULL; ++int volatile JBoltControlThread::_signal = JBoltControlThread::SIG_NULL; ++bool volatile JBoltControlThread::_abort = false; ++intx volatile JBoltControlThread::_interval = 0; ++ ++static bool not_first = false; ++ ++void JBoltControlThread::init(TRAPS) { ++ Handle string = java_lang_String::create_from_str("JBolt Control", CATCH); ++ Handle thread_group(THREAD, Universe::system_thread_group()); ++ Handle thread_oop = JavaCalls::construct_new_instance( ++ SystemDictionary::Thread_klass(), ++ vmSymbols::threadgroup_string_void_signature(), ++ thread_group, ++ string, ++ CATCH); ++ _thread_obj = JNIHandles::make_global(thread_oop); ++ _control_wait_monitor = new Monitor(Mutex::nonleaf, "JBoltControlMonitor"); ++ _sample_wait_monitor = new Monitor(Mutex::nonleaf, "JBoltSampleMonitor"); ++ OrderAccess::release_store(&_interval, JBoltSampleInterval); ++} ++ ++void JBoltControlThread::start_thread(TRAPS) { ++ guarantee(OrderAccess::load_acquire(&_the_java_thread) == NULL, "sanity"); ++ JavaThread* new_thread = new JavaThread(&thread_entry); ++ if (new_thread->osthread() == NULL) { ++ fatal("Failed to create JBoltControlThread as no os thread!"); ++ return; ++ } ++ ++ Handle thread_oop(THREAD, JNIHandles::resolve_non_null(_thread_obj)); ++ { ++ MutexLocker mu(Threads_lock, THREAD); ++ java_lang_Thread::set_thread(thread_oop(), new_thread); ++ java_lang_Thread::set_priority(thread_oop(), MinPriority); ++ java_lang_Thread::set_daemon(thread_oop()); ++ new_thread->set_threadObj(thread_oop()); ++ Threads::add(new_thread); ++ Thread::start(new_thread); ++ } ++ guarantee(Atomic::cmpxchg(new_thread, &_the_java_thread, (JavaThread*) NULL) == NULL, "sanity"); ++} ++ ++intx JBoltControlThread::sample_interval() { ++ return OrderAccess::load_acquire(&_interval); ++} ++ ++// Work to do before restarting a control schedule, twice and after only ++bool JBoltControlThread::prev_control_schdule(TRAPS) { ++ guarantee(JBoltManager::auto_mode(), "sanity"); ++ // Clear obsolete data structures ++ if (JBoltManager::clear_last_sample_datas() != 0) { ++ log_error(jbolt)("Something wrong happened in data clean, not going on..."); ++ return false; ++ } ++ ++ // Restart JFR ++ bufferedStream output; ++ DCmd::parse_and_execute(DCmd_Source_Internal, &output, "JFR.start name=jbolt-jfr", ' ', THREAD); ++ if (HAS_PENDING_EXCEPTION) { ++ ResourceMark rm; ++ log_warning(jbolt)("unable to start jfr jbolt-jfr"); ++ log_warning(jbolt)("exception type: %s", PENDING_EXCEPTION->klass()->external_name()); ++ // don't unwind this exception ++ CLEAR_PENDING_EXCEPTION; ++ } ++ ++ return true; ++} ++ ++void JBoltControlThread::control_schdule(TRAPS) { ++ guarantee(JBoltManager::auto_mode(), "sanity"); ++ ++ { MonitorLocker locker(_sample_wait_monitor); ++ 
// Perform time wait ++ log_info(jbolt)("JBolt Starting Sample for %lds!!!", sample_interval()); ++ const jlong interval = (jlong) sample_interval(); ++ jlong cur_time = os::javaTimeMillis(); ++ const jlong end_time = cur_time + (interval * 1000); ++ while ((end_time > cur_time) && OrderAccess::load_acquire(&_signal) != SIG_STOP_PROFILING) { ++ int64_t timeout = (int64_t) (end_time - cur_time); ++ locker.wait(timeout); ++ cur_time = os::javaTimeMillis(); ++ } ++ } ++ // Close JFR ++ guarantee(JBoltManager::reorder_phase_profiling_to_waiting(), "sanity"); ++ bufferedStream output; ++ DCmd::parse_and_execute(DCmd_Source_Internal, &output, "JFR.stop name=jbolt-jfr", ' ', THREAD); ++ if (HAS_PENDING_EXCEPTION) { ++ ResourceMark rm; ++ // JFR.stop maybe failed if a jfr recording is already stopped ++ // but it's nothing worry, jbolt should continue to work normally ++ log_warning(jbolt)("unable to stop jfr jbolt-jfr"); ++ log_warning(jbolt)("exception type: %s", PENDING_EXCEPTION->klass()->external_name()); ++ // don't unwind this exception ++ CLEAR_PENDING_EXCEPTION; ++ } ++ if (Atomic::cmpxchg(false, &_abort, true) == /* should abort */ true) { ++ return; ++ } ++ ++ size_t total_nmethod_size = 0; ++ // Init structures for load phase ++ JBoltManager::init_auto_transition(&total_nmethod_size, CATCH); ++ ++ if (total_nmethod_size > JBoltCodeHeapSize) { ++ log_warning(jbolt)("JBolt reordering not complete because JBolt CodeHeap is too small to place all ordered methods. Please use -XX:JBoltCodeHeapSize to enlarge"); ++ log_warning(jbolt)("JBoltCodeHeapSize=" UINTX_FORMAT " B ( need " UINTX_FORMAT " B).", JBoltCodeHeapSize, total_nmethod_size); ++ } ++ ++ if (not_first) { ++ // Exchange Hot Segment primary and secondary relationships ++ JBoltManager::swap_semi_jbolt_segs(); ++ } ++ ++ if (!not_first && EnableDumpGraph) { ++ // When EnableDumpGraph, dump initial code heaps for compared ++ JBoltManager::dump_code_heaps_with_count(); ++ } ++ ++ guarantee(JBoltManager::reorder_phase_waiting_to_reordering(), "sanity"); ++ OrderAccess::release_store(&_signal, SIG_NULL); ++ ++ // Start reorder ++ JBoltManager::reorder_all_methods(CATCH); ++} ++ ++// Work to do after reordering, twice and after only ++void JBoltControlThread::post_control_schdule(TRAPS) { ++ JBoltManager::clear_secondary_hot_seg(THREAD); ++} ++ ++struct tm JBoltControlThread::next_trigger_time(struct tm* localtime) { ++ struct tm target_tm = *localtime; ++ GrowableArray* rescheduling_time = JBoltManager::rescheduling_time(); ++ for (int i = 0; i < rescheduling_time->length(); ++i) { ++ char* target_time = rescheduling_time->at(i); ++ int target_hour = (target_time[0] - '0') * 10 + (target_time[1] - '0'); ++ int target_minute = (target_time[3] - '0') * 10 + (target_time[4] - '0'); ++ if (target_hour > localtime->tm_hour || (target_hour == localtime->tm_hour && target_minute > localtime->tm_min)) { ++ target_tm.tm_hour = target_hour; ++ target_tm.tm_min = target_minute; ++ target_tm.tm_sec = 0; ++ break; ++ } ++ if (i == rescheduling_time->length() - 1) { ++ target_time = rescheduling_time->at(0); ++ target_hour = (target_time[0] - '0') * 10 + (target_time[1] - '0'); ++ target_minute = (target_time[3] - '0') * 10 + (target_time[4] - '0'); ++ target_tm.tm_mday += 1; ++ target_tm.tm_hour = target_hour; ++ target_tm.tm_min = target_minute; ++ target_tm.tm_sec = 0; ++ mktime(&target_tm); ++ } ++ } ++ ++ return target_tm; ++} ++ ++void JBoltControlThread::wait_for_next_trigger(TRAPS) { ++ MonitorLocker locker(_control_wait_monitor); ++ time_t 
current_time; ++ struct tm p; ++ time(¤t_time); ++ localtime_r(¤t_time, &p); ++ if (JBoltManager::rescheduling_time() != NULL && JBoltManager::rescheduling_time()->length() > 0) { ++ struct tm target_tm = next_trigger_time(&p); ++ log_info(jbolt)("next trigger is at %d.%d.%d.%02d:%02d:%02d",1900+target_tm.tm_year,1+target_tm.tm_mon,target_tm.tm_mday,target_tm.tm_hour,target_tm.tm_min,target_tm.tm_sec); ++ while (OrderAccess::load_acquire(&_signal) != SIG_START_PROFILING) { ++ long time_wait = mktime(&target_tm) - current_time; ++ if (time_wait <= 0) { ++ log_info(jbolt)("successfully trigger at %02d:%02d",target_tm.tm_hour,target_tm.tm_min); ++ break; ++ } ++ locker.wait(time_wait * 1000); ++ time(¤t_time); ++ } ++ } ++ else { ++ while (OrderAccess::load_acquire(&_signal) != SIG_START_PROFILING) { ++ locker.wait(60 * 1000); ++ } ++ } ++} ++ ++void JBoltControlThread::thread_run_auto_loop(TRAPS) { ++ do { ++ OrderAccess::release_store(&_signal, SIG_NULL); ++ if (not_first && !prev_control_schdule(THREAD)) continue; ++ guarantee(JBoltManager::reorder_phase_available_to_profiling(), "sanity"); ++ control_schdule(THREAD); ++ if (!JBoltManager::reorder_phase_reordering_to_available()) { ++ // abort logic ++ guarantee(JBoltManager::reorder_phase_waiting_to_available(), "sanity"); ++ guarantee(Atomic::cmpxchg(SIG_NULL, &_signal, SIG_STOP_PROFILING) == SIG_STOP_PROFILING, "sanity"); ++ } ++ else if (not_first) { ++ post_control_schdule(THREAD); ++ } ++ not_first = true; ++ wait_for_next_trigger(THREAD); ++ JBoltManager::clear_structures(); ++ } while(true); ++} ++ ++void JBoltControlThread::thread_run(TRAPS) { ++ if (JBoltManager::auto_mode()) { ++ thread_run_auto_loop(THREAD); ++ } else { ++ guarantee(JBoltManager::can_reorder_now(), "sanity"); ++ guarantee(JBoltManager::reorder_phase_collecting_to_reordering(), "sanity"); ++ JBoltManager::reorder_all_methods(CATCH); ++ JBoltManager::clear_structures(); ++ guarantee(JBoltManager::reorder_phase_reordering_to_end(), "sanity"); ++ assert(JBoltLoadMode, "Only manual JBoltLoadMode can reach here"); ++ } ++} ++ ++bool JBoltControlThread::notify_sample_wait(bool abort) { ++ int old_sig = Atomic::cmpxchg(SIG_STOP_PROFILING, &_signal, SIG_NULL); ++ if (old_sig == SIG_NULL) { ++ MonitorLocker locker(_sample_wait_monitor); ++ // abort implementation maybe not in order in extreme cases ++ // add fence? or delete abort() if not so useful. ++ OrderAccess::release_store(&_abort, abort); ++ locker.notify(); ++ return true; ++ } ++ return false; ++} ++ ++bool JBoltControlThread::notify_control_wait(intx interval) { ++ int old_sig = Atomic::cmpxchg(SIG_START_PROFILING, &_signal, SIG_NULL); ++ if (old_sig == SIG_NULL) { ++ // this lock will be grabbed by ControlThread until it's waiting ++ MonitorLocker locker(_control_wait_monitor); ++ OrderAccess::release_store(&_interval, interval); ++ locker.notify(); ++ return true; ++ } ++ return false; ++} ++ ++JavaThread* JBoltControlThread::get_thread() { ++ return OrderAccess::load_acquire(&_the_java_thread); ++} +\ No newline at end of file +diff --git a/src/hotspot/share/jbolt/jBoltControlThread.hpp b/src/hotspot/share/jbolt/jBoltControlThread.hpp +new file mode 100644 +index 000000000..946a61960 +--- /dev/null ++++ b/src/hotspot/share/jbolt/jBoltControlThread.hpp +@@ -0,0 +1,73 @@ ++/* ++ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#ifndef SHARE_JBOLT_JBOLTCONTROLTHREAD_HPP ++#define SHARE_JBOLT_JBOLTCONTROLTHREAD_HPP ++ ++#include "runtime/thread.hpp" ++ ++/** ++ * Control JBolt how to run in this thread. ++ */ ++class JBoltControlThread: public AllStatic { ++public: ++ static const int SIG_NULL = 0; ++ static const int SIG_START_PROFILING = 1; ++ static const int SIG_STOP_PROFILING = 2; ++ ++private: ++ static JavaThread* volatile _the_java_thread; ++ // Can be notified by jcmd JBolt.start, restart a control schedule ++ static Monitor* _control_wait_monitor; ++ // Can be notified by jcmd JBolt.stop/abort, stop a running JFR ++ static Monitor* _sample_wait_monitor; ++ static jobject _thread_obj; ++ static int volatile _signal; ++ static bool volatile _abort; ++ static intx volatile _interval; ++ ++ static void thread_entry(JavaThread* thread, TRAPS) { thread_run(thread); } ++ static void thread_run(TRAPS); ++ static void thread_run_auto_loop(TRAPS); ++ ++ static intx sample_interval(); ++ static bool prev_control_schdule(TRAPS); ++ static void control_schdule(TRAPS); ++ static void post_control_schdule(TRAPS); ++ static void wait_for_next_trigger(TRAPS); ++ ++ static struct tm next_trigger_time(struct tm* localtime); ++ ++public: ++ static void init(TRAPS); ++ ++ static void start_thread(TRAPS); ++ ++ static bool notify_sample_wait(bool abort = false); ++ ++ static bool notify_control_wait(intx interval); ++ ++ static JavaThread* get_thread(); ++}; ++ ++#endif // SHARE_JBOLT_JBOLTCONTROLTHREAD_HPP +\ No newline at end of file +diff --git a/src/hotspot/share/jbolt/jBoltDcmds.cpp b/src/hotspot/share/jbolt/jBoltDcmds.cpp +new file mode 100644 +index 000000000..249a98001 +--- /dev/null ++++ b/src/hotspot/share/jbolt/jBoltDcmds.cpp +@@ -0,0 +1,249 @@ ++/* ++ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "jbolt/jBoltDcmds.hpp" ++#include "jbolt/jBoltControlThread.hpp" ++#include "jbolt/jBoltManager.hpp" ++ ++bool register_jbolt_dcmds() { ++ uint32_t full_export = DCmd_Source_Internal | DCmd_Source_AttachAPI | DCmd_Source_MBean; ++ DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl(full_export, true, false)); ++ DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl(full_export, true, false)); ++ DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl(full_export, true, false)); ++ DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl(full_export, true, false)); ++ if (EnableDumpGraph) DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl(full_export, true, false)); ++ return true; ++} ++ ++JBoltStartDCmd::JBoltStartDCmd(outputStream* output, bool heap) : DCmdWithParser(output, heap), ++ _duration("duration", "Duration of time(second) in this sample.", "INT", false, "600") { ++ _dcmdparser.add_dcmd_option(&_duration); ++} ++ ++int JBoltStartDCmd::num_arguments() { ++ ResourceMark rm; ++ JBoltStartDCmd* dcmd = new JBoltStartDCmd(NULL, false); ++ if (dcmd != NULL) { ++ DCmdMark mark(dcmd); ++ return dcmd->_dcmdparser.num_arguments(); ++ } else { ++ return 0; ++ } ++} ++ ++void JBoltStartDCmd::execute(DCmdSource source, TRAPS) { ++ if (!UseJBolt) { ++ output()->print_cr("Unable to execute because \"UseJBolt\" is disabled."); ++ return; ++ } ++ ++ if (!JBoltManager::auto_mode()) { ++ output()->print_cr("JBolt JCMD can only be used in auto mode."); ++ return; ++ } ++ ++ if (!JBoltManager::reorder_phase_is_available()) { ++ output()->print_cr("Unable to start because it's working now. Stop it first."); ++ return; ++ } ++ ++ intx interval = _duration.is_set() ? _duration.value() : JBoltSampleInterval; ++ ++ if (interval < 0) { ++ output()->print_cr("duration is set to %ld which is above range, should be in [0, %d]", interval, max_jint); ++ return; ++ } ++ ++ if (JBoltControlThread::notify_control_wait(interval)) { ++ output()->print_cr("OK. Start a new JBolt schedule, duration=%lds.", interval); ++ } ++ else { ++ output()->print_cr("It's busy now. Please try again later..."); ++ } ++} ++ ++void JBoltStartDCmd::print_help(const char* name) const { ++ output()->print_cr( ++ "Syntax : %s [options]\n" ++ "\n" ++ "Options:\n" ++ "\n" ++ " duration (Optional) Duration of time(second) in this sample. 
(INT, default value=600)\n" ++ "\n" ++ "Options must be specified using the or = syntax.\n" ++ "\n" ++ "Example usage:\n" ++ " $ jcmd JBolt.start\n" ++ " $ jcmd JBolt.start duration=900", name); ++} ++ ++void JBoltStopDCmd::execute(DCmdSource source, TRAPS) { ++ if (!UseJBolt) { ++ output()->print_cr("Unable to execute because \"UseJBolt\" is disabled."); ++ return; ++ } ++ ++ if (!JBoltManager::auto_mode()) { ++ output()->print_cr("JBolt JCMD can only be used in auto mode."); ++ return; ++ } ++ ++ if (!JBoltManager::reorder_phase_is_profiling()) { ++ output()->print_cr("Unable to stop because it's not sampling now."); ++ return; ++ } ++ ++ if (JBoltControlThread::notify_sample_wait()) { ++ output()->print_cr("OK.\"jbolt-jfr\" would be stopped and turn to reorder."); ++ } else { ++ output()->print_cr("It's busy now. Please try again later..."); ++ } ++} ++ ++void JBoltStopDCmd::print_help(const char* name) const { ++ output()->print_cr( ++ "Syntax : %s\n" ++ "\n" ++ "Example usage:\n" ++ " $ jcmd JBolt.stop", name); ++} ++ ++void JBoltAbortDCmd::execute(DCmdSource source, TRAPS) { ++ if (!UseJBolt) { ++ output()->print_cr("Unable to execute because \"UseJBolt\" is disabled."); ++ return; ++ } ++ ++ if (!JBoltManager::auto_mode()) { ++ output()->print_cr("JBolt JCMD can only be used in auto mode."); ++ return; ++ } ++ ++ if (!JBoltManager::reorder_phase_is_profiling()) { ++ output()->print_cr("Unable to abort because it's not sampling now."); ++ return; ++ } ++ ++ if (JBoltControlThread::notify_sample_wait(true)) { ++ output()->print_cr("OK.\"jbolt-jfr\" would be aborted."); ++ } else { ++ output()->print_cr("It's busy now. Please try again later..."); ++ } ++} ++ ++void JBoltAbortDCmd::print_help(const char* name) const { ++ output()->print_cr( ++ "Syntax : %s\n" ++ "\n" ++ "Example usage:\n" ++ " $ jcmd JBolt.abort", name); ++} ++ ++JBoltDumpDCmd::JBoltDumpDCmd(outputStream* output, bool heap) : DCmdWithParser(output, heap), ++ _filename("filename", "Name of the file to which the flight recording data is dumped", "STRING", true, NULL) { ++ _dcmdparser.add_dcmd_option(&_filename); ++} ++ ++int JBoltDumpDCmd::num_arguments() { ++ ResourceMark rm; ++ JBoltDumpDCmd* dcmd = new JBoltDumpDCmd(NULL, false); ++ if (dcmd != NULL) { ++ DCmdMark mark(dcmd); ++ return dcmd->_dcmdparser.num_arguments(); ++ } else { ++ return 0; ++ } ++} ++ ++void JBoltDumpDCmd::execute(DCmdSource source, TRAPS) { ++ if (!UseJBolt) { ++ output()->print_cr("Unable to execute because \"UseJBolt\" is disabled."); ++ return; ++ } ++ ++ if (!JBoltManager::auto_mode()) { ++ output()->print_cr("JBolt JCMD can only be used in auto mode."); ++ return; ++ } ++ ++ const char* path = _filename.value(); ++ char buffer[PATH_MAX]; ++ char* rp = NULL; ++ ++ JBoltErrorCode ec = JBoltManager::dump_order_in_jcmd(path); ++ switch (ec) { ++ case JBoltOrderNULL: ++ output()->print_cr("Failed: No order applied by JBolt now."); ++ break; ++ case JBoltOpenFileError: ++ output()->print_cr("Failed: File open error or NULL: %s", path); ++ break; ++ case JBoltOK: ++ rp = realpath(path, buffer); ++ output()->print_cr("Successful: Dump to %s", buffer); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void JBoltDumpDCmd::print_help(const char* name) const { ++ output()->print_cr( ++ "Syntax : %s [options]\n" ++ "\n" ++ "Options:\n" ++ "\n" ++ " filename Name of the file to which the flight recording data is dumped. 
(STRING, no default value)\n" ++ "\n" ++ "Options must be specified using the or = syntax.\n" ++ "\n" ++ "Example usage:\n" ++ " $ jcmd JBolt.dump filename=order.log", name); ++} ++ ++void JBoltDumpGraphDCmd::execute(DCmdSource source, TRAPS) { ++ if (!UseJBolt) { ++ output()->print_cr("Unable to execute because \"UseJBolt\" is disabled."); ++ return; ++ } ++ ++ if (!JBoltManager::auto_mode()) { ++ output()->print_cr("JBolt JCMD can only be used in auto mode."); ++ return; ++ } ++ ++ if (JBoltManager::reorder_phase_is_profiling()) { ++ output()->print_cr("Unable to dump because it's sampling now. Stop it first"); ++ return; ++ } ++ ++ JBoltManager::dump_code_heaps_with_count(); ++} ++ ++void JBoltDumpGraphDCmd::print_help(const char* name) const { ++ output()->print_cr( ++ "Syntax : %s\n" ++ "\n" ++ "Example usage:\n" ++ " $ jcmd JBolt.dumpgraph", name); ++} +\ No newline at end of file +diff --git a/src/hotspot/share/jbolt/jBoltDcmds.hpp b/src/hotspot/share/jbolt/jBoltDcmds.hpp +new file mode 100644 +index 000000000..478a2043a +--- /dev/null ++++ b/src/hotspot/share/jbolt/jBoltDcmds.hpp +@@ -0,0 +1,154 @@ ++/* ++ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#ifndef SHARE_JBOLT_JBOLTDCMDS_HPP ++#define SHARE_JBOLT_JBOLTDCMDS_HPP ++ ++#include "services/diagnosticCommand.hpp" ++ ++class JBoltStartDCmd : public DCmdWithParser { ++ protected: ++ DCmdArgument _duration; ++ public: ++ JBoltStartDCmd(outputStream* output, bool heap); ++ ++ static const char* name() { ++ return "JBolt.start"; ++ } ++ static const char* description() { ++ return "Starts a new JBolt sample schedule(fail if sampling)"; ++ } ++ static const char* impact() { ++ return "Medium: Depending on JFR that JBolt rely on, the impact can range from low to high."; ++ } ++ static const JavaPermission permission() { ++ JavaPermission p = {"java.lang.management.ManagementPermission", "control", NULL}; ++ return p; ++ } ++ static int num_arguments(); ++ virtual void execute(DCmdSource source, TRAPS); ++ virtual void print_help(const char* name) const; ++}; ++ ++class JBoltStopDCmd : public DCmd { ++ public: ++ JBoltStopDCmd(outputStream* output, bool heap) : DCmd(output, heap) {} ++ ++ static const char* name() { ++ return "JBolt.stop"; ++ } ++ static const char* description() { ++ return "Stop a running JBolt sample schedule and reorder immediately(fail if not sampling)"; ++ } ++ static const char* impact() { ++ return "Low"; ++ } ++ static const JavaPermission permission() { ++ JavaPermission p = {"java.lang.management.ManagementPermission", "control", NULL}; ++ return p; ++ } ++ static int num_arguments() { ++ return 0; ++ } ++ ++ virtual void execute(DCmdSource source, TRAPS); ++ virtual void print_help(const char* name) const; ++}; ++ ++class JBoltAbortDCmd : public DCmd { ++ public: ++ JBoltAbortDCmd(outputStream* output, bool heap) : DCmd(output, heap) {} ++ ++ static const char* name() { ++ return "JBolt.abort"; ++ } ++ static const char* description() { ++ return "Stop a running JBolt sample schedule but don't reorder(fail if not sampling)"; ++ } ++ static const char* impact() { ++ return "Low"; ++ } ++ static const JavaPermission permission() { ++ JavaPermission p = {"java.lang.management.ManagementPermission", "monitor", NULL}; ++ return p; ++ } ++ static int num_arguments() { ++ return 0; ++ } ++ ++ virtual void execute(DCmdSource source, TRAPS); ++ virtual void print_help(const char* name) const; ++}; ++ ++class JBoltDumpDCmd : public DCmdWithParser { ++ protected: ++ DCmdArgument _filename; ++ public: ++ JBoltDumpDCmd(outputStream* output, bool heap); ++ ++ static const char* name() { ++ return "JBolt.dump"; ++ } ++ static const char* description() { ++ return "dump an effective order to file(fail if no order)"; ++ } ++ static const char* impact() { ++ return "Low"; ++ } ++ static const JavaPermission permission() { ++ JavaPermission p = {"java.lang.management.ManagementPermission", "monitor", NULL}; ++ return p; ++ } ++ static int num_arguments(); ++ virtual void execute(DCmdSource source, TRAPS); ++ virtual void print_help(const char* name) const; ++}; ++ ++class JBoltDumpGraphDCmd : public DCmd { ++ public: ++ JBoltDumpGraphDCmd(outputStream* output, bool heap) : DCmd(output, heap) {} ++ ++ static const char* name() { ++ return "JBolt.dumpgraph"; ++ } ++ static const char* description() { ++ return "Dump count files to provide data for drawing a heap heat graph"; ++ } ++ static const char* impact() { ++ return "Low"; ++ } ++ static const JavaPermission permission() { ++ JavaPermission p = {"java.lang.management.ManagementPermission", "monitor", NULL}; ++ return p; ++ } ++ static int num_arguments() { ++ return 0; ++ } ++ ++ virtual void execute(DCmdSource 
source, TRAPS); ++ virtual void print_help(const char* name) const; ++}; ++ ++bool register_jbolt_dcmds(); ++ ++#endif // SHARE_JBOLT_JBOLTDCMDS_HPP +\ No newline at end of file +diff --git a/src/hotspot/share/jbolt/jBoltManager.cpp b/src/hotspot/share/jbolt/jBoltManager.cpp +new file mode 100644 +index 000000000..13d8dec55 +--- /dev/null ++++ b/src/hotspot/share/jbolt/jBoltManager.cpp +@@ -0,0 +1,1682 @@ ++/* ++ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include ++#include ++#include ++#include ++ ++#include "classfile/javaClasses.inline.hpp" ++#include "classfile/symbolTable.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/codeBlob.hpp" ++#include "code/codeCache.hpp" ++#include "compiler/compileBroker.hpp" ++#include "jbolt/jBoltCallGraph.hpp" ++#include "jbolt/jBoltControlThread.hpp" ++#include "jbolt/jBoltManager.hpp" ++#include "jbolt/jBoltUtils.inline.hpp" ++#include "jfr/jfr.hpp" ++#include "logging/log.hpp" ++#include "logging/logStream.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/klass.inline.hpp" ++#include "oops/method.inline.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/atomic.hpp" ++#include "runtime/globals_extension.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepointVerifiers.hpp" ++#include "runtime/sweeper.hpp" ++#include "utilities/formatBuffer.hpp" ++ ++#define LINE_BUF_SIZE 8192 // used to parse JBolt order file ++#define MIN_FRAMESCOUNT 2 // used as default stacktrace depth ++#define ILL_NM_STATE -2 // used to present nmethod illegal state ++#define PATH_LENGTH 256 // used to store path ++ ++#define B_TF(b) (b ? 
"V" : "X") ++ ++GrowableArray* JBoltManager::_hot_methods_sorted = NULL; ++JBoltManager::MethodKeyMap* JBoltManager::_hot_methods_vis = NULL; ++int JBoltManager::_reorder_method_threshold_cnt = 0; ++ ++volatile int JBoltManager::_reorder_phase = JBoltReorderPhase::Available; ++volatile int JBoltManager::_reorderable_method_cnt = 0; ++Method* volatile JBoltManager::_cur_reordering_method = NULL; ++ ++Thread* JBoltManager::_start_reordering_thread = NULL; ++ ++JBoltManager::StackFrameKeyMap* JBoltManager::_sampled_methods_refs = NULL; ++JBoltManager::MethodHotCountMap* JBoltManager::_sampled_methods_hotcount_stored = NULL; ++ ++bool JBoltManager::_auto_mode = false; ++ ++// swap between MethodJBoltHot and MethodJBoltTmp ++volatile int JBoltManager::_primary_hot_seg = CodeBlobType::MethodJBoltHot; ++volatile int JBoltManager::_secondary_hot_seg = CodeBlobType::MethodJBoltTmp; ++ ++// used in Reordering phase, reset to ##false after swapping the hot codecache ++volatile bool JBoltManager::_hot_codecache_full = false; ++volatile bool JBoltManager::_force_sweep = false; ++ ++GrowableArray* JBoltManager::_rescheduling_time = NULL; ++GrowableArray* _order_stored = NULL; ++ ++// This is a tmp obj used only in initialization phases. ++// We cannot alloc Symbol in phase 1 so we have to parses the order file again ++// in phase 2. ++// This obj will be freed after initialization. ++static FILE* _order_fp = NULL; ++ ++static bool read_line(FILE* fp, char* buf, int buf_len, int* res_len) { ++ if (fgets(buf, buf_len, fp) == NULL) { ++ return false; ++ } ++ int len = (int) strcspn(buf, "\r\n"); ++ buf[len] = '\0'; ++ *res_len = len; ++ return true; ++} ++ ++static bool read_a_size(char* buf, size_t* res) { ++ char* t = strchr(buf, ' '); ++ if (t == NULL) return false; ++ *t = '\0'; ++ julong v; ++ if (!Arguments::atojulong(buf, &v)) { ++ *t = ' '; ++ return false; ++ } ++ *t = ' '; ++ *res = (size_t) v; ++ return true; ++} ++ ++static void replace_all(char* s, char from, char to) { ++ char* begin = s; ++ while (true) { ++ char* t = strchr(begin, from); ++ if (t == NULL) { ++ break; ++ } ++ *t = to; ++ begin = t + 1; ++ } ++} ++ ++JBoltMethodValue::~JBoltMethodValue() { ++ if (_comp_info != NULL) delete get_comp_info(); ++} ++ ++CompileTaskInfo* JBoltMethodValue::get_comp_info() { ++ return OrderAccess::load_acquire(&_comp_info); ++} ++ ++bool JBoltMethodValue::set_comp_info(CompileTaskInfo* info) { ++ return Atomic::cmpxchg(info, &_comp_info, (CompileTaskInfo*) NULL) == NULL; ++} ++ ++void JBoltMethodValue::clear_comp_info_but_not_release() { ++ OrderAccess::release_store(&_comp_info, (CompileTaskInfo*) NULL); ++} ++ ++JBoltStackFrameValue::~JBoltStackFrameValue() { ++ if (_method_holder != NULL) { ++ if (JNIHandles::is_weak_global_handle(_method_holder)) { ++ JNIHandles::destroy_weak_global(_method_holder); ++ } else { ++ JNIHandles::destroy_global(_method_holder); ++ } ++ } ++} ++ ++jobject JBoltStackFrameValue::get_method_holder() { return _method_holder; } ++ ++void JBoltStackFrameValue::clear_method_holder_but_not_release() { _method_holder = NULL; } ++ ++CompileTaskInfo::CompileTaskInfo(Method* method, int osr_bci, int comp_level, int comp_reason, Method* hot_method, int hot_cnt): ++ _method(method), _osr_bci(osr_bci), _comp_level(comp_level), _comp_reason(comp_reason), _hot_method(hot_method), _hot_count(hot_cnt) { ++ Thread* thread = Thread::current(); ++ ++ assert(_method != NULL, "sanity"); ++ // _method_holder can be null for boot loader (the null loader) ++ _method_holder = 
JNIHandles::make_weak_global(Handle(thread, _method->method_holder()->klass_holder())); ++ ++ if (_hot_method != NULL && _hot_method != _method) { ++ _hot_method_holder = JNIHandles::make_weak_global(Handle(thread, _hot_method->method_holder()->klass_holder())); ++ } else { ++ _hot_method_holder = NULL; ++ } ++} ++ ++CompileTaskInfo::~CompileTaskInfo() { ++ if (_method_holder != NULL) { ++ if (JNIHandles::is_weak_global_handle(_method_holder)) { ++ JNIHandles::destroy_weak_global(_method_holder); ++ } else { ++ JNIHandles::destroy_global(_method_holder); ++ } ++ } ++ if (_hot_method_holder != NULL) { ++ if (JNIHandles::is_weak_global_handle(_hot_method_holder)) { ++ JNIHandles::destroy_weak_global(_hot_method_holder); ++ } else { ++ JNIHandles::destroy_global(_hot_method_holder); ++ } ++ } ++} ++ ++/** ++ * Set the weak reference to strong reference if the method is not unloaded. ++ * It seems that the life cycle of Method is consistent with that of the Klass and CLD. ++ * @see CompileTask::select_for_compilation() ++ */ ++bool CompileTaskInfo::try_select() { ++ NoSafepointVerifier nsv; ++ Thread* thread = Thread::current(); ++ // is unloaded ++ if (_method_holder != NULL && JNIHandles::is_weak_global_handle(_method_holder) && JNIHandles::is_global_weak_cleared(_method_holder)) { ++ if (log_is_enabled(Debug, jbolt)) { ++ log_debug(jbolt)("Some method has been unloaded so skip reordering for it: p=%p.", _method); ++ } ++ return false; ++ } ++ ++ assert(_method->method_holder()->is_loader_alive(), "should be alive"); ++ Handle method_holder(thread, _method->method_holder()->klass_holder()); ++ JNIHandles::destroy_weak_global(_method_holder); ++ _method_holder = JNIHandles::make_global(method_holder); ++ ++ if (_hot_method_holder != NULL) { ++ Handle hot_method_holder(thread, _hot_method->method_holder()->klass_holder()); ++ JNIHandles::destroy_weak_global(_hot_method_holder); ++ _hot_method_holder = JNIHandles::make_global(Handle(thread, _hot_method->method_holder()->klass_holder())); ++ } ++ return true; ++} ++ ++static const char *method_type_to_string(u1 type) { ++ switch (type) { ++ case JfrStackFrame::FRAME_INTERPRETER: ++ return "Interpreted"; ++ case JfrStackFrame::FRAME_JIT: ++ return "JIT compiled"; ++ case JfrStackFrame::FRAME_INLINE: ++ return "Inlined"; ++ case JfrStackFrame::FRAME_NATIVE: ++ return "Native"; ++ default: ++ ShouldNotReachHere(); ++ return "Unknown"; ++ } ++} ++ ++uintptr_t related_data_jbolt_log_do[] = { ++ (uintptr_t)in_bytes(JfrStackTrace::hash_offset()), ++ (uintptr_t)in_bytes(JfrStackTrace::id_offset()), ++ (uintptr_t)in_bytes(JfrStackTrace::hotcount_offset()), ++ (uintptr_t)in_bytes(JfrStackTrace::frames_offset()), ++ (uintptr_t)in_bytes(JfrStackTrace::frames_count_offset()), ++ ++ (uintptr_t)in_bytes(JfrStackFrame::method_offset()), ++ (uintptr_t)in_bytes(JfrStackFrame::methodid_offset()), ++ (uintptr_t)in_bytes(JfrStackFrame::bci_offset()), ++ (uintptr_t)in_bytes(JfrStackFrame::type_offset()), ++ ++ (uintptr_t)JBoltFunc::constructor, ++ (uintptr_t)JBoltFunc::copy_constructor, ++ (uintptr_t)JBoltCall::constructor, ++ (uintptr_t)JBoltCall::copy_constructor, ++ (uintptr_t)JBoltCallGraph::static_add_func, ++ (uintptr_t)JBoltCallGraph::static_add_call ++}; ++ ++/** ++ * Invoked in JfrStackTraceRepository::add_jbolt(). ++ * Each time JFR record a valid stacktrace, ++ * we log a weak ptr of each unique method in _sampled_methods_refs. 
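For readers unfamiliar with the handle discipline used here, the fragment below (illustrative only, not part of the patch; the helper names remember_holder/holder_is_live are made up) shows the weak-global-handle guard that log_stacktrace() and lookup_method() build on: record a weak handle to the method holder at sample time, and refuse to touch the Method* later if the handle has been cleared by class unloading.

static jobject remember_holder(Thread* thread, Method* m) {
  // A weak global handle reaches the holder without keeping the class alive.
  return JNIHandles::make_weak_global(Handle(thread, m->method_holder()->klass_holder()));
}

static bool holder_is_live(jobject holder) {
  // A cleared weak global means the class was unloaded; the Method* must not be used.
  return holder != NULL &&
         !(JNIHandles::is_weak_global_handle(holder) && JNIHandles::is_global_weak_cleared(holder));
}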
++ */ ++void JBoltManager::log_stacktrace(const JfrStackTrace& stacktrace) { ++ Thread* thread = Thread::current(); ++ HandleMark hm(thread); ++ ++ const JfrStackFrame* frames = stacktrace.get_frames(); ++ unsigned int framesCount = stacktrace.get_framesCount(); ++ ++ for (u4 i = 0; i < framesCount; ++i) { ++ const JfrStackFrame& frame = frames[i]; ++ ++ JBoltStackFrameKey stackframe_key(const_cast(frame.get_method()), frame.get_methodId()); ++ ++ if (!_sampled_methods_refs->contains(stackframe_key)) { ++ jobject method_holder = JNIHandles::make_weak_global(Handle(thread, frame.get_method()->method_holder()->klass_holder())); ++ JBoltStackFrameValue stackframe_value(method_holder); ++ _sampled_methods_refs->put(stackframe_key, stackframe_value); ++ // put() transmits method_holder ownership to element in map ++ // set the method_holder to NULL in temp variable stackframe_value, to avoid double free ++ stackframe_value.clear_method_holder_but_not_release(); ++ } ++ } ++} ++ ++methodHandle JBoltManager::lookup_method(Method* method, traceid method_id) { ++ Thread* thread = Thread::current(); ++ JBoltStackFrameKey stackframe_key(method, method_id); ++ JBoltStackFrameValue* stackframe_value = _sampled_methods_refs->get(stackframe_key); ++ if (stackframe_value == NULL) { ++ return methodHandle(); ++ } ++ ++ jobject method_holder = stackframe_value->get_method_holder(); ++ if (method_holder != NULL && JNIHandles::is_weak_global_handle(method_holder) && JNIHandles::is_global_weak_cleared(method_holder)) { ++ log_debug(jbolt)("method at %p is unloaded", (void*)method); ++ return methodHandle(); ++ } ++ ++ const Method* const lookup_method = method; ++ if (lookup_method == NULL) { ++ // stacktrace obsolete ++ return methodHandle(); ++ } ++ assert(lookup_method != NULL, "invariant"); ++ methodHandle method_handle(thread, const_cast(lookup_method)); ++ ++ return method_handle; ++} ++ ++void JBoltManager::construct_stacktrace(const JfrStackTrace& stacktrace) { ++ NoSafepointVerifier nsv; ++ if (stacktrace.get_framesCount() < MIN_FRAMESCOUNT) ++ return; ++ ++ u4 topFrameIndex = 0; ++ u4 max_frames = 0; ++ ++ const JfrStackFrame* frames = stacktrace.get_frames(); ++ unsigned int framesCount = stacktrace.get_framesCount(); ++ ++ // Native method subsidence ++ while (topFrameIndex < framesCount) { ++ const JfrStackFrame& frame = frames[topFrameIndex]; ++ ++ if (strcmp(method_type_to_string(frame.get_type()), "Native") != 0) { ++ break; ++ } ++ ++ topFrameIndex++; ++ } ++ ++ if (framesCount - topFrameIndex < MIN_FRAMESCOUNT) { ++ return; ++ } ++ ++ os::Linux::jboltLog_precalc(topFrameIndex, max_frames, framesCount); ++ ++ JBoltFunc **tempfunc = NULL; ++ ++ for (u4 i = 0; i < max_frames; ++i) { ++ const JfrStackFrame& frame = frames[topFrameIndex + i]; ++ ++ methodHandle method = lookup_method(const_cast(frame.get_method()), frame.get_methodId()); ++ if (method.is_null()) { ++ break; ++ } ++ ++ if (i == 0) { ++ int hotcount = stacktrace.hotcount(); ++ int* exist_hotcount = _sampled_methods_hotcount_stored->get(method()); ++ if (exist_hotcount != NULL) { ++ hotcount += *exist_hotcount; ++ } ++ _sampled_methods_hotcount_stored->put(method(), hotcount); ++ } ++ ++ const CompiledMethod* const compiled = method->code(); ++ ++ log_trace(jbolt)( ++ "Method id - %lu\n\tBytecode index - %d\n\tSignature - %s\n\tType - %s\n\tCompiler - %s\n\tCompile Level - %d\n\tSize - %dB\n", ++ frame.get_methodId(), ++ frame.get_byteCodeIndex(), ++ method->external_name(), ++ method_type_to_string(frame.get_type()), ++ compiled != 
NULL ? compiled->compiler_name() : "None", ++ compiled != NULL ? compiled->comp_level() : -1, ++ compiled != NULL ? compiled->size() : 0); ++ ++ if (compiled == NULL) continue; ++ ++ JBoltMethodKey method_key(method->constants()->pool_holder()->name(), method->name(), method->signature()); ++ JBoltFunc* func = JBoltFunc::constructor(frame.get_method(), frame.get_methodId(), compiled->size(), method_key); ++ ++ if (!os::Linux::jboltLog_do(related_data_jbolt_log_do, (address)(const_cast(&stacktrace)), i, compiled->comp_level(), (address)func, (address*)&tempfunc)) { ++ delete func; ++ func = NULL; ++ continue; ++ } ++ } ++ ++ log_trace(jbolt)( ++ "StackTrace hash - %u hotcount - %u\n==============================\n", stacktrace.hash(), stacktrace.hotcount()); ++} ++ ++/** ++ * Invoked in JfrStackTraceRepository::write(). ++ * Each time JfrChunkWrite do write and clear stacktrace table, ++ * we update the CG by invoke construct_stacktrace(). ++ */ ++void JBoltManager::construct_cg_once() { ++ guarantee((UseJBolt && JBoltManager::reorder_phase_is_profiling_or_waiting()), "sanity"); ++ ++ GrowableArray* traces = create_growable_array(); ++ ++ { ++ MutexLockerEx lock(JfrStacktrace_lock, Mutex::_no_safepoint_check_flag); ++ const JfrStackTraceRepository& repository = JfrStackTraceRepository::instance(); ++ ++ if (repository.get_entries_count_jbolt() == 0) { ++ return; ++ } ++ ++ const JfrStackTrace* const * table = repository.get_stacktrace_table_jbolt(); ++ for (uint i = 0; i < repository.TABLE_SIZE; ++i) { ++ for (const JfrStackTrace* trace = table[i]; trace != NULL; trace = trace->next()) { ++ traces->append(const_cast(trace)); ++ } ++ } ++ } ++ ++ for (int i = 0; i < traces->length(); ++i) { ++ construct_stacktrace(*(traces->at(i))); ++ } ++ ++ log_trace(jbolt)( ++ "+++++++ one time log over ++++++\n\n"); ++ delete traces; ++} ++ ++static void write_order(const GrowableArray* order, fileStream& fs) { ++ assert(order != NULL, "sanity"); ++ const char* methodFlag = "M"; ++ const char* segmentor = "C\n"; ++ ++ log_debug(jbolt)("+============================+\n\t\t\tORDER\n"); ++ ++ for (int i = 0; i < order->length(); ++i) { ++ const JBoltFunc& func = order->at(i); ++ if (func.method() == NULL) { ++ fs.write(segmentor, strlen(segmentor)); ++ continue; ++ } ++ ++ char* holder_name = func.method_key().klass()->as_C_string(); ++ char* name = func.method_key().name()->as_C_string(); ++ char* signature = func.method_key().sig()->as_C_string(); ++ char size[LINE_BUF_SIZE] = {0}; ++ snprintf(size, sizeof(size), "%d", func.size()); ++ ++ log_debug(jbolt)("order %d --- Method - %s %s %s\n", i, holder_name, name, signature); ++ ++ fs.write(methodFlag, strlen(methodFlag)); ++ fs.write(" ", 1); ++ fs.write(size, strlen(size)); ++ fs.write(" ", 1); ++ fs.write(holder_name, strlen(holder_name)); ++ fs.write(" ", 1); ++ fs.write(name, strlen(name)); ++ fs.write(" ", 1); ++ fs.write(signature, strlen(signature)); ++ fs.write("\n", 1); ++ } ++} ++ ++/** ++ * Invoked in before_exit(). ++ * ++ * Dump the order to JBoltOrderFile before vm exit. 
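For reference, the order file produced by write_order() above and read back by the load phases is plain text; a hypothetical example (class and method names are invented) looks like this:

# comments are accepted by the loader
M 2048 java/lang/String hashCode ()I
M 4096 com/example/Foo bar (Ljava/lang/String;)V
C
M 1024 com/example/Baz run ()V

Each 'M' line carries the nmethod size in bytes, the class, the method name and the signature; a bare 'C' line separates connected components.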
++ */ ++void JBoltManager::dump_order_in_manual() { ++ guarantee((UseJBolt && JBoltDumpMode), "sanity"); ++ guarantee(reorder_phase_profiling_to_waiting(), "sanity"); ++ NoSafepointVerifier nsv; ++ ResourceMark rm; ++ GrowableArray* order = JBoltCallGraph::callgraph_instance().hfsort(); ++ ++ fileStream order_file(JBoltOrderFile, "w+"); ++ ++ if (JBoltOrderFile == NULL || !order_file.is_open()) { ++ log_error(jbolt)("JBoltOrderFile open error"); ++ vm_exit_during_initialization("JBoltOrderFile open error"); ++ } ++ ++ write_order(order, order_file); ++ ++ log_info(jbolt)("order generate successful !!"); ++ log_debug(jbolt)("+============================+\n"); ++ delete order; ++ delete _sampled_methods_refs; ++ _sampled_methods_refs = NULL; ++ JBoltCallGraph::deinitialize(); ++} ++ ++JBoltErrorCode JBoltManager::dump_order_in_jcmd(const char* filename) { ++ guarantee(UseJBolt, "sanity"); ++ NoSafepointVerifier nsv; ++ ResourceMark rm; ++ ++ if (_order_stored == NULL) return JBoltOrderNULL; ++ ++ fileStream order_file(filename, "w+"); ++ ++ if (filename == NULL || !order_file.is_open()) return JBoltOpenFileError; ++ ++ write_order(_order_stored, order_file); ++ ++ return JBoltOK; ++} ++ ++#define check_arg_not_set(flag) \ ++do { \ ++ if (FLAG_IS_CMDLINE(flag)) { \ ++ vm_exit_during_initialization(err_msg("Do not set VM option " #flag " without UseJBolt enabled.")); \ ++ } \ ++} while(0) ++ ++/** ++ * Do not set the JBolt-related flags manually if UseJBolt is not enabled. ++ */ ++void JBoltManager::check_arguments_not_set() { ++ if (UseJBolt) return; ++ ++ check_arg_not_set(JBoltDumpMode); ++ check_arg_not_set(JBoltLoadMode); ++ check_arg_not_set(JBoltOrderFile); ++ check_arg_not_set(JBoltSampleInterval); ++ check_arg_not_set(JBoltCodeHeapSize); ++ check_arg_not_set(JBoltRescheduling); ++ check_arg_not_set(JBoltReorderThreshold); ++ check_arg_not_set(EnableDumpGraph); ++} ++ ++/** ++ * Check which mode is JBolt in. ++ * If JBoltDumpMode or JBoltLoadMode is set manually then do nothing, else it will be fully auto sched by JBolt itself. ++ */ ++void JBoltManager::check_mode() { ++ if (!(JBoltDumpMode || JBoltLoadMode)) { ++ _auto_mode = true; ++ return; ++ } ++ ++ if (!FLAG_IS_DEFAULT(JBoltSampleInterval)) { ++ log_warning(jbolt)("JBoltSampleInterval is ignored because it is not in auto mode."); ++ } ++ ++ if (JBoltDumpMode && JBoltLoadMode) { ++ vm_exit_during_initialization("Do not set both JBoltDumpMode and JBoltLoadMode!"); ++ } ++ ++ guarantee((JBoltDumpMode ^ JBoltLoadMode), "Must set either JBoltDumpMode or JBoltLoadMode!"); ++} ++ ++/** ++ * If in auto mode, JBoltOrderFile will be ignored ++ * If in any manual mode, then JBoltOrderFile will be necessary. ++ * Check whether the order file exists or is accessable. ++ */ ++void JBoltManager::check_order_file() { ++ if (auto_mode()) { ++ if (JBoltOrderFile != NULL) log_warning(jbolt)("JBoltOrderFile is ignored because it is in auto mode."); ++ return; ++ } ++ ++ if (JBoltOrderFile == NULL) { ++ vm_exit_during_initialization("JBoltOrderFile is not set!"); ++ } ++ ++ bool file_exist = (::access(JBoltOrderFile, F_OK) == 0); ++ if (file_exist) { ++ if (JBoltDumpMode) { ++ log_warning(jbolt)("JBoltOrderFile to dump already exists and will be overwritten: file=%s.", JBoltOrderFile); ++ ::remove(JBoltOrderFile); ++ } ++ } else { ++ if (JBoltLoadMode) { ++ vm_exit_during_initialization(err_msg("JBoltOrderFile does not exist or cannot be accessed! 
file=\"%s\".", JBoltOrderFile)); ++ } ++ } ++} ++ ++void JBoltManager::check_dependency() { ++ if (FLAG_IS_CMDLINE(FlightRecorder) ? !FlightRecorder : false) { ++ vm_exit_during_initialization("JBolt depends on JFR!"); ++ } ++ ++ if (!CompilerConfig::is_c2_enabled()) { ++ vm_exit_during_initialization("JBolt depends on C2!"); ++ } ++ ++ if (!SegmentedCodeCache) { ++ vm_exit_during_initialization("JBolt depends on SegmentedCodeCache!"); ++ } ++} ++ ++size_t JBoltManager::calc_nmethod_size_with_padding(size_t nmethod_size) { ++ return align_up(nmethod_size, (size_t) CodeCacheSegmentSize); ++} ++ ++size_t JBoltManager::calc_segment_size_with_padding(size_t segment_size) { ++ size_t page_size = CodeCache::page_size(); ++ if (segment_size < page_size) return page_size; ++ return align_down(segment_size, page_size); ++} ++ ++/** ++ * We have to parse the file twice because SymbolTable is not inited in phase 1... ++ */ ++void JBoltManager::load_order_file_phase1(int* method_cnt, size_t* segment_size) { ++ assert(JBoltOrderFile != NULL, "sanity"); ++ ++ _order_fp = os::fopen(JBoltOrderFile, "r"); ++ if (_order_fp == NULL) { ++ vm_exit_during_initialization(err_msg("Cannot open file JBoltOrderFile! file=\"%s\".", JBoltOrderFile)); ++ } ++ ++ int mth_cnt = 0; ++ size_t seg_size = 0; ++ ++ char line[LINE_BUF_SIZE]; ++ int len = -1; ++ while (read_line(_order_fp, line, sizeof(line), &len)) { ++ if (len <= 2) continue; ++ if (line[0] != 'M' || line[1] != ' ') continue; ++ char* left_start = line + 2; ++ ++ // parse nmethod size ++ size_t nmethod_size; ++ if (!read_a_size(left_start, &nmethod_size)) { ++ vm_exit_during_initialization(err_msg("Wrong format of JBolt order line! line=\"%s\".", line)); ++ } ++ ++mth_cnt; ++ seg_size += calc_nmethod_size_with_padding(nmethod_size); ++ } ++ ++ *method_cnt = mth_cnt; ++ *segment_size = seg_size; ++ log_trace(jbolt)("Read order file method_cnt=%d, estimated_segment_size=" SIZE_FORMAT ".", mth_cnt, seg_size); ++} ++ ++bool JBoltManager::parse_method_line_phase2(char* const line, const int len, TRAPS) { ++ // Skip "M ". ++ char* left_start = line + 2; ++ ++ // Skip nmethod size (has parsed in phase1). ++ { ++ char* t = strchr(left_start, ' '); ++ if (t == NULL) return false; ++ left_start = t + 1; ++ } ++ ++ // Modify "java.lang.Obj" to "java/lang/Obj". ++ replace_all(left_start, '.', '/'); ++ ++ // Parse the three symbols: class name, method name, signature. ++ Symbol* three_symbols[3]; ++ for (int i = 0; i < 2; ++i) { ++ char* t = strchr(left_start, ' '); ++ if (t == NULL) return false; ++ Symbol* sym = SymbolTable::new_symbol(left_start, t - left_start, THREAD); ++ three_symbols[i] = sym; ++ left_start = t + 1; ++ } ++ Symbol* sym = SymbolTable::new_symbol(left_start, line + len - left_start, THREAD); ++ three_symbols[2] = sym; ++ if (log_is_enabled(Trace, jbolt)) { ++ log_trace(jbolt)("HotMethod init: key={%s %s %s}", ++ three_symbols[0]->as_C_string(), ++ three_symbols[1]->as_C_string(), ++ three_symbols[2]->as_C_string()); ++ } ++ ++ // Add to data structure. 
++ JBoltMethodKey method_key(three_symbols[0], three_symbols[1], three_symbols[2]); ++ _hot_methods_sorted->append(method_key); ++ JBoltMethodValue method_value; ++ bool put = _hot_methods_vis->put(method_key, method_value); ++ if (!put) { ++ vm_exit_during_initialization(err_msg("Duplicated method: {%s %s %s}!", ++ three_symbols[0]->as_C_string(), ++ three_symbols[1]->as_C_string(), ++ three_symbols[2]->as_C_string())); ++ } ++ ++ return true; ++} ++ ++bool JBoltManager::parse_connected_component_line_phase2(char* const line, const int len) { return true; } ++ ++void JBoltManager::load_order_file_phase2(TRAPS) { ++ guarantee(_order_fp != NULL, "sanity"); ++ ++ // re-scan ++ fseek(_order_fp, 0, SEEK_SET); ++ ++ char line[LINE_BUF_SIZE]; ++ int len = -1; ++ while (read_line(_order_fp, line, sizeof(line), &len)) { ++ if (len <= 0) continue; ++ bool success = false; ++ switch (line[0]) { ++ case '#': success = true; break; // ignore comments ++ case 'M': success = parse_method_line_phase2(line, len, THREAD); break; ++ case 'C': success = parse_connected_component_line_phase2(line, len); break; ++ default: break; ++ } ++ if (!success) { ++ vm_exit_during_initialization(err_msg("Wrong format of JBolt order line! line=\"%s\".", line)); ++ } ++ } ++ fclose(_order_fp); ++ _order_fp = NULL; ++} ++ ++void JBoltManager::init_load_mode_phase1() { ++ if (!(auto_mode() || JBoltLoadMode)) return; ++ ++ if (auto_mode()) { ++ // auto mode has no order now, initialize as default. ++ _hot_methods_sorted = new (ResourceObj::C_HEAP, mtCompiler) GrowableArray(1, mtCompiler); ++ _hot_methods_vis = new (ResourceObj::C_HEAP, mtCompiler) MethodKeyMap(); ++ log_info(jbolt)("Default set JBoltCodeHeapSize=" UINTX_FORMAT " B (" UINTX_FORMAT " MB).", JBoltCodeHeapSize, JBoltCodeHeapSize / 1024 / 1024); ++ return; ++ } ++ guarantee(reorder_phase_available_to_collecting(), "sanity"); ++ size_t total_nmethod_size = 0; ++ int method_cnt = 0; ++ load_order_file_phase1(&method_cnt, &total_nmethod_size); ++ ++ _hot_methods_sorted = new (ResourceObj::C_HEAP, mtCompiler) GrowableArray(method_cnt, mtCompiler); ++ _hot_methods_vis = new (ResourceObj::C_HEAP, mtCompiler) MethodKeyMap(); ++ ++ if (FLAG_IS_DEFAULT(JBoltCodeHeapSize)) { ++ FLAG_SET_ERGO(uintx, JBoltCodeHeapSize, calc_segment_size_with_padding(total_nmethod_size)); ++ log_info(jbolt)("Auto set JBoltCodeHeapSize=" UINTX_FORMAT " B (" UINTX_FORMAT " MB).", JBoltCodeHeapSize, JBoltCodeHeapSize / 1024 / 1024); ++ } ++} ++ ++void JBoltManager::init_load_mode_phase2(TRAPS) { ++ // Only manual load mode need load phase2 ++ if (!JBoltLoadMode) return; ++ ++ load_order_file_phase2(CHECK); ++ _reorderable_method_cnt = 0; ++ _reorder_method_threshold_cnt = _hot_methods_sorted->length() * JBoltReorderThreshold; ++} ++ ++void JBoltManager::init_dump_mode_phase2(TRAPS) { ++ if (!(auto_mode() || JBoltDumpMode)) return; ++ ++ JBoltCallGraph::initialize(); ++ _sampled_methods_refs = new (ResourceObj::C_HEAP, mtTracing) StackFrameKeyMap(); ++ _sampled_methods_hotcount_stored = new (ResourceObj::C_HEAP, mtTracing) MethodHotCountMap(); ++ ++ // JBolt will create a JFR by itself ++ // In auto mode, will stop in JBoltControlThread::start_thread() after JBoltSampleInterval. ++ // In manual dump mode, won't stop until program exit. 
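Besides the automatic schedule, the diagnostic commands registered in jBoltDcmds.cpp drive the same machinery from outside the VM; an illustrative jcmd session (<pid> and the file name are placeholders):

$ jcmd <pid> JBolt.start duration=900       # start a new sample schedule (auto mode only)
$ jcmd <pid> JBolt.stop                     # stop sampling and reorder immediately
$ jcmd <pid> JBolt.abort                    # stop sampling without reordering
$ jcmd <pid> JBolt.dump filename=order.log  # dump the currently applied order
$ jcmd <pid> JBolt.dumpgraph                # only registered with -XX:+EnableDumpGraph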
++ log_info(jbolt)("JBolt in dump mode now, start a JFR recording named \"jbolt-jfr\"."); ++ bufferedStream output; ++ DCmd::parse_and_execute(DCmd_Source_Internal, &output, "JFR.start name=jbolt-jfr", ' ', THREAD); ++ if (HAS_PENDING_EXCEPTION) { ++ ResourceMark rm; ++ log_warning(jbolt)("unable to start jfr jbolt-jfr"); ++ log_warning(jbolt)("exception type: %s", PENDING_EXCEPTION->klass()->external_name()); ++ // don't unwind this exception ++ CLEAR_PENDING_EXCEPTION; ++ } ++} ++ ++static void update_stored_order(const GrowableArray* order) { ++ if (_order_stored != NULL) { ++ // use a tmp for releasing space to provent _order_stored from being a wild pointer ++ GrowableArray* tmp = _order_stored; ++ _order_stored = NULL; ++ delete tmp; ++ } ++ _order_stored = new (ResourceObj::C_HEAP, mtTracing) GrowableArray(order->length(), mtTracing); ++ _order_stored->appendAll(order); ++} ++ ++static CompileTaskInfo* create_compile_task_info(const methodHandle& method) { ++ CompiledMethod* compiled = method->code(); ++ if (compiled == NULL) { ++ log_trace(jbolt)("Recompilation Task init failed because of null nmethod. func: %s.", method->external_name()); ++ return NULL; ++ } ++ int osr_bci = compiled->is_osr_method() ? compiled->osr_entry_bci() : InvocationEntryBci; ++ int comp_level = compiled->comp_level(); ++ // comp_level adaptation for deoptmization ++ if (comp_level > CompLevel_simple && comp_level <= CompLevel_full_optimization) comp_level = CompLevel_full_optimization; ++ CompileTask::CompileReason comp_reason = CompileTask::Reason_Reorder; ++ CompileTaskInfo* ret = new CompileTaskInfo(method(), osr_bci, comp_level, (int)comp_reason, ++ NULL, 0); ++ return ret; ++} ++ ++/** ++ * This function is invoked by JBoltControlThread. ++ * Do initialization for converting dump mode to load mode. ++ */ ++void JBoltManager::init_auto_transition(size_t* segment_size, TRAPS) { ++ guarantee(UseJBolt && auto_mode(), "sanity"); ++ NoSafepointVerifier nsv; ++ ResourceMark rm; ++ ++ GrowableArray* order = JBoltCallGraph::callgraph_instance().hfsort(); ++ update_stored_order(order); ++ ++ size_t seg_size = 0; ++ for (int i = 0; i < order->length(); ++i) { ++ const JBoltFunc& func = order->at(i); ++ if (func.method() == NULL) { ++ continue; ++ } ++ ++ methodHandle method = lookup_method(const_cast(func.method()), func.method_id()); ++ if (method.is_null()) { ++ continue; ++ } ++ ++ CompileTaskInfo* cti = create_compile_task_info(method); ++ if (cti == NULL) { ++ continue; ++ } ++ ++ JBoltMethodKey method_key = func.method_key(); ++ JBoltMethodValue method_value; ++ if (!method_value.set_comp_info(cti)) { ++ delete cti; ++ continue; ++ } ++ ++ seg_size += calc_nmethod_size_with_padding(func.size()); ++ _hot_methods_sorted->append(method_key); ++ bool put = _hot_methods_vis->put(method_key, method_value); ++ if (!put) { ++ vm_exit_during_initialization(err_msg("Duplicated method: {%s %s %s}!", ++ method_key.klass()->as_C_string(), ++ method_key.name()->as_C_string(), ++ method_key.sig()->as_C_string())); ++ } ++ method_value.clear_comp_info_but_not_release(); ++ } ++ log_info(jbolt)("order generate successful !!"); ++ *segment_size = calc_segment_size_with_padding(seg_size); ++ delete order; ++} ++ ++/** ++ * This function must be invoked after CompilerConfig::ergo_initialize() in Arguments::apply_ergo(). ++ * This function must be invoked before CodeCache::initialize_heaps() in codeCache_init() in init_globals(). ++ * Thread and SymbolTable is not inited now! 
++ */ ++void JBoltManager::init_phase1() { ++ if (!UseJBolt) return; ++ check_mode(); ++ check_dependency(); ++ check_order_file(); ++ parse_rescheduling(); ++ ++ /* dump mode has nothing to do in phase1 */ ++ init_load_mode_phase1(); ++} ++ ++void JBoltManager::init_phase2(TRAPS) { ++ if (!UseJBolt) return; ++ ++ ResourceMark rm(THREAD); ++ init_dump_mode_phase2(CHECK); ++ init_load_mode_phase2(CHECK); ++ ++ // Manual dump mode doesn't need JBoltControlThread, directly go to profiling phase ++ if (JBoltDumpMode) { ++ guarantee(JBoltManager::reorder_phase_available_to_profiling(), "sanity"); ++ return; ++ } ++ ++ JBoltControlThread::init(CHECK); ++ // Auto mode will start control thread earlier. ++ // Manual load mode start later in check_start_reordering() ++ if (auto_mode()) { ++ JBoltControlThread::start_thread(CHECK_AND_CLEAR); ++ } ++} ++ ++/** ++ * Code heaps are initialized between init phase 1 and init phase 2. ++ */ ++void JBoltManager::init_code_heaps(size_t non_nmethod_size, size_t profiled_size, size_t non_profiled_size, size_t cache_size, size_t alignment) { ++ assert(UseJBolt && !JBoltDumpMode, "sanity"); ++ if(!is_aligned(JBoltCodeHeapSize, alignment)) { ++ vm_exit_during_initialization(err_msg("JBoltCodeHeapSize should be %ld aligned, please adjust", alignment)); ++ } ++ ++ size_t jbolt_hot_size = JBoltCodeHeapSize; ++ size_t jbolt_tmp_size = JBoltCodeHeapSize; ++ size_t jbolt_total_size = jbolt_hot_size + jbolt_tmp_size; ++ if (non_profiled_size <= jbolt_total_size) { ++ vm_exit_during_initialization(err_msg( ++ "Not enough space in non-profiled code heap to split out JBolt heap(s): " SIZE_FORMAT "K <= " SIZE_FORMAT "K", ++ non_profiled_size/K, jbolt_total_size/K)); ++ } ++ non_profiled_size -= jbolt_total_size; ++ non_profiled_size = align_down(non_profiled_size, alignment); ++ FLAG_SET_ERGO(uintx, NonProfiledCodeHeapSize, non_profiled_size); ++ ++ ReservedCodeSpace rs = CodeCache::reserve_heap_memory(cache_size); ++ ReservedSpace non_nmethod_space, profiled_space, non_profiled_space, jbolt_hot_space, jbolt_tmp_space; ++ ++ uintptr_t related_data_jbolt_heap_init[] = { ++ (uintptr_t)non_nmethod_size, ++ (uintptr_t)profiled_size, ++ (uintptr_t)non_profiled_size, ++ (uintptr_t)jbolt_hot_size, ++ (uintptr_t)jbolt_tmp_size, ++ ++ (uintptr_t)ReservedSpace::static_first_part, ++ (uintptr_t)ReservedSpace::static_last_part ++ }; ++ ++ if (!os::Linux::jboltHeap_init(related_data_jbolt_heap_init, (address)&rs, (address)&non_nmethod_space, (address)&profiled_space, (address)&non_profiled_space, (address)&jbolt_hot_space, (address)&jbolt_tmp_space)) { ++ jbolt_hot_size = CodeCache::page_size(); ++ jbolt_tmp_size = CodeCache::page_size(); ++ non_profiled_size += (jbolt_total_size - 2 * CodeCache::page_size()); ++ // Reserve one continuous chunk of memory for CodeHeaps and split it into ++ // parts for the individual heaps. 
The memory layout looks like this: ++ // ---------- high ----------- ++ // Non-profiled nmethods ++ // JBolt tmp nmethods ++ // JBolt hot nmethods ++ // Profiled nmethods ++ // Non-nmethods ++ // ---------- low ------------ ++ non_nmethod_space = rs.first_part(non_nmethod_size); ++ ReservedSpace r1 = rs.last_part(non_nmethod_size); ++ profiled_space = r1.first_part(profiled_size); ++ ReservedSpace r2 = r1.last_part(profiled_size); ++ jbolt_hot_space = r2.first_part(jbolt_hot_size); ++ ReservedSpace r3 = r2.last_part(jbolt_hot_size); ++ jbolt_tmp_space = r3.first_part(jbolt_tmp_size); ++ non_profiled_space = r3.last_part(jbolt_tmp_size); ++ } ++ ++ CodeCache::add_heap(non_nmethod_space, "CodeHeap 'non-nmethods'", CodeBlobType::NonNMethod); ++ CodeCache::add_heap(profiled_space, "CodeHeap 'profiled nmethods'", CodeBlobType::MethodProfiled); ++ CodeCache::add_heap(non_profiled_space, "CodeHeap 'non-profiled nmethods'", CodeBlobType::MethodNonProfiled); ++ const char* no_space = NULL; ++ CodeCache::add_heap(jbolt_hot_space, "CodeHeap 'jbolt hot nmethods'", CodeBlobType::MethodJBoltHot); ++ if (jbolt_hot_size != jbolt_hot_space.size()) { ++ no_space = "hot"; ++ } ++ CodeCache::add_heap(jbolt_tmp_space, "CodeHeap 'jbolt tmp nmethods'", CodeBlobType::MethodJBoltTmp); ++ if (jbolt_tmp_size != jbolt_tmp_space.size()) { ++ no_space = "tmp"; ++ } ++ if (no_space != NULL) { ++ vm_exit_during_initialization(FormatBuffer<1024>( ++ "No enough space for JBolt %s heap: \n" ++ "Expect: cache_size=" SIZE_FORMAT "K, profiled_size=" SIZE_FORMAT "K, non_nmethod_size=" SIZE_FORMAT "K, jbolt_hot_size=" SIZE_FORMAT "K, non_profiled_size=" SIZE_FORMAT "K, jbolt_tmp_size=" SIZE_FORMAT "K\n" ++ "Actual: cache_size=" SIZE_FORMAT "K, profiled_size=" SIZE_FORMAT "K, non_nmethod_size=" SIZE_FORMAT "K, jbolt_hot_size=" SIZE_FORMAT "K, non_profiled_size=" SIZE_FORMAT "K, jbolt_tmp_size=" SIZE_FORMAT "K\n" ++ "alignment=" SIZE_FORMAT, ++ no_space, ++ cache_size/K, profiled_size/K, non_nmethod_size/K, jbolt_hot_size/K, non_profiled_size/K, jbolt_tmp_size/K, ++ rs.size()/K, profiled_space.size()/K, non_nmethod_space.size()/K, jbolt_hot_space.size()/K, non_profiled_space.size()/K, jbolt_tmp_space.size()/K, ++ alignment)); ++ } ++} ++ ++int JBoltManager::reorder_phase() { ++ return OrderAccess::load_acquire(&_reorder_phase); ++} ++ ++bool JBoltManager::reorder_phase_available_to_collecting() { ++ assert(!auto_mode(), "two-phase only"); ++ return Atomic::cmpxchg(JBoltReorderPhase::Collecting, &_reorder_phase, JBoltReorderPhase::Available) == JBoltReorderPhase::Available; ++} ++ ++bool JBoltManager::reorder_phase_collecting_to_reordering() { ++ assert(!auto_mode(), "two-phase only"); ++ return Atomic::cmpxchg(JBoltReorderPhase::Reordering, &_reorder_phase, JBoltReorderPhase::Collecting) == JBoltReorderPhase::Collecting; ++} ++ ++bool JBoltManager::reorder_phase_available_to_profiling() { ++ return Atomic::cmpxchg(JBoltReorderPhase::Profiling, &_reorder_phase, JBoltReorderPhase::Available) == JBoltReorderPhase::Available; ++} ++ ++bool JBoltManager::reorder_phase_profiling_to_reordering() { ++ assert(auto_mode(), "one-phase only"); ++ return Atomic::cmpxchg(JBoltReorderPhase::Reordering, &_reorder_phase, JBoltReorderPhase::Profiling) == JBoltReorderPhase::Profiling; ++} ++ ++bool JBoltManager::reorder_phase_reordering_to_available() { ++ assert(auto_mode(), "one-phase only"); ++ return Atomic::cmpxchg(JBoltReorderPhase::Available, &_reorder_phase, JBoltReorderPhase::Reordering) == JBoltReorderPhase::Reordering; ++} ++ ++bool 
JBoltManager::reorder_phase_profiling_to_available() { ++ assert(auto_mode(), "one-phase only"); ++ return Atomic::cmpxchg(JBoltReorderPhase::Available, &_reorder_phase, JBoltReorderPhase::Profiling) == JBoltReorderPhase::Profiling; ++} ++ ++bool JBoltManager::reorder_phase_profiling_to_waiting() { ++ return Atomic::cmpxchg(JBoltReorderPhase::Waiting, &_reorder_phase, JBoltReorderPhase::Profiling) == JBoltReorderPhase::Profiling; ++} ++ ++bool JBoltManager::reorder_phase_waiting_to_reordering() { ++ assert(auto_mode(), "one-phase only"); ++ return Atomic::cmpxchg(JBoltReorderPhase::Reordering, &_reorder_phase, JBoltReorderPhase::Waiting) == JBoltReorderPhase::Waiting; ++} ++ ++bool JBoltManager::reorder_phase_waiting_to_available() { ++ assert(auto_mode(), "one-phase only"); ++ return Atomic::cmpxchg(JBoltReorderPhase::Available, &_reorder_phase, JBoltReorderPhase::Waiting) == JBoltReorderPhase::Waiting; ++} ++ ++bool JBoltManager::reorder_phase_reordering_to_end() { ++ return Atomic::cmpxchg(JBoltReorderPhase::End, &_reorder_phase, JBoltReorderPhase::Reordering) == JBoltReorderPhase::Reordering; ++} ++ ++bool JBoltManager::reorder_phase_is_waiting() { ++ return OrderAccess::load_acquire(&_reorder_phase) == JBoltReorderPhase::Waiting; ++} ++ ++bool JBoltManager::reorder_phase_is_available() { ++ bool res = (OrderAccess::load_acquire(&_reorder_phase) == JBoltReorderPhase::Available); ++ assert(!res || auto_mode(), "one-phase only"); ++ return res; ++} ++ ++bool JBoltManager::reorder_phase_is_collecting() { ++ bool res = (OrderAccess::load_acquire(&_reorder_phase) == JBoltReorderPhase::Collecting); ++ assert(!res || !auto_mode(), "two-phase only"); ++ return res; ++} ++ ++bool JBoltManager::reorder_phase_is_profiling() { ++ bool res = (OrderAccess::load_acquire(&_reorder_phase) == JBoltReorderPhase::Profiling); ++ return res; ++} ++ ++bool JBoltManager::reorder_phase_is_reordering() { ++ return OrderAccess::load_acquire(&_reorder_phase) == JBoltReorderPhase::Reordering; ++} ++ ++bool JBoltManager::reorder_phase_is_profiling_or_waiting() { ++ int p = OrderAccess::load_acquire(&_reorder_phase); ++ return p == JBoltReorderPhase::Profiling || p == JBoltReorderPhase::Waiting; ++} ++ ++bool JBoltManager::reorder_phase_is_collecting_or_reordering() { ++ int p = OrderAccess::load_acquire(&_reorder_phase); ++ assert(p != JBoltReorderPhase::Collecting || !auto_mode(), "two-phase only"); ++ return p == JBoltReorderPhase::Collecting || p == JBoltReorderPhase::Reordering; ++} ++ ++Method* JBoltManager::cur_reordering_method() { ++ return OrderAccess::load_acquire(&_cur_reordering_method); ++} ++ ++void JBoltManager::set_cur_reordering_method(Method* method) { ++ OrderAccess::release_store(&_cur_reordering_method, method); ++} ++ ++int JBoltManager::inc_reorderable_method_cnt() { ++ return Atomic::add(+1, &_reorderable_method_cnt); ++} ++ ++bool JBoltManager::can_reorder_now() { ++ return OrderAccess::load_acquire(&_reorderable_method_cnt) >= _reorder_method_threshold_cnt; ++} ++ ++bool JBoltManager::should_reorder_now() { ++ return OrderAccess::load_acquire(&_reorderable_method_cnt) == _reorder_method_threshold_cnt; ++} ++ ++int JBoltManager::primary_hot_seg() { ++ return OrderAccess::load_acquire(&_primary_hot_seg); ++} ++ ++int JBoltManager::secondary_hot_seg() { ++ return OrderAccess::load_acquire(&_secondary_hot_seg); ++} ++ ++bool JBoltManager::force_sweep() { ++ return OrderAccess::load_acquire(&_force_sweep); ++} ++ ++static bool is_valid_time(const char* timeStr) { ++ // hh:mm ++ if 
(strlen(timeStr) != 5) return false; ++ ++ if (timeStr[2] != ':') return false; ++ ++ if (timeStr[0] < '0' || timeStr[0] > '2') return false; ++ if (timeStr[1] < '0' || timeStr[1] > '9') return false; ++ if (timeStr[3] < '0' || timeStr[3] > '5') return false; ++ if (timeStr[4] < '0' || timeStr[4] > '9') return false; ++ ++ int hour = (timeStr[0] - '0') * 10 + (timeStr[1] - '0'); ++ int minute = (timeStr[3] - '0') * 10 + (timeStr[4] - '0'); ++ ++ if (hour < 0 || hour > 23 || minute < 0 || minute > 59) return false; ++ ++ return true; ++} ++ ++void JBoltManager::remove_duplicate_time(GrowableArray* times) { ++ for (int i = 0; i < times->length(); ++i) { ++ char* time = times->at(i); ++ bool exists = false; ++ for (int j = 0; j < _rescheduling_time->length(); ++j) { ++ char* uniqueTime = _rescheduling_time->at(j); ++ if (strcmp(time, uniqueTime) == 0) { ++ exists = true; ++ log_warning(jbolt)("time %s is duplicated in JBoltRescheduling", time); ++ break; ++ } ++ } ++ if (!exists) { ++ if (_rescheduling_time->length() >= 10) { ++ // support max 10 time to reschedule ++ log_warning(jbolt)("JBoltRescheduling support up to 10 time settings, any excess will be ignored."); ++ return; ++ } ++ log_trace(jbolt)("Set time trigger at %s", time); ++ _rescheduling_time->append(time); ++ } ++ } ++} ++ ++static int time_comparator(char** time1, char** time2) { ++ int hour1 = ((*time1)[0] - '0') * 10 + ((*time1)[1] - '0'); ++ int minute1 = ((*time1)[3] - '0') * 10 + ((*time1)[4] - '0'); ++ int hour2 = ((*time2)[0] - '0') * 10 + ((*time2)[1] - '0'); ++ int minute2 = ((*time2)[3] - '0') * 10 + ((*time2)[4] - '0'); ++ ++ if (hour1 == hour2) { ++ return (minute1 > minute2) ? 1 : ((minute1 == minute2) ? 0 : -1); ++ } ++ return (hour1 > hour2) ? 1 : ((hour1 == hour2) ? 0 : -1); ++} ++ ++void JBoltManager::parse_rescheduling() { ++ if (!FLAG_IS_CMDLINE(JBoltRescheduling)) return; ++ ++ if (JBoltRescheduling == NULL || strlen(JBoltRescheduling) == 0) { ++ vm_exit_during_initialization("JBoltRescheduling is set but is null"); ++ } ++ ++ const int buflen = 1024; ++ if (strlen(JBoltRescheduling) > buflen) { ++ vm_exit_during_initialization("JBoltRescheduling is too long"); ++ } ++ ++ if (!auto_mode()) { ++ log_warning(jbolt)("JBoltRescheduling is ignored because it is not in auto mode."); ++ return; ++ } ++ ++ ResourceMark rm; ++ _rescheduling_time = new (ResourceObj::C_HEAP, mtTracing) GrowableArray(1, mtTracing); ++ GrowableArray* tmp_time = new (ResourceObj::C_HEAP, mtTracing) GrowableArray(1, mtTracing); ++ ++ const char* rescheduling_str = JBoltRescheduling; ++ const char* start = rescheduling_str; ++ const char* end = strchr(rescheduling_str, ','); ++ char timeStr[buflen] = {0}; ++ ++ while (end != NULL) { ++ size_t len = (size_t)(end - start); ++ strncpy(timeStr, start, buflen); ++ timeStr[len] = '\0'; ++ ++ if (is_valid_time(timeStr)) { ++ tmp_time->append(strdup(timeStr)); ++ } ++ else { ++ vm_exit_during_initialization(err_msg("Invalid time %s in JBoltRescheduling", timeStr)); ++ } ++ ++ start = end + 1; ++ end = strchr(start, ','); ++ } ++ ++ if (*start != '\0') { ++ strncpy(timeStr, start, buflen); ++ timeStr[strlen(start)] = '\0'; ++ ++ if (is_valid_time(timeStr)) { ++ tmp_time->append(strdup(timeStr)); ++ } ++ else { ++ vm_exit_during_initialization(err_msg("Invalid time %s in JBoltRescheduling", timeStr)); ++ } ++ } ++ ++ remove_duplicate_time(tmp_time); ++ _rescheduling_time->sort(&time_comparator); ++ ++ delete tmp_time; ++} ++ ++GrowableArray* JBoltManager::rescheduling_time() { ++ return 
_rescheduling_time; ++} ++ ++int JBoltManager::clear_manager() { ++ /* _hot_methods_sorted, _hot_methods_vis and _sampled_methods_refs have already been cleared elsewhere, don't delete them again */ ++ guarantee(_hot_methods_sorted == NULL, "sanity"); ++ guarantee(_hot_methods_vis == NULL, "sanity"); ++ guarantee(_sampled_methods_refs == NULL, "sanity"); ++ // Re-allocate them ++ _hot_methods_sorted = new (ResourceObj::C_HEAP, mtCompiler) GrowableArray(1, mtCompiler); ++ _hot_methods_vis = new (ResourceObj::C_HEAP, mtCompiler) MethodKeyMap(); ++ _sampled_methods_refs = new (ResourceObj::C_HEAP, mtTracing) StackFrameKeyMap(); ++ ++ if (_sampled_methods_hotcount_stored != NULL) { ++ MethodHotCountMap* tmp = _sampled_methods_hotcount_stored; ++ _sampled_methods_hotcount_stored = NULL; ++ delete tmp; ++ } ++ _sampled_methods_hotcount_stored = new (ResourceObj::C_HEAP, mtTracing) MethodHotCountMap(); ++ ++ return 0; ++} ++ ++/** ++ * Invoked in JBoltControlThread::prev_control_schedule(). ++ * Expected to execute only in auto mode when JBolt.start is triggered. ++ * Clear JBolt-related data structures to restore an initial environment, as if sampling had never happened. ++*/ ++int JBoltManager::clear_last_sample_datas() { ++ int ret = 0; ++ // Clear _table_jbolt in JfrStackTraceRepository ++ ret = JfrStackTraceRepository::clear_jbolt(); ++ // Clear JBoltCallGraph ++ ret = JBoltCallGraph::callgraph_instance().clear_instance(); ++ // Clear JBoltManager ++ ret = clear_manager(); ++ ++ return ret; ++} ++ ++/** ++ * Invoked in JBoltControlThread::prev_control_schedule(). ++ * Swap the primary hot segment with the secondary hot segment. ++ */ ++void JBoltManager::swap_semi_jbolt_segs() { ++ guarantee(reorder_phase_is_waiting(), "swap must happen in reorder phase Waiting."); ++ int tmp = Atomic::xchg(OrderAccess::load_acquire(&_primary_hot_seg), &_secondary_hot_seg); ++ Atomic::xchg(tmp, &_primary_hot_seg); ++ OrderAccess::release_store(&_hot_codecache_full, false); ++} ++ ++/** ++ * Invoked in JBoltControlThread::post_control_schedule(). ++ * Free secondary hot segment space for the next reorder. ++ */ ++void JBoltManager::clear_secondary_hot_seg(TRAPS) { ++ guarantee(reorder_phase_is_available(), "secondary clear must happen in reorder phase Available."); ++ // scan secondary hot seg and recompile alive nmethods to non-profiled ++ ResourceMark rm(THREAD); ++ // We cannot alloc weak handle within CodeCache_lock because of the mutex rank check. ++ // So instead we keep the methods alive only within the scope of this method. 
++ JBoltUtils::MetaDataKeepAliveMark mdm(THREAD); ++ const GrowableArray& to_recompile = mdm.kept(); ++ ++ { ++ MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag); ++ CodeHeap* sec_hot = CodeCache::get_code_heap(secondary_hot_seg()); ++ for (CodeBlob* cb = (CodeBlob*) sec_hot->first(); cb != NULL; cb = (CodeBlob*) sec_hot->next(cb)) { ++ nmethod* nm = cb->as_nmethod_or_null(); ++ Method* m = (nm != NULL) ? nm->method() : NULL; ++ if (nm != NULL && nm->get_state() == CompiledMethod::in_use && m != NULL) { ++ mdm.add(m); ++ } ++ } ++ } ++ ++ for (int i = 0; i < to_recompile.length(); ++i) { ++ Method* m = (Method*) to_recompile.at(i); ++ methodHandle method(THREAD, m); ++ CompileTaskInfo* cti = create_compile_task_info(method); ++ if (cti == NULL) continue; ++ guarantee(cti->try_select(), "method is on stack, should be ok"); ++ assert(cti->hot_method() == NULL, "sanity"); ++ methodHandle hot_method; ++ ++ bool recompile_result = enqueue_recompile_task(cti, method, hot_method, THREAD); ++ if (recompile_result) { ++ check_compiled_result(method(), CodeBlobType::MethodNonProfiled, THREAD); ++ } ++ delete cti; ++ } ++ ++ OrderAccess::release_store(&_force_sweep, true); ++ // need 2 cleaning passes before not_entrant nmethods can convert to zombie, @see nmethod::mark_as_seen_on_stack ++ NMethodSweeper::force_sweep(); ++ NMethodSweeper::force_sweep(); ++ // this sweep converts them to zombie ++ NMethodSweeper::force_sweep(); ++ // this sweep cleans up zombies ++ NMethodSweeper::force_sweep(); ++ OrderAccess::release_store(&_force_sweep, false); ++ log_info(jbolt)("Sweep secondary codecache: %s", CodeCache::get_code_heap_name(JBoltManager::secondary_hot_seg())); ++ print_code_heaps(); ++} ++ ++/** ++ * Invoked in ciEnv::register_method() in CompilerThread. ++ * Controls where the new nmethod should be allocated. ++ * ++ * Returns CodeBlobType::All if it is not determined by JBolt logic. ++ */ ++int JBoltManager::calc_code_blob_type(Method* method, CompileTask* task, TRAPS) { ++ assert(UseJBolt && reorder_phase_is_collecting_or_reordering(), "sanity"); ++ const int not_care = CodeBlobType::All; ++ ++ // Only cares about non-profiled segment. ++ int lvl = task->comp_level(); ++ if (lvl != CompLevel_full_optimization && lvl != CompLevel_simple) { ++ return not_care; ++ } ++ ++ // Ignore on-stack-replacement. ++ if (task->osr_bci() != InvocationEntryBci) { ++ return not_care; ++ } ++ ++ int cur_reorder_phase = reorder_phase(); ++ // Do nothing after reordering. ++ if (cur_reorder_phase != JBoltReorderPhase::Collecting && cur_reorder_phase != JBoltReorderPhase::Reordering) { ++ return not_care; ++ } ++ // Only cares about the current reordering method. ++ if (cur_reorder_phase == JBoltReorderPhase::Reordering) { ++ if (cur_reordering_method() == method) { ++ log_trace(jbolt)("Compiling to JBolt heap: method=%s.", method->name_and_sig_as_C_string()); ++ return primary_hot_seg(); ++ } ++ return not_care; ++ } ++ guarantee(cur_reorder_phase == JBoltReorderPhase::Collecting, "sanity"); ++ assert(!auto_mode(), "sanity"); ++ ++ JBoltMethodKey method_key(method); ++ JBoltMethodValue* method_value = _hot_methods_vis->get(method_key); ++ if (method_value == NULL) { ++ return not_care; ++ } ++ ++ // Register the method and the compile task. 
++ if (method_value->get_comp_info() == NULL) { ++ CompileTaskInfo* cti = new CompileTaskInfo(method, task->osr_bci(), task->comp_level(), (int) task->compile_reason(), ++ task->hot_method(), task->hot_count()); ++ if (method_value->set_comp_info(cti)) { ++ int cnt = inc_reorderable_method_cnt(); ++ log_trace(jbolt)("Reorderable method found: cnt=%d, lvl=%d, p=%p, method=%s.", ++ cnt, task->comp_level(), method, method->name_and_sig_as_C_string()); ++ if (is_power_of_2(_reorder_method_threshold_cnt - cnt)) { ++ log_info(jbolt)("Reorderable cnt: %d/%d/%d", cnt, _reorder_method_threshold_cnt, _hot_methods_sorted->length()); ++ } ++ if (cnt == _reorder_method_threshold_cnt) { ++ log_info(jbolt)("Time to reorder: %d/%d/%d", cnt, _reorder_method_threshold_cnt, _hot_methods_sorted->length()); ++ _start_reordering_thread = THREAD; ++ } ++ } else { ++ delete cti; ++ } ++ } ++ ++ return secondary_hot_seg(); ++} ++ ++/* ++ * Invoked in CodeCache::allocate() ++ * set _hot_codecache_full to stop recompilation early ++ */ ++void JBoltManager::handle_full_jbolt_code_cache() { ++ log_warning(jbolt)("%s is full, will stop recompilation", CodeCache::get_code_heap_name(primary_hot_seg())); ++ OrderAccess::release_store(&_hot_codecache_full, true); ++} ++ ++/** ++ * Check if reordering should start. ++ * The reordering should only start once (for now). ++ * We don't do this check in "if (cnt == _reorder_method_threshold_cnt)" in calc_code_blob_type() ++ * because it will cause an assert error: "Possible safepoint reached by thread that does not allow it". ++ */ ++void JBoltManager::check_start_reordering(TRAPS) { ++ // _start_reordering_thread is set and tested in the same thread. No need to be atomic. ++ if (_start_reordering_thread == THREAD) { ++ _start_reordering_thread = NULL; ++ if (JBoltControlThread::get_thread() == NULL) { ++ assert(can_reorder_now(), "sanity"); ++ log_info(jbolt)("Starting JBoltControlThread to reorder."); ++ JBoltControlThread::start_thread(CHECK_AND_CLEAR); ++ } ++ } ++} ++ ++/** ++ * The task will be added to the compile queue and be compiled just like other tasks. ++ */ ++CompileTask* JBoltManager::create_a_task_instance(CompileTaskInfo* cti, const methodHandle& method, const methodHandle& hot_method, TRAPS) { ++ int osr_bci = cti->osr_bci(); ++ int comp_level = cti->comp_level(); ++ CompileTask::CompileReason comp_reason = (CompileTask::CompileReason) cti->comp_reason(); ++ int hot_count = cti->hot_count(); ++ bool is_blocking = true; ++ ++ // init a task (@see CompileBroker::create_compile_task()) ++ CompileTask* task = CompileTask::allocate(); ++ int compile_id = CompileBroker::assign_compile_id(method, osr_bci); ++ task->initialize(compile_id, method, osr_bci, comp_level, ++ hot_method, hot_count, comp_reason, ++ is_blocking); ++ return task; ++} ++ ++/** ++ * Print the failure reason if something is wrong in recompilation. ++ */ ++bool JBoltManager::check_compiled_result(Method* method, int check_blob_type, TRAPS) { ++ CompiledMethod* cm = method->code(); ++ if (cm == NULL) { ++ log_trace(jbolt)("Recompilation failed because of null nmethod. method=%s", method->name_and_sig_as_C_string()); ++ return false; ++ } ++ nmethod* nm = cm->as_nmethod_or_null(); ++ if (nm == NULL) { ++ log_trace(jbolt)("Recompilation failed because the code is not a nmethod. 
method=%s", method->name_and_sig_as_C_string()); ++ return false; ++ } ++ int code_blob_type = CodeCache::get_code_blob_type(nm); ++ if (code_blob_type != check_blob_type) { ++ log_trace(jbolt)("Recompilation failed because the nmethod is not in heap [%s]: it's in [%s]. method=%s", ++ CodeCache::get_code_heap_name(check_blob_type), CodeCache::get_code_heap_name(code_blob_type), method->name_and_sig_as_C_string()); ++ return false; ++ } ++ log_trace(jbolt)("Recompilation good: code=%p, size=%d, method=%s, heap=%s.", ++ nm, nm->size(), method->name_and_sig_as_C_string(), CodeCache::get_code_heap_name(check_blob_type)); ++ return true; ++} ++ ++/** ++ * Create the compile task instance and enqueue it into the compile queue. ++ */ ++bool JBoltManager::enqueue_recompile_task(CompileTaskInfo* cti, const methodHandle& method, const methodHandle& hot_method, TRAPS) { ++ CompileTask* task = NULL; ++ CompileQueue* queue = CompileBroker::compile_queue(cti->comp_level()); ++ { MutexLocker locker(MethodCompileQueue_lock, THREAD); ++ if (CompileBroker::compilation_is_in_queue(method)) { ++ log_trace(jbolt)("JBOLT won't compile as \"compilation is in queue\": method=%s.", method->name_and_sig_as_C_string()); ++ return false; ++ } ++ ++ task = create_a_task_instance(cti, method, hot_method, CHECK_AND_CLEAR_false); ++ if (task == NULL) { ++ log_trace(jbolt)("JBOLT won't compile as \"task instance is NULL\": method=%s.", method->name_and_sig_as_C_string()); ++ return false; ++ } ++ queue->add(task); ++ } ++ ++ // Same waiting logic as CompileBroker::wait_for_completion(). ++ { MonitorLocker ml(task->lock(), THREAD); ++ while (!task->is_complete() && !CompileBroker::is_compilation_disabled_forever()) { ++ ml.wait(); ++ } ++ } ++ ++ CompileBroker::wait_for_completion(task); ++ task = NULL; // freed ++ return true; ++} ++ ++/** ++ * Recompilation is to move the nmethod to _primary_hot_seg. ++ */ ++bool JBoltManager::recompile_one(CompileTaskInfo* cti, const methodHandle& method, const methodHandle& hot_method, TRAPS) { ++ ResourceMark rm(THREAD); ++ ++ if (cti->osr_bci() != InvocationEntryBci) { ++ log_trace(jbolt)("We don't handle on-stack-replacement nmethods: method=%s.", method->name_and_sig_as_C_string()); ++ return false; ++ } ++ ++ if (log_is_enabled(Trace, jbolt)) { ++ const char* heap_name = NULL; ++ CompiledMethod* cm = method->code(); ++ if (cm == NULL) heap_name = ""; ++ else if (!cm->is_nmethod()) heap_name = ""; ++ else heap_name = CodeCache::get_code_heap_name(CodeCache::get_code_blob_type(cm)); ++ log_trace(jbolt)("Start to recompile & reorder: heap=%s, method=%s.", heap_name, method->name_and_sig_as_C_string()); ++ } ++ ++ // Add a compilation task. ++ set_cur_reordering_method(method()); ++ bool ret = enqueue_recompile_task(cti, method, hot_method, CHECK_AND_CLEAR_false); ++ ret = ret && check_compiled_result(method(), primary_hot_seg(), CHECK_AND_CLEAR_false); ++ return ret; ++} ++ ++/** ++ * This method is invoked in a new thread JBoltControlThread. ++ * Recompiles the methods in the order list one by one (serially) based on the hot order. ++ * The methods to recompile were almost all in MethodJBoltTmp, and will be installed in ++ * MethodJBoltHot after recompilation. 
++ */ ++void JBoltManager::reorder_all_methods(TRAPS) { ++ guarantee(UseJBolt && reorder_phase_is_reordering(), "sanity"); ++ log_info(jbolt)("Start to reorder!"); ++ print_code_heaps(); ++ ++ ResourceMark rm(THREAD); ++ for (int i = 0; i < _hot_methods_sorted->length(); ++i) { ++ JBoltMethodKey k = _hot_methods_sorted->at(i); ++ JBoltMethodValue* v = _hot_methods_vis->get(k); ++ if (v == NULL) continue; ++ CompileTaskInfo* cti = v->get_comp_info(); ++ if (cti == NULL) continue; ++ if (!cti->try_select()) continue; ++ ++ methodHandle method(THREAD, cti->method()); ++ methodHandle hot_method(THREAD, cti->hot_method()); ++ ++ if (!recompile_one(cti, method, hot_method, THREAD) && OrderAccess::load_acquire(&_hot_codecache_full)) { ++ // JBolt codecache is full, stop early ++ break; ++ } ++ if (HAS_PENDING_EXCEPTION) { ++ Handle ex(THREAD, PENDING_EXCEPTION); ++ CLEAR_PENDING_EXCEPTION; ++ LogTarget(Warning, jbolt) lt; ++ if (lt.is_enabled()) { ++ LogStream ls(lt); ++ ls.print("Failed to recompile the method: %s.", method->name_and_sig_as_C_string()); ++ java_lang_Throwable::print(ex(), &ls); ++ } ++ } ++ } ++ ++ log_info(jbolt)("JBolt reordering succeeds."); ++ print_code_heaps(); ++ ++} ++ ++void JBoltManager::clear_structures() { ++ delete _sampled_methods_refs; ++ _sampled_methods_refs = NULL; ++ JBoltCallGraph::deinitialize(); ++ set_cur_reordering_method(NULL); ++ delete _hot_methods_sorted; ++ _hot_methods_sorted = NULL; ++ delete _hot_methods_vis; ++ _hot_methods_vis = NULL; ++} ++ ++void JBoltManager::print_code_heap(outputStream& ls, CodeHeap* heap, const char* name) { ++ for (CodeBlob* cb = (CodeBlob*) heap->first(); cb != NULL; cb = (CodeBlob*) heap->next(cb)) { ++ nmethod* nm = cb->as_nmethod_or_null(); ++ Method* m = nm != NULL ? nm->method() : NULL; ++ ls.print_cr("%s %p %d alive=%s, zombie=%s, nmethod=%s, entrant=%s, name=[%s %s %s]", ++ name, ++ cb, cb->size(), ++ B_TF(cb->is_alive()), ++ B_TF(cb->is_zombie()), ++ B_TF(cb->is_nmethod()), ++ nm ? B_TF(!nm->is_not_entrant()) : "?", ++ m ? m->method_holder()->name()->as_C_string() : cb->name(), ++ m ? m->name()->as_C_string() : NULL, ++ m ? 
m->signature()->as_C_string() : NULL); ++ } ++} ++ ++void JBoltManager::print_code_heaps() { ++ { ++ LogTarget(Debug, jbolt) lt; ++ if (!lt.is_enabled()) return; ++ LogStream ls(lt); ++ MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag); ++ CodeCache::print_summary(&ls, true); ++ } ++ ++ { ++ LogTarget(Trace, jbolt) lt; ++ if (!lt.is_enabled()) return; ++ LogStream ls(lt); ++ CodeHeap* hot_heap = CodeCache::get_code_heap(CodeBlobType::MethodJBoltHot); ++ CodeHeap* tmp_heap = CodeCache::get_code_heap(CodeBlobType::MethodJBoltTmp); ++ ++ ResourceMark rm; ++ if (hot_heap == NULL) { ++ ls.print_cr("The jbolt hot heap is null."); ++ } else { ++ print_code_heap(ls, hot_heap, "hot"); ++ } ++ if (tmp_heap == NULL) { ++ ls.print_cr("The jbolt tmp heap is null."); ++ } else { ++ print_code_heap(ls, tmp_heap, "tmp"); ++ } ++ } ++} ++ ++void JBoltManager::dump_nmethod_count(fileStream& file, nmethod* nm, CodeBlob* cb) { ++ int hotcount = 0; ++ if (cb->is_alive() && !nm->is_not_entrant() && _sampled_methods_hotcount_stored->get(nm->method()) != NULL) { ++ hotcount = *(_sampled_methods_hotcount_stored->get(nm->method())); ++ } ++ file.print_cr(" sample count: %d", hotcount); ++} ++ ++void JBoltManager::dump_code_heap_with_count(const char* filename, CodeHeap* heap) { ++ if (heap == NULL) return; ++ ++ fileStream invocation_count_file(filename, "w+"); ++ uint64_t total = 0; ++ if (invocation_count_file.is_open()) { ++ invocation_count_file.print("%s:", heap->name()); ++ invocation_count_file.print_cr(" size=" SIZE_FORMAT "Kb used=" SIZE_FORMAT ++ "Kb max_used=" SIZE_FORMAT "Kb free=" SIZE_FORMAT "Kb", ++ (size_t)(heap->high_boundary() - heap->low_boundary())/K, (size_t)(heap->high_boundary() - heap->low_boundary() - heap->unallocated_capacity())/K, ++ heap->max_allocated_capacity()/K, heap->unallocated_capacity()/K); ++ invocation_count_file.print_cr(" bounds [" INTPTR_FORMAT ", " INTPTR_FORMAT ", " INTPTR_FORMAT "]", ++ p2i(heap->low_boundary()), ++ p2i(heap->high()), ++ p2i(heap->high_boundary())); ++ for (CodeBlob* cb = (CodeBlob*) heap->first(); cb != NULL; cb = (CodeBlob*) heap->next(cb)) { ++ nmethod* nm = cb->as_nmethod_or_null(); ++ invocation_count_file.print_cr("###%lu %s size=%dB %p %p %p state=%d name=%s alive=%s nmethod=%s use=%s entrant=%s zombie=%s level=%d code=%p", ++ total++, ++ "np", ++ cb->size(), ++ cb, cb->code_begin(), cb->data_end(), ++ nm ? nm->get_state() : ILL_NM_STATE, ++ (nm && nm->method()) ? nm->method()->name_and_sig_as_C_string() : "NULL", ++ B_TF(cb->is_alive()), ++ B_TF(cb->is_nmethod()), ++ nm ? B_TF(nm->is_in_use()) : "?", ++ nm ? B_TF(!nm->is_not_entrant()) : "?", ++ nm ? B_TF(nm->is_zombie()) : "?", ++ nm ? nm->comp_level() : -1, ++ (nm && nm->method()) ? nm->method()->code() : 0); ++ if (nm && nm->method()) { ++ dump_nmethod_count(invocation_count_file, nm, cb); ++ } ++ } ++ } ++ else { ++ log_info(jbolt)("%s open error\n", filename); ++ } ++} ++ ++void JBoltManager::dump_code_heaps_with_count() { ++ if (!EnableDumpGraph) { ++ ShouldNotReachHere(); ++ return; ++ } ++ ++ MutexLockerEx mu(CodeCache_lock, Mutex::_no_safepoint_check_flag); ++ CodeHeap* np_heap = CodeCache::get_code_heap(CodeBlobType::MethodNonProfiled); ++ CodeHeap* hot_heap = (UseJBolt && !JBoltDumpMode) ? CodeCache::get_code_heap(CodeBlobType::MethodJBoltHot) : NULL; ++ CodeHeap* tmp_heap = (UseJBolt && !JBoltDumpMode) ? 
CodeCache::get_code_heap(CodeBlobType::MethodJBoltTmp) : NULL; ++ ++ ResourceMark rm; ++ time_t current_time; ++ struct tm p; ++ char oldpath[PATH_LENGTH]; ++ char dirname[PATH_LENGTH]; ++ ++ time(&current_time); ++ localtime_r(&current_time, &p); ++ sprintf(dirname, "JBOLT.%d.%d.%d.%02d:%02d:%02d",1900+p.tm_year,1+p.tm_mon,p.tm_mday,p.tm_hour,p.tm_min,p.tm_sec); ++ ++ mkdir(dirname, S_IRWXU|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH); ++ if (getcwd(oldpath, PATH_LENGTH) != NULL) { ++ if (chdir(dirname) == OS_ERR) { ++ warning("Can't change to directory %s", dirname); ++ return; ++ } ++ dump_code_heap_with_count("count_np.txt", np_heap); ++ dump_code_heap_with_count("count_hot.txt", hot_heap); ++ dump_code_heap_with_count("count_tmp.txt", tmp_heap); ++ if (chdir(oldpath) == OS_ERR) { ++ warning("Can't change to directory %s", oldpath); ++ } ++ } ++} ++ ++#undef B_TF +\ No newline at end of file +diff --git a/src/hotspot/share/jbolt/jBoltManager.hpp b/src/hotspot/share/jbolt/jBoltManager.hpp +new file mode 100644 +index 000000000..aeb3e1f4f +--- /dev/null ++++ b/src/hotspot/share/jbolt/jBoltManager.hpp +@@ -0,0 +1,347 @@ ++/* ++ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#ifndef SHARE_JBOLT_JBOLTMANAGER_HPP ++#define SHARE_JBOLT_JBOLTMANAGER_HPP ++ ++#include "compiler/compileTask.hpp" ++#include "jbolt/jbolt_globals.hpp" ++#include "jfr/recorder/stacktrace/jfrStackTraceRepository.hpp" ++#include "jfr/dcmd/jfrDcmds.hpp" ++#include "memory/allocation.hpp" ++#include "memory/heap.hpp" ++#include "oops/method.hpp" ++#include "oops/symbol.hpp" ++#include "runtime/handles.hpp" ++#include "runtime/thread.hpp" ++#include "utilities/growableArray.hpp" ++#include "utilities/resourceHash.hpp" ++ ++enum JBoltErrorCode { ++ JBoltOK = 0, ++ JBoltOrderNULL = 1, ++ JBoltOpenFileError = 2 ++}; ++ ++struct JBoltReorderPhase { ++ static const int Waiting = -1; // JBolt logic is waiting for something to be done. ++ static const int Available = 0; // JBolt logic is not working or is done (can be reordered again now). ++ static const int Collecting = 1; // Collecting methods in the order file (this phase is for two-phase only). ++ static const int Profiling = 2; // JFR is working (this phase is for one-phase only). ++ static const int Reordering = 3; // Recompiling and re-laying. ++ static const int End = 4; // JBolt is not available anymore (for two-phase, or error happened on one-phase). 
++}; ++ ++class CompileTaskInfo : public CHeapObj { ++ Method* const _method; ++ jobject _method_holder; ++ const int _osr_bci; ++ const int _comp_level; ++ const int _comp_reason; ++ Method* const _hot_method; ++ jobject _hot_method_holder; ++ const int _hot_count; ++ ++public: ++ CompileTaskInfo(Method* method, int osr_bci, int comp_level, int comp_reason, Method* hot_method, int hot_cnt); ++ ~CompileTaskInfo(); ++ ++ bool try_select(); ++ ++ Method* method() const { return _method; } ++ int osr_bci() const { return _osr_bci; } ++ int comp_level() const { return _comp_level; } ++ int comp_reason() const { return _comp_reason; } ++ Method* hot_method() const { return _hot_method; } ++ int hot_count() const { return _hot_count; } ++}; ++ ++class JBoltMethodKey : public StackObj { ++ Symbol* _klass; ++ Symbol* _name; ++ Symbol* _sig; ++ ++ void inc_ref_cnt() { ++ Symbol* arr[] = { _klass, _name, _sig }; ++ for (int i = 0; i < (int) (sizeof(arr) / sizeof(arr[0])); ++i) { ++ if (arr[i] != NULL) arr[i]->increment_refcount(); ++ } ++ } ++ ++ void dec_ref_cnt() { ++ Symbol* arr[] = { _klass, _name, _sig }; ++ for (int i = 0; i < (int) (sizeof(arr) / sizeof(arr[0])); ++i) { ++ if (arr[i] != NULL) arr[i]->decrement_refcount(); ++ } ++ } ++public: ++ ++ JBoltMethodKey(Symbol* klass, Symbol* name, Symbol* sig): _klass(klass), _name(name), _sig(sig) { /* no inc_ref_cnt() here for SymbolTable::new_symbol() */ } ++ JBoltMethodKey(Method* method): _klass(method->method_holder()->name()), _name(method->name()), _sig(method->signature()) { inc_ref_cnt(); } ++ JBoltMethodKey(const JBoltMethodKey& other): _klass(other._klass), _name(other._name), _sig(other._sig) { inc_ref_cnt(); } ++ JBoltMethodKey(): _klass(NULL), _name(NULL), _sig(NULL) {} ++ ~JBoltMethodKey() { dec_ref_cnt(); } ++ ++ JBoltMethodKey& operator = (const JBoltMethodKey& other) { ++ dec_ref_cnt(); ++ _klass = other._klass; ++ _name = other._name; ++ _sig = other._sig; ++ inc_ref_cnt(); ++ return *this; ++ } ++ ++ unsigned hash() const { ++ unsigned v = primitive_hash(_klass); ++ v = v * 31 + primitive_hash(_name); ++ v = v * 31 + primitive_hash(_sig); ++ return v; ++ } ++ bool equals(const JBoltMethodKey& other) const { ++ return _klass == other._klass && _name == other._name && _sig == other._sig; ++ } ++ ++ static unsigned calc_hash(const JBoltMethodKey& k) { ++ return k.hash(); ++ } ++ static bool calc_equals(const JBoltMethodKey& k1, const JBoltMethodKey& k2) { ++ return k1.equals(k2); ++ } ++ ++ Symbol* klass() const { return _klass; } ++ Symbol* name() const { return _name; } ++ Symbol* sig() const { return _sig; } ++}; ++ ++class JBoltMethodValue : public StackObj { ++private: ++ CompileTaskInfo* volatile _comp_info; ++ ++public: ++ JBoltMethodValue(): _comp_info(NULL) {} ++ ~JBoltMethodValue(); ++ ++ CompileTaskInfo* get_comp_info(); ++ bool set_comp_info(CompileTaskInfo* info); ++ void clear_comp_info_but_not_release(); ++}; ++ ++class JBoltStackFrameKey : public StackObj { ++ Method* _method; ++ traceid _methodid; ++ ++public: ++ JBoltStackFrameKey(Method* method, traceid methodid): _method(method), _methodid(methodid) {} ++ JBoltStackFrameKey(const JBoltStackFrameKey& other): _method(other._method), _methodid(other._methodid) {} ++ JBoltStackFrameKey(): _method(NULL), _methodid(0) {} ++ ~JBoltStackFrameKey() { /* nothing to do as _method is a softcopy of JfrStackFrame::_method */ } ++ ++ ++ JBoltStackFrameKey& operator = (const JBoltStackFrameKey& other) { ++ _method = other._method; ++ _methodid = other._methodid; ++ return 
*this; ++ } ++ ++ unsigned hash() const { ++ unsigned v = primitive_hash(_method); ++ v = v * 31 + primitive_hash(_methodid); ++ return v; ++ } ++ ++ bool equals(const JBoltStackFrameKey& other) const { ++ return _method == other._method && _methodid == other._methodid; ++ } ++ ++ static unsigned calc_hash(const JBoltStackFrameKey& k) { ++ return k.hash(); ++ } ++ ++ static bool calc_equals(const JBoltStackFrameKey& k1, const JBoltStackFrameKey& k2) { ++ return k1.equals(k2); ++ } ++}; ++ ++class JBoltStackFrameValue : public StackObj { ++private: ++ jobject _method_holder; ++ ++public: ++ JBoltStackFrameValue(jobject method_holder): _method_holder(method_holder) {} ++ ~JBoltStackFrameValue(); ++ ++ jobject get_method_holder(); ++ void clear_method_holder_but_not_release(); ++}; ++ ++class JBoltManager : public AllStatic { ++ friend class JBoltControlThread; ++ ++ typedef ResourceHashtable MethodKeyMap; ++ ++ typedef ResourceHashtable StackFrameKeyMap; ++ ++ typedef ResourceHashtable, primitive_equals, ++ 15889, ResourceObj::C_HEAP, mtTracing> MethodHotCountMap; ++ ++ static GrowableArray* _hot_methods_sorted; ++ static MethodKeyMap* _hot_methods_vis; ++ static int _reorder_method_threshold_cnt; ++ ++ static volatile int _reorder_phase; ++ static volatile int _reorderable_method_cnt; ++ static Method* volatile _cur_reordering_method; ++ ++ // the CompilerThread to start the new JBoltControlThread ++ static Thread* _start_reordering_thread; ++ ++ static StackFrameKeyMap* _sampled_methods_refs; ++ static MethodHotCountMap* _sampled_methods_hotcount_stored; ++ ++ // when not set JBoltDumpMode or JBoltLoadMode, JBolt will be in one-step auto mode. ++ static bool _auto_mode; ++ ++ // use MethodJBoltHot and MethodJBoltTmp as two semi hot space. ++ // each time restart a schedule, we exchange primary and secondary ++ static volatile int _primary_hot_seg; ++ static volatile int _secondary_hot_seg; ++ ++ // when primary hot codecache is full, we stop recompiling. ++ static volatile bool _hot_codecache_full; ++ // JBolt force sweep codecache. ++ static volatile bool _force_sweep; ++ ++ // timelist to trigger JBolt rescheduling(format: hh:mm) ++ static GrowableArray* _rescheduling_time; ++ ++private: ++ // Used in dump mode. ++ static methodHandle lookup_method(Method* method, traceid method_id); ++ static void construct_stacktrace(const JfrStackTrace &stacktrace); ++ ++ // Used in init phase 1. ++ static void check_mode(); ++ static void check_order_file(); ++ static void check_dependency(); ++ static size_t calc_nmethod_size_with_padding(size_t nmethod_size); ++ static size_t calc_segment_size_with_padding(size_t segment_size); ++ static void load_order_file_phase1(int* method_cnt , size_t* total_nmethod_size); ++ static void init_load_mode_phase1(); ++ ++ // Used in init phase 2. ++ static bool parse_method_line_phase2(char* const line, const int len, TRAPS); ++ static bool parse_connected_component_line_phase2(char* const line, const int len); ++ static void load_order_file_phase2(TRAPS); ++ static void init_load_mode_phase2(TRAPS); ++ static void init_dump_mode_phase2(TRAPS); ++ ++ // Used in auto mode. 
++ static int primary_hot_seg(); ++ static int secondary_hot_seg(); ++ static void remove_duplicate_time(GrowableArray* times); ++ static void parse_rescheduling(); ++ static GrowableArray* rescheduling_time(); ++ ++ // Used in auto mode prev_control_schedule ++ static int clear_last_sample_datas(); ++ static void swap_semi_jbolt_segs(); ++ static int clear_manager(); ++ ++ // Used in auto mode control_schedule ++ static void init_auto_transition(size_t* segment_size, TRAPS); ++ ++ // Used in auto mode post_control_schedule ++ static void clear_secondary_hot_seg(TRAPS); ++ ++ // JBolt phases ++ static int reorder_phase(); ++ ++ static bool reorder_phase_available_to_collecting(); ++ static bool reorder_phase_collecting_to_reordering(); ++ ++ static bool reorder_phase_available_to_profiling(); ++ static bool reorder_phase_profiling_to_reordering(); ++ static bool reorder_phase_reordering_to_available(); ++ static bool reorder_phase_profiling_to_available(); ++ static bool reorder_phase_profiling_to_waiting(); ++ static bool reorder_phase_waiting_to_reordering(); ++ static bool reorder_phase_waiting_to_available(); ++ ++ static bool reorder_phase_reordering_to_end(); ++ ++ static Method* cur_reordering_method(); ++ static void set_cur_reordering_method(Method* method); ++ static int inc_reorderable_method_cnt(); ++ ++ // Used in reordering phase. ++ static CompileTask* create_a_task_instance(CompileTaskInfo* cti, const methodHandle& method, const methodHandle& hot_method, TRAPS); ++ static bool check_compiled_result(Method* method, int check_blob_type, TRAPS); ++ static bool enqueue_recompile_task(CompileTaskInfo* cti, const methodHandle& method, const methodHandle& hot_method, TRAPS); ++ static bool recompile_one(CompileTaskInfo* cti, const methodHandle& method, const methodHandle& hot_method, TRAPS); ++ ++ static void print_code_heap(outputStream& ls, CodeHeap* heap, const char* name); ++ static void dump_nmethod_count(fileStream& file, nmethod* nm, CodeBlob* cb); ++ static void dump_code_heap_with_count(const char* filename, CodeHeap* heap); ++public: ++ static void log_stacktrace(const JfrStackTrace &stacktrace); ++ static void construct_cg_once(); ++ static void dump_order_in_manual(); ++ static JBoltErrorCode dump_order_in_jcmd(const char* filename); ++ ++ static void check_arguments_not_set(); ++ static void init_phase1(); ++ static void init_phase2(TRAPS); ++ static void init_code_heaps(size_t non_nmethod_size, size_t profiled_size, size_t non_profiled_size, size_t cache_size, size_t alignment); ++ ++ static bool auto_mode() { return _auto_mode; } ++ ++ static bool reorder_phase_is_waiting(); ++ static bool reorder_phase_is_available(); ++ static bool reorder_phase_is_collecting(); ++ static bool reorder_phase_is_profiling(); ++ static bool reorder_phase_is_reordering(); ++ static bool reorder_phase_is_profiling_or_waiting(); ++ static bool reorder_phase_is_collecting_or_reordering(); ++ ++ static bool can_reorder_now(); ++ static bool should_reorder_now(); ++ ++ static int calc_code_blob_type(Method* method, CompileTask* task, TRAPS); ++ ++ static void handle_full_jbolt_code_cache(); ++ static bool force_sweep(); ++ ++ static void check_start_reordering(TRAPS); ++ static void reorder_all_methods(TRAPS); ++ static void clear_structures(); ++ ++ static void print_code_heaps(); ++ static void dump_code_heaps_with_count(); ++}; ++ ++#endif // SHARE_JBOLT_JBOLTMANAGER_HPP +diff --git a/src/hotspot/share/jbolt/jBoltUtils.cpp b/src/hotspot/share/jbolt/jBoltUtils.cpp +new file mode 100644 
+index 000000000..6db5c9095 +--- /dev/null ++++ b/src/hotspot/share/jbolt/jBoltUtils.cpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "jbolt/jBoltUtils.hpp" ++ ++JBoltUtils::MetaDataKeepAliveMark::MetaDataKeepAliveMark(Thread* thread) : _thread(thread), _kept() { ++ assert(thread == Thread::current(), "Must be current thread"); ++ assert(_thread->is_in_stack((address)this), "not on stack?"); ++} ++ ++JBoltUtils::MetaDataKeepAliveMark::~MetaDataKeepAliveMark() { ++ for (int i = _kept.length() - 1; i >= 0; --i) { ++ Metadata* md = _kept.at(i); ++ int idx = _thread->metadata_handles()->find_from_end(md); ++ assert(idx != -1, "not in metadata_handles list"); ++ _thread->metadata_handles()->remove_at(idx); ++ } ++} +\ No newline at end of file +diff --git a/src/hotspot/share/jbolt/jBoltUtils.hpp b/src/hotspot/share/jbolt/jBoltUtils.hpp +new file mode 100644 +index 000000000..8258b7125 +--- /dev/null ++++ b/src/hotspot/share/jbolt/jBoltUtils.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#ifndef SHARE_JBOLT_JBOLTUTILS_HPP ++#define SHARE_JBOLT_JBOLTUTILS_HPP ++ ++#include "memory/allocation.hpp" ++#include "oops/metadata.hpp" ++#include "runtime/thread.hpp" ++#include "utilities/growableArray.hpp" ++ ++class JBoltUtils : public AllStatic { ++public: ++ /** ++ * Keep the metadata alive. 
++ * ++ * @see KeepAliveRegistrar ++ * @see methodHandle ++ */ ++ class MetaDataKeepAliveMark : public StackObj { ++ private: ++ Thread* _thread; ++ GrowableArray _kept; ++ ++ public: ++ MetaDataKeepAliveMark(Thread* thread); ++ ~MetaDataKeepAliveMark(); ++ ++ void add(Metadata* md); ++ ++ const GrowableArray& kept() { return _kept; } ++ }; ++}; ++ ++#endif // SHARE_JBOLT_JBOLTUTILS_HPP +\ No newline at end of file +diff --git a/src/hotspot/share/jbolt/jBoltUtils.inline.hpp b/src/hotspot/share/jbolt/jBoltUtils.inline.hpp +new file mode 100644 +index 000000000..972983df0 +--- /dev/null ++++ b/src/hotspot/share/jbolt/jBoltUtils.inline.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#ifndef SHARE_JBOLT_JBOLTUTILS_INLINE_HPP ++#define SHARE_JBOLT_JBOLTUTILS_INLINE_HPP ++ ++#include "jbolt/jBoltUtils.hpp" ++ ++// Register a metadata as 'in-use' by the thread. It's fine to register a ++// metadata multiple times (though perhaps inefficient). ++inline void JBoltUtils::MetaDataKeepAliveMark::add(Metadata* md) { ++ assert(md->is_valid(), "obj is valid"); ++ assert(_thread == Thread::current(), "thread must be current"); ++ _kept.push(md); ++ _thread->metadata_handles()->push(md); ++} ++ ++#endif // SHARE_JBOLT_JBOLTUTILS_INLINE_HPP +\ No newline at end of file +diff --git a/src/hotspot/share/jbolt/jbolt_globals.cpp b/src/hotspot/share/jbolt/jbolt_globals.cpp +new file mode 100644 +index 000000000..aee3feef1 +--- /dev/null ++++ b/src/hotspot/share/jbolt/jbolt_globals.cpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "jbolt/jbolt_globals.hpp" ++#include "runtime/globals_extension.hpp" ++ ++JBOLT_FLAGS(MATERIALIZE_DEVELOPER_FLAG, \ ++ MATERIALIZE_PD_DEVELOPER_FLAG, \ ++ MATERIALIZE_PRODUCT_FLAG, \ ++ MATERIALIZE_PD_PRODUCT_FLAG, \ ++ MATERIALIZE_DIAGNOSTIC_FLAG, \ ++ MATERIALIZE_PD_DIAGNOSTIC_FLAG, \ ++ MATERIALIZE_EXPERIMENTAL_FLAG, \ ++ MATERIALIZE_NOTPRODUCT_FLAG, ++ IGNORE_RANGE, \ ++ IGNORE_CONSTRAINT) +diff --git a/src/hotspot/share/jbolt/jbolt_globals.hpp b/src/hotspot/share/jbolt/jbolt_globals.hpp +new file mode 100644 +index 000000000..8ebc4fb7a +--- /dev/null ++++ b/src/hotspot/share/jbolt/jbolt_globals.hpp +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#ifndef SHARE_JBOLT_JBOLT_GLOBALS_HPP ++#define SHARE_JBOLT_JBOLT_GLOBALS_HPP ++ ++#include "runtime/globals.hpp" ++ ++#define JBOLT_FLAGS(develop, \ ++ develop_pd, \ ++ product, \ ++ product_pd, \ ++ diagnostic, \ ++ diagnostic_pd, \ ++ experimental, \ ++ notproduct, \ ++ range, \ ++ constraint) \ ++ \ ++ experimental(bool, UseJBolt, false, \ ++ "Enable JBolt feature.") \ ++ \ ++ experimental(bool, JBoltDumpMode, false, \ ++ "Trial run of JBolt. Collect profiling and dump it.") \ ++ \ ++ experimental(bool, JBoltLoadMode, false, \ ++ "Second run of JBolt. 
Load the profiling and reorder nmethods.") \ ++ \ ++ experimental(intx, JBoltSampleInterval, 600, \ ++ "Sample interval(second) of JBolt dump mode" \ ++ "only useful in auto mode.") \ ++ range(0, 36000) \ ++ \ ++ experimental(ccstr, JBoltOrderFile, NULL, \ ++ "The JBolt method order file to dump or load.") \ ++ \ ++ diagnostic(double, JBoltReorderThreshold, 0.86, \ ++ "The threshold to trigger JBolt reorder in load mode.") \ ++ range(0.1, 0.9) \ ++ \ ++ experimental(uintx, JBoltCodeHeapSize, 8*M , \ ++ "Code heap size of MethodJBoltHot and MethodJBoltTmp heaps.") \ ++ \ ++ experimental(ccstr, JBoltRescheduling, NULL, \ ++ "Trigger rescheduling at a fixed time every day.") \ ++ \ ++ diagnostic(bool, EnableDumpGraph, false, \ ++ "Enable JBolt.dumpgraph to produce source data files") \ ++ \ ++ ++// end of JBOLT_FLAGS ++ ++JBOLT_FLAGS(DECLARE_DEVELOPER_FLAG, \ ++ DECLARE_PD_DEVELOPER_FLAG, \ ++ DECLARE_PRODUCT_FLAG, \ ++ DECLARE_PD_PRODUCT_FLAG, \ ++ DECLARE_DIAGNOSTIC_FLAG, \ ++ DECLARE_PD_DIAGNOSTIC_FLAG, \ ++ DECLARE_EXPERIMENTAL_FLAG, \ ++ DECLARE_NOTPRODUCT_FLAG, \ ++ IGNORE_RANGE, \ ++ IGNORE_CONSTRAINT) ++ ++#endif // SHARE_JBOLT_JBOLT_GLOBALS_HPP +diff --git a/src/hotspot/share/jfr/metadata/metadata.xml b/src/hotspot/share/jfr/metadata/metadata.xml +index 2ae21bf0c..d24455569 100644 +--- a/src/hotspot/share/jfr/metadata/metadata.xml ++++ b/src/hotspot/share/jfr/metadata/metadata.xml +@@ -779,6 +779,8 @@ + + + ++ ++ + + + +diff --git a/src/hotspot/share/jfr/periodic/jfrPeriodic.cpp b/src/hotspot/share/jfr/periodic/jfrPeriodic.cpp +index 59bbd0c2d..d9580e57e 100644 +--- a/src/hotspot/share/jfr/periodic/jfrPeriodic.cpp ++++ b/src/hotspot/share/jfr/periodic/jfrPeriodic.cpp +@@ -66,6 +66,9 @@ + #if INCLUDE_SHENANDOAHGC + #include "gc/shenandoah/shenandoahJfrSupport.hpp" + #endif ++#if INCLUDE_JBOLT ++#include "jbolt/jbolt_globals.hpp" ++#endif // INCLUDE_JBOLT + + /** + * JfrPeriodic class +@@ -554,6 +557,8 @@ TRACE_REQUEST_FUNC(CodeCacheConfiguration) { + event.set_nonNMethodSize(NonNMethodCodeHeapSize); + event.set_profiledSize(ProfiledCodeHeapSize); + event.set_nonProfiledSize(NonProfiledCodeHeapSize); ++ event.set_jboltHotSize(JBoltCodeHeapSize); ++ event.set_jboltTmpSize(JBoltCodeHeapSize); + event.set_expansionSize(CodeCacheExpansionSize); + event.set_minBlockLength(CodeCacheMinBlockLength); + event.set_startAddress((u8)CodeCache::low_bound()); +diff --git a/src/hotspot/share/jfr/periodic/sampling/jfrThreadSampler.cpp b/src/hotspot/share/jfr/periodic/sampling/jfrThreadSampler.cpp +index 261e605dd..a88353346 100644 +--- a/src/hotspot/share/jfr/periodic/sampling/jfrThreadSampler.cpp ++++ b/src/hotspot/share/jfr/periodic/sampling/jfrThreadSampler.cpp +@@ -37,6 +37,9 @@ + #include "runtime/semaphore.hpp" + #include "runtime/thread.inline.hpp" + #include "runtime/threadSMR.hpp" ++#if INCLUDE_JBOLT ++#include "jbolt/jBoltManager.hpp" ++#endif + + enum JfrSampleType { + NO_SAMPLE = 0, +@@ -256,7 +259,13 @@ bool JfrThreadSampleClosure::sample_thread_in_java(JavaThread* thread, JfrStackF + return false; + } + EventExecutionSample *event = &_events[_added_java - 1]; +- traceid id = JfrStackTraceRepository::add(sampler.stacktrace()); ++ traceid id = 0; ++#if INCLUDE_JBOLT ++ if (UseJBolt && JBoltManager::reorder_phase_is_profiling()) { ++ id = JfrStackTraceRepository::add_jbolt(sampler.stacktrace()); ++ } else ++#endif ++ id = JfrStackTraceRepository::add(sampler.stacktrace()); + assert(id != 0, "Stacktrace id should not be 0"); + event->set_stackTrace(id); + return true; +diff --git 
a/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTrace.cpp b/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTrace.cpp +index 630116b0d..91efd5459 100644 +--- a/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTrace.cpp ++++ b/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTrace.cpp +@@ -58,7 +58,11 @@ JfrStackTrace::JfrStackTrace(JfrStackFrame* frames, u4 max_frames) : + _frames_ownership(false), + _reached_root(false), + _lineno(false), +- _written(false) {} ++ _written(false) ++#if INCLUDE_JBOLT ++ , _hotcount(1) ++#endif ++ {} + + JfrStackTrace::JfrStackTrace(traceid id, const JfrStackTrace& trace, const JfrStackTrace* next) : + _next(next), +@@ -70,7 +74,11 @@ JfrStackTrace::JfrStackTrace(traceid id, const JfrStackTrace& trace, const JfrSt + _frames_ownership(true), + _reached_root(trace._reached_root), + _lineno(trace._lineno), +- _written(false) { ++ _written(false) ++#if INCLUDE_JBOLT ++ , _hotcount(trace._hotcount) ++#endif ++{ + copy_frames(&_frames, trace._nr_of_frames, trace._frames); + } + +diff --git a/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTrace.hpp b/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTrace.hpp +index 314ac8128..7486e5bff 100644 +--- a/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTrace.hpp ++++ b/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTrace.hpp +@@ -53,6 +53,17 @@ class JfrStackFrame { + void write(JfrCheckpointWriter& cpw) const; + void resolve_lineno() const; + ++#if INCLUDE_JBOLT ++ const Method* get_method() const { return _method; } ++ traceid get_methodId() const { return _methodid; } ++ int get_byteCodeIndex() const { return _bci; } ++ u1 get_type() const { return _type; } ++ ++ static ByteSize method_offset() { return byte_offset_of(JfrStackFrame, _method ); } ++ static ByteSize methodid_offset() { return byte_offset_of(JfrStackFrame, _methodid ); } ++ static ByteSize bci_offset() { return byte_offset_of(JfrStackFrame, _bci ); } ++ static ByteSize type_offset() { return byte_offset_of(JfrStackFrame, _type ); } ++#endif + enum { + FRAME_INTERPRETER = 0, + FRAME_JIT, +@@ -69,6 +80,9 @@ class JfrStackTrace : public JfrCHeapObj { + friend class ObjectSampler; + friend class OSThreadSampler; + friend class StackTraceResolver; ++#if INCLUDE_JBOLT ++ friend class JBoltManager; ++#endif + private: + const JfrStackTrace* _next; + JfrStackFrame* _frames; +@@ -80,6 +94,9 @@ class JfrStackTrace : public JfrCHeapObj { + bool _reached_root; + mutable bool _lineno; + mutable bool _written; ++#if INCLUDE_JBOLT ++ u4 _hotcount; ++#endif + + const JfrStackTrace* next() const { return _next; } + +@@ -107,6 +124,17 @@ class JfrStackTrace : public JfrCHeapObj { + public: + unsigned int hash() const { return _hash; } + traceid id() const { return _id; } ++#if INCLUDE_JBOLT ++ u4 hotcount() const { return _hotcount; } ++ const JfrStackFrame* get_frames() const { return _frames; } ++ u4 get_framesCount() const { return _nr_of_frames; } ++ ++ static ByteSize hash_offset() { return byte_offset_of(JfrStackTrace, _hash ); } ++ static ByteSize id_offset() { return byte_offset_of(JfrStackTrace, _id ); } ++ static ByteSize hotcount_offset() { return byte_offset_of(JfrStackTrace, _hotcount ); } ++ static ByteSize frames_offset() { return byte_offset_of(JfrStackTrace, _frames ); } ++ static ByteSize frames_count_offset() { return byte_offset_of(JfrStackTrace, _nr_of_frames ); } ++#endif + }; + + #endif // SHARE_JFR_RECORDER_STACKTRACE_JFRSTACKTRACE_HPP +diff --git a/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTraceRepository.cpp 
b/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTraceRepository.cpp +index d873651f2..07502c767 100644 +--- a/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTraceRepository.cpp ++++ b/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTraceRepository.cpp +@@ -29,6 +29,9 @@ + #include "jfr/recorder/stacktrace/jfrStackTraceRepository.hpp" + #include "jfr/support/jfrThreadLocal.hpp" + #include "runtime/mutexLocker.hpp" ++#if INCLUDE_JBOLT ++#include "jbolt/jBoltManager.hpp" ++#endif + + /* + * There are two separate repository instances. +@@ -51,9 +54,16 @@ static JfrStackTraceRepository& leak_profiler_instance() { + return *_leak_profiler_instance; + } + ++#if INCLUDE_JBOLT ++JfrStackTraceRepository::JfrStackTraceRepository() : _last_entries(0), _entries(0), _last_entries_jbolt(0), _entries_jbolt(0) { ++ memset(_table, 0, sizeof(_table)); ++ memset(_table_jbolt, 0, sizeof(_table_jbolt)); ++} ++#else + JfrStackTraceRepository::JfrStackTraceRepository() : _last_entries(0), _entries(0) { + memset(_table, 0, sizeof(_table)); + } ++#endif + + JfrStackTraceRepository* JfrStackTraceRepository::create() { + assert(_instance == NULL, "invariant"); +@@ -98,11 +108,16 @@ bool JfrStackTraceRepository::is_modified() const { + } + + size_t JfrStackTraceRepository::write(JfrChunkWriter& sw, bool clear) { ++#if INCLUDE_JBOLT ++ if (clear && (UseJBolt && JBoltManager::reorder_phase_is_profiling_or_waiting())) { ++ JBoltManager::construct_cg_once(); ++ } ++#endif ++ MutexLockerEx lock(JfrStacktrace_lock, Mutex::_no_safepoint_check_flag); + if (_entries == 0) { + return 0; + } +- MutexLockerEx lock(JfrStacktrace_lock, Mutex::_no_safepoint_check_flag); +- assert(_entries > 0, "invariant"); ++ + int count = 0; + for (u4 i = 0; i < TABLE_SIZE; ++i) { + JfrStackTrace* stacktrace = _table[i]; +@@ -121,6 +136,21 @@ size_t JfrStackTraceRepository::write(JfrChunkWriter& sw, bool clear) { + if (clear) { + memset(_table, 0, sizeof(_table)); + _entries = 0; ++#if INCLUDE_JBOLT ++ for (u4 i = 0; i < TABLE_SIZE; ++i) { ++ JfrStackTrace* stacktrace = _table_jbolt[i]; ++ while (stacktrace != NULL) { ++ JfrStackTrace* next = const_cast(stacktrace->next()); ++ delete stacktrace; ++ stacktrace = next; ++ } ++ } ++ memset(_table_jbolt, 0, sizeof(_table_jbolt)); ++ _entries_jbolt = 0; ++ } ++ _last_entries_jbolt = _entries_jbolt; ++ { ++#endif + } + _last_entries = _entries; + return count; +@@ -143,6 +173,21 @@ size_t JfrStackTraceRepository::clear(JfrStackTraceRepository& repo) { + const size_t processed = repo._entries; + repo._entries = 0; + repo._last_entries = 0; ++#if INCLUDE_JBOLT ++ if (repo._entries_jbolt != 0) { ++ for (u4 i = 0; i < TABLE_SIZE; ++i) { ++ JfrStackTrace* stacktrace = repo._table_jbolt[i]; ++ while (stacktrace != NULL) { ++ JfrStackTrace* next = const_cast(stacktrace->next()); ++ delete stacktrace; ++ stacktrace = next; ++ } ++ } ++ memset(repo._table_jbolt, 0, sizeof(repo._table_jbolt)); ++ repo._entries_jbolt = 0; ++ repo._last_entries_jbolt = 0; ++ } ++#endif + return processed; + } + +@@ -232,6 +277,75 @@ const JfrStackTrace* JfrStackTraceRepository::lookup_for_leak_profiler(unsigned + return trace; + } + ++#if INCLUDE_JBOLT ++size_t JfrStackTraceRepository::clear_jbolt(JfrStackTraceRepository& repo) { ++ MutexLockerEx lock(JfrStacktrace_lock, Mutex::_no_safepoint_check_flag); ++ if (repo._entries_jbolt == 0) { ++ return 0; ++ } ++ ++ for (u4 i = 0; i < TABLE_SIZE; ++i) { ++ JfrStackTrace* stacktrace = repo._table_jbolt[i]; ++ while (stacktrace != NULL) { ++ JfrStackTrace* next = 
const_cast(stacktrace->next()); ++ delete stacktrace; ++ stacktrace = next; ++ } ++ } ++ memset(repo._table_jbolt, 0, sizeof(repo._table_jbolt)); ++ const size_t processed = repo._entries; ++ repo._entries_jbolt = 0; ++ repo._last_entries_jbolt = 0; ++ ++ return processed; ++} ++ ++size_t JfrStackTraceRepository::clear_jbolt() { ++ clear_jbolt(leak_profiler_instance()); ++ return clear_jbolt(instance()); ++} ++ ++traceid JfrStackTraceRepository::add_jbolt(JfrStackTraceRepository& repo, const JfrStackTrace& stacktrace) { ++ traceid tid = repo.add_trace_jbolt(stacktrace); ++ if (tid == 0) { ++ stacktrace.resolve_linenos(); ++ tid = repo.add_trace_jbolt(stacktrace); ++ } ++ assert(tid != 0, "invariant"); ++ return tid; ++} ++ ++traceid JfrStackTraceRepository::add_jbolt(const JfrStackTrace& stacktrace) { ++ JBoltManager::log_stacktrace(stacktrace); ++ return add_jbolt(instance(), stacktrace); ++} ++ ++traceid JfrStackTraceRepository::add_trace_jbolt(const JfrStackTrace& stacktrace) { ++ traceid id = add_trace(stacktrace); ++ MutexLockerEx lock(JfrStacktrace_lock, Mutex::_no_safepoint_check_flag); ++ const size_t index = stacktrace._hash % TABLE_SIZE; ++ ++ if (UseJBolt && JBoltManager::reorder_phase_is_profiling()) { ++ const JfrStackTrace* table_jbolt_entry = _table_jbolt[index]; ++ while (table_jbolt_entry != NULL) { ++ if (table_jbolt_entry->equals(stacktrace)) { ++ // [jbolt]: each time add an old trace, inc its hotcount ++ const_cast(table_jbolt_entry)->_hotcount++; ++ return table_jbolt_entry->id(); ++ } ++ table_jbolt_entry = table_jbolt_entry->next(); ++ } ++ } ++ ++ if (id != 0 && UseJBolt && JBoltManager::reorder_phase_is_profiling()) { ++ _table_jbolt[index] = new JfrStackTrace(id, stacktrace, _table_jbolt[index]); ++ ++_entries_jbolt; ++ } ++ ++ return id; ++} ++#endif ++ + void JfrStackTraceRepository::clear_leak_profiler() { + clear(leak_profiler_instance()); + } +diff --git a/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTraceRepository.hpp b/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTraceRepository.hpp +index bf32df99f..e8868a467 100644 +--- a/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTraceRepository.hpp ++++ b/src/hotspot/share/jfr/recorder/stacktrace/jfrStackTraceRepository.hpp +@@ -42,6 +42,9 @@ class JfrStackTraceRepository : public JfrCHeapObj { + friend class RecordStackTrace; + friend class StackTraceBlobInstaller; + friend class WriteStackTraceRepository; ++#if INCLUDE_JBOLT ++ friend class JBoltManager; ++#endif + + private: + static const u4 TABLE_SIZE = 2053; +@@ -49,6 +52,19 @@ class JfrStackTraceRepository : public JfrCHeapObj { + u4 _last_entries; + u4 _entries; + ++#if INCLUDE_JBOLT ++ // [jbolt]: an exclusive table for jbolt. 
It should be a subset of _table ++ JfrStackTrace* _table_jbolt[TABLE_SIZE]; ++ u4 _last_entries_jbolt; ++ u4 _entries_jbolt; ++ ++ static size_t clear_jbolt(); ++ static size_t clear_jbolt(JfrStackTraceRepository& repo); ++ traceid add_trace_jbolt(const JfrStackTrace& stacktrace); ++ static traceid add_jbolt(JfrStackTraceRepository& repo, const JfrStackTrace& stacktrace); ++ static traceid add_jbolt(const JfrStackTrace& stacktrace); ++#endif ++ + JfrStackTraceRepository(); + static JfrStackTraceRepository& instance(); + static JfrStackTraceRepository* create(); +@@ -71,6 +87,13 @@ class JfrStackTraceRepository : public JfrCHeapObj { + + public: + static traceid record(Thread* thread, int skip = 0); ++#if INCLUDE_JBOLT ++ const JfrStackTrace* const * get_stacktrace_table() const { return _table; } ++ u4 get_entries_count() const { return _entries; } ++ ++ const JfrStackTrace* const * get_stacktrace_table_jbolt() const { return _table_jbolt; } ++ u4 get_entries_count_jbolt() const { return _entries_jbolt; } ++#endif + }; + + #endif // SHARE_JFR_RECORDER_STACKTRACE_JFRSTACKTRACEREPOSITORY_HPP +diff --git a/src/hotspot/share/logging/logTag.hpp b/src/hotspot/share/logging/logTag.hpp +index 0ec37b2f6..4e117530a 100644 +--- a/src/hotspot/share/logging/logTag.hpp ++++ b/src/hotspot/share/logging/logTag.hpp +@@ -84,6 +84,7 @@ + LOG_TAG(inlining) \ + LOG_TAG(interpreter) \ + LOG_TAG(itables) \ ++ JBOLT_ONLY(LOG_TAG(jbolt)) \ + LOG_TAG(jfr) \ + LOG_TAG(jit) \ + LOG_TAG(jni) \ +diff --git a/src/hotspot/share/memory/virtualspace.hpp b/src/hotspot/share/memory/virtualspace.hpp +index 4dace9d88..4cdddcf0b 100644 +--- a/src/hotspot/share/memory/virtualspace.hpp ++++ b/src/hotspot/share/memory/virtualspace.hpp +@@ -82,6 +82,16 @@ class ReservedSpace { + bool split = false, bool realloc = true); + inline ReservedSpace last_part (size_t partition_size); + ++#if INCLUDE_JBOLT ++ static ReservedSpace static_first_part(ReservedSpace rs, size_t partition_size) { ++ return rs.first_part(partition_size); ++ } ++ ++ static ReservedSpace static_last_part(ReservedSpace rs, size_t partition_size) { ++ return rs.last_part(partition_size); ++ } ++#endif ++ + // Alignment + static size_t page_align_size_up(size_t size); + static size_t page_align_size_down(size_t size); +diff --git a/src/hotspot/share/oops/method.hpp b/src/hotspot/share/oops/method.hpp +index 346526487..392f9b6e1 100644 +--- a/src/hotspot/share/oops/method.hpp ++++ b/src/hotspot/share/oops/method.hpp +@@ -99,6 +99,7 @@ class Method : public Metadata { + #ifndef PRODUCT + int64_t _compiled_invocation_count; + #endif ++ + // Entry point for calling both from and to the interpreter. 
+ address _i2i_entry; // All-args-on-stack calling convention + // Entry point for calling from compiled code, to compiled code if it exists +diff --git a/src/hotspot/share/opto/doCall.cpp b/src/hotspot/share/opto/doCall.cpp +index d8bfcff2d..943c110fc 100644 +--- a/src/hotspot/share/opto/doCall.cpp ++++ b/src/hotspot/share/opto/doCall.cpp +@@ -1035,8 +1035,8 @@ void Parse::catch_inline_exceptions(SafePointNode* ex_map) { + + #ifndef PRODUCT + void Parse::count_compiled_calls(bool at_method_entry, bool is_inline) { +- if( CountCompiledCalls ) { +- if( at_method_entry ) { ++ if(CountCompiledCalls) { ++ if(at_method_entry) { + // bump invocation counter if top method (for statistics) + if (CountCompiledCalls && depth() == 1) { + const TypePtr* addr_type = TypeMetadataPtr::make(method()); +@@ -1067,7 +1067,6 @@ void Parse::count_compiled_calls(bool at_method_entry, bool is_inline) { + } + #endif //PRODUCT + +- + ciMethod* Compile::optimize_virtual_call(ciMethod* caller, int bci, ciInstanceKlass* klass, + ciKlass* holder, ciMethod* callee, + const TypeOopPtr* receiver_type, bool is_virtual, +diff --git a/src/hotspot/share/opto/parse1.cpp b/src/hotspot/share/opto/parse1.cpp +index c1a7b2833..e16299178 100644 +--- a/src/hotspot/share/opto/parse1.cpp ++++ b/src/hotspot/share/opto/parse1.cpp +@@ -1185,7 +1185,7 @@ void Parse::do_method_entry() { + set_parse_bci(InvocationEntryBci); // Pseudo-BCP + set_sp(0); // Java Stack Pointer + +- NOT_PRODUCT( count_compiled_calls(true/*at_method_entry*/, false/*is_inline*/); ) ++ NOT_PRODUCT(count_compiled_calls(true/*at_method_entry*/, false/*is_inline*/);) + + if (C->env()->dtrace_method_probes()) { + make_dtrace_method_entry(method()); +diff --git a/src/hotspot/share/runtime/flags/jvmFlag.cpp b/src/hotspot/share/runtime/flags/jvmFlag.cpp +index 630c8becd..36d122464 100644 +--- a/src/hotspot/share/runtime/flags/jvmFlag.cpp ++++ b/src/hotspot/share/runtime/flags/jvmFlag.cpp +@@ -33,6 +33,9 @@ + #include "runtime/globals_extension.hpp" + #include "utilities/defaultStream.hpp" + #include "utilities/stringUtils.hpp" ++#if INCLUDE_JBOLT ++#include "jbolt/jbolt_globals.hpp" ++#endif + + #define DEFAULT_RANGE_STR_CHUNK_SIZE 64 + static char* create_range_str(const char *fmt, ...) 
{ +@@ -786,6 +789,15 @@ const char* JVMFlag::flag_error_str(JVMFlag::Error error) { + #define JVMCI_PD_DEVELOP_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) JVMFlag::Flags(JVMFlag::DEFAULT | JVMFlag::KIND_JVMCI | JVMFlag::KIND_DEVELOP | JVMFlag::KIND_PLATFORM_DEPENDENT) }, + #define JVMCI_NOTPRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) JVMFlag::Flags(JVMFlag::DEFAULT | JVMFlag::KIND_JVMCI | JVMFlag::KIND_NOT_PRODUCT) }, + ++#define JBOLT_PRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) JVMFlag::Flags(JVMFlag::DEFAULT | JVMFlag::KIND_JBOLT | JVMFlag::KIND_PRODUCT) }, ++#define JBOLT_PD_PRODUCT_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) JVMFlag::Flags(JVMFlag::DEFAULT | JVMFlag::KIND_JBOLT | JVMFlag::KIND_PRODUCT | JVMFlag::KIND_PLATFORM_DEPENDENT) }, ++#define JBOLT_DIAGNOSTIC_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) JVMFlag::Flags(JVMFlag::DEFAULT | JVMFlag::KIND_JBOLT | JVMFlag::KIND_DIAGNOSTIC) }, ++#define JBOLT_PD_DIAGNOSTIC_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) JVMFlag::Flags(JVMFlag::DEFAULT | JVMFlag::KIND_JBOLT | JVMFlag::KIND_DIAGNOSTIC | JVMFlag::KIND_PLATFORM_DEPENDENT) }, ++#define JBOLT_EXPERIMENTAL_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) JVMFlag::Flags(JVMFlag::DEFAULT | JVMFlag::KIND_JBOLT | JVMFlag::KIND_EXPERIMENTAL) }, ++#define JBOLT_DEVELOP_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) JVMFlag::Flags(JVMFlag::DEFAULT | JVMFlag::KIND_JBOLT | JVMFlag::KIND_DEVELOP) }, ++#define JBOLT_PD_DEVELOP_FLAG_STRUCT( type, name, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) JVMFlag::Flags(JVMFlag::DEFAULT | JVMFlag::KIND_JBOLT | JVMFlag::KIND_DEVELOP | JVMFlag::KIND_PLATFORM_DEPENDENT) }, ++#define JBOLT_NOTPRODUCT_FLAG_STRUCT( type, name, value, doc) { #type, XSTR(name), (void*) &name, NOT_PRODUCT_ARG(doc) JVMFlag::Flags(JVMFlag::DEFAULT | JVMFlag::KIND_JBOLT | JVMFlag::KIND_NOT_PRODUCT) }, ++ + #ifdef _LP64 + #define RUNTIME_LP64_PRODUCT_FLAG_STRUCT(type, name, value, doc) { #type, XSTR(name), &name, NOT_PRODUCT_ARG(doc) JVMFlag::Flags(JVMFlag::DEFAULT | JVMFlag::KIND_LP64_PRODUCT) }, + #else +@@ -854,6 +866,18 @@ static JVMFlag flagTable[] = { + IGNORE_CONSTRAINT, \ + IGNORE_WRITEABLE) + #endif // INCLUDE_JVMCI ++#if INCLUDE_JBOLT ++ JBOLT_FLAGS(JBOLT_DEVELOP_FLAG_STRUCT, \ ++ JBOLT_PD_DEVELOP_FLAG_STRUCT, \ ++ JBOLT_PRODUCT_FLAG_STRUCT, \ ++ JBOLT_PD_PRODUCT_FLAG_STRUCT, \ ++ JBOLT_DIAGNOSTIC_FLAG_STRUCT, \ ++ JBOLT_PD_DIAGNOSTIC_FLAG_STRUCT, \ ++ JBOLT_EXPERIMENTAL_FLAG_STRUCT, \ ++ JBOLT_NOTPRODUCT_FLAG_STRUCT, \ ++ IGNORE_RANGE, \ ++ IGNORE_CONSTRAINT) ++#endif // INCLUDE_JBOLT + #ifdef COMPILER1 + C1_FLAGS(C1_DEVELOP_FLAG_STRUCT, \ + C1_PD_DEVELOP_FLAG_STRUCT, \ +diff --git a/src/hotspot/share/runtime/flags/jvmFlag.hpp b/src/hotspot/share/runtime/flags/jvmFlag.hpp +index c0854b33c..439249f25 100644 +--- a/src/hotspot/share/runtime/flags/jvmFlag.hpp ++++ b/src/hotspot/share/runtime/flags/jvmFlag.hpp +@@ -62,9 +62,10 @@ struct JVMFlag { + KIND_ARCH = 1 << 14, + KIND_LP64_PRODUCT = 1 << 15, + KIND_JVMCI = 1 << 16, ++ KIND_JBOLT = 1 << 17, + + // set this bit if the flag was set on the command line +- ORIG_COMMAND_LINE = 1 << 17, ++ ORIG_COMMAND_LINE = 1 << 18, + + KIND_MASK = ~(VALUE_ORIGIN_MASK | 
ORIG_COMMAND_LINE) + }; +diff --git a/src/hotspot/share/runtime/flags/jvmFlagRangeList.cpp b/src/hotspot/share/runtime/flags/jvmFlagRangeList.cpp +index f947baf53..e5154f1f0 100644 +--- a/src/hotspot/share/runtime/flags/jvmFlagRangeList.cpp ++++ b/src/hotspot/share/runtime/flags/jvmFlagRangeList.cpp +@@ -365,6 +365,19 @@ void JVMFlagRangeList::init(void) { + IGNORE_WRITEABLE)); + #endif // INCLUDE_JVMCI + ++#if INCLUDE_JBOLT ++ emit_range_no(NULL JBOLT_FLAGS(EMIT_RANGE_DEVELOPER_FLAG, \ ++ EMIT_RANGE_PD_DEVELOPER_FLAG, \ ++ EMIT_RANGE_PRODUCT_FLAG, \ ++ EMIT_RANGE_PD_PRODUCT_FLAG, \ ++ EMIT_RANGE_DIAGNOSTIC_FLAG, \ ++ EMIT_RANGE_PD_DIAGNOSTIC_FLAG, \ ++ EMIT_RANGE_EXPERIMENTAL_FLAG, \ ++ EMIT_RANGE_NOTPRODUCT_FLAG, ++ EMIT_RANGE_CHECK, \ ++ IGNORE_CONSTRAINT)); ++#endif // INCLUDE_JBOLT ++ + #ifdef COMPILER1 + emit_range_no(NULL C1_FLAGS(EMIT_RANGE_DEVELOPER_FLAG, + EMIT_RANGE_PD_DEVELOPER_FLAG, +diff --git a/src/hotspot/share/runtime/globals_extension.hpp b/src/hotspot/share/runtime/globals_extension.hpp +index 02491f6c7..c6adc45b0 100644 +--- a/src/hotspot/share/runtime/globals_extension.hpp ++++ b/src/hotspot/share/runtime/globals_extension.hpp +@@ -36,6 +36,9 @@ + #ifdef COMPILER2 + #include "opto/c2_globals.hpp" + #endif ++#if INCLUDE_JBOLT ++#include "jbolt/jbolt_globals.hpp" ++#endif + + // Construct enum of Flag_ constants. + +@@ -62,6 +65,15 @@ + #define JVMCI_EXPERIMENTAL_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name), + #define JVMCI_NOTPRODUCT_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name), + ++#define JBOLT_PRODUCT_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name), ++#define JBOLT_PD_PRODUCT_FLAG_MEMBER(type, name, doc) FLAG_MEMBER(name), ++#define JBOLT_DEVELOP_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name), ++#define JBOLT_PD_DEVELOP_FLAG_MEMBER(type, name, doc) FLAG_MEMBER(name), ++#define JBOLT_DIAGNOSTIC_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name), ++#define JBOLT_PD_DIAGNOSTIC_FLAG_MEMBER(type, name, doc) FLAG_MEMBER(name), ++#define JBOLT_EXPERIMENTAL_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name), ++#define JBOLT_NOTPRODUCT_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name), ++ + #ifdef _LP64 + #define RUNTIME_LP64_PRODUCT_FLAG_MEMBER(type, name, value, doc) FLAG_MEMBER(name), + #else +@@ -130,6 +142,18 @@ typedef enum { + IGNORE_CONSTRAINT, \ + IGNORE_WRITEABLE) + #endif // INCLUDE_JVMCI ++#if INCLUDE_JBOLT ++ JBOLT_FLAGS(JBOLT_DEVELOP_FLAG_MEMBER, \ ++ JBOLT_PD_DEVELOP_FLAG_MEMBER, \ ++ JBOLT_PRODUCT_FLAG_MEMBER, \ ++ JBOLT_PD_PRODUCT_FLAG_MEMBER, \ ++ JBOLT_DIAGNOSTIC_FLAG_MEMBER, \ ++ JBOLT_PD_DIAGNOSTIC_FLAG_MEMBER, \ ++ JBOLT_EXPERIMENTAL_FLAG_MEMBER, \ ++ JBOLT_NOTPRODUCT_FLAG_MEMBER, \ ++ IGNORE_RANGE, \ ++ IGNORE_CONSTRAINT) ++#endif + #ifdef COMPILER1 + C1_FLAGS(C1_DEVELOP_FLAG_MEMBER, \ + C1_PD_DEVELOP_FLAG_MEMBER, \ +@@ -191,6 +215,15 @@ typedef enum { + #define JVMCI_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), + #define JVMCI_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), + ++#define JBOLT_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), ++#define JBOLT_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, doc) FLAG_MEMBER_WITH_TYPE(name,type), ++#define JBOLT_DEVELOP_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), ++#define JBOLT_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE(type, name, doc) FLAG_MEMBER_WITH_TYPE(name,type), ++#define 
JBOLT_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), ++#define JBOLT_PD_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE(type, name, doc) FLAG_MEMBER_WITH_TYPE(name,type), ++#define JBOLT_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), ++#define JBOLT_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), ++ + #define C1_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), + #define C1_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE(type, name, doc) FLAG_MEMBER_WITH_TYPE(name,type), + #define C1_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE(type, name, value, doc) FLAG_MEMBER_WITH_TYPE(name,type), +@@ -259,6 +292,18 @@ typedef enum { + IGNORE_CONSTRAINT, + IGNORE_WRITEABLE) + #endif // INCLUDE_JVMCI ++#if INCLUDE_JBOLT ++ JBOLT_FLAGS(JBOLT_DEVELOP_FLAG_MEMBER_WITH_TYPE, ++ JBOLT_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, ++ JBOLT_PRODUCT_FLAG_MEMBER_WITH_TYPE, ++ JBOLT_PD_PRODUCT_FLAG_MEMBER_WITH_TYPE, ++ JBOLT_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, ++ JBOLT_PD_DIAGNOSTIC_FLAG_MEMBER_WITH_TYPE, ++ JBOLT_EXPERIMENTAL_FLAG_MEMBER_WITH_TYPE, ++ JBOLT_NOTPRODUCT_FLAG_MEMBER_WITH_TYPE, ++ IGNORE_RANGE, ++ IGNORE_CONSTRAINT) ++#endif // INCLUDE_JBOLT + #ifdef COMPILER1 + C1_FLAGS(C1_DEVELOP_FLAG_MEMBER_WITH_TYPE, + C1_PD_DEVELOP_FLAG_MEMBER_WITH_TYPE, +diff --git a/src/hotspot/share/runtime/java.cpp b/src/hotspot/share/runtime/java.cpp +index 84123b29e..2f61f3055 100644 +--- a/src/hotspot/share/runtime/java.cpp ++++ b/src/hotspot/share/runtime/java.cpp +@@ -88,6 +88,9 @@ + #if INCLUDE_JFR + #include "jfr/jfr.hpp" + #endif ++#if INCLUDE_JBOLT ++#include "jbolt/jBoltManager.hpp" ++#endif + + GrowableArray* collected_profiled_methods; + +@@ -540,6 +543,12 @@ void before_exit(JavaThread* thread) { + // Note: we don't wait until it actually dies. + os::terminate_signal_thread(); + ++#if INCLUDE_JBOLT ++ if (UseJBolt && JBoltDumpMode) { ++ JBoltManager::dump_order_in_manual(); ++ } ++#endif ++ + print_statistics(); + Universe::heap()->print_tracing_info(); + +diff --git a/src/hotspot/share/runtime/mutexLocker.cpp b/src/hotspot/share/runtime/mutexLocker.cpp +index f0747079b..cfb14dd37 100644 +--- a/src/hotspot/share/runtime/mutexLocker.cpp ++++ b/src/hotspot/share/runtime/mutexLocker.cpp +@@ -295,8 +295,8 @@ void mutex_init() { + // lower the JNI oopstorage lock ranks to make them super-special. 
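The JBOLT_*_FLAG_STRUCT and JBOLT_*_FLAG_MEMBER definitions above follow HotSpot's usual X-macro convention: a single JBOLT_FLAGS list (presumably declared in jbolt/jbolt_globals.hpp, which this hunk only includes) is expanded several times with different per-kind macros to populate the flag table, the Flag_ enum and the typed-member enum. A small, self-contained sketch of that pattern; all names below are demo-only and not part of the patch:

```cpp
#include <cstdio>

// DEMO_FLAGS plays the role of JBOLT_FLAGS: one list, expanded with a
// different expansion at each use site.
#define DEMO_FLAGS(product, develop)                          \
  product(bool, UseDemoJBolt,   false, "enable the feature")  \
  develop(int,  DemoTuningKnob, 1,     "internal knob")

// First expansion: build an enum of flag ids.
#define FLAG_ENUM(type, name, value, doc) Flag_##name,
enum DemoFlagId { DEMO_FLAGS(FLAG_ENUM, FLAG_ENUM) DemoFlagCount };

// Second expansion: build a parallel table of flag names.
#define FLAG_NAME(type, name, value, doc) #name,
static const char* const demo_flag_names[] = { DEMO_FLAGS(FLAG_NAME, FLAG_NAME) };

int main() {
  for (int i = 0; i < DemoFlagCount; i++) {
    std::printf("registered flag: %s\n", demo_flag_names[i]);
  }
  return 0;
}
```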
+ def(JNIGlobalAlloc_lock , PaddedMutex , nonleaf, true, Monitor::_safepoint_check_never); + def(JNIGlobalActive_lock , PaddedMutex , nonleaf-1, true, Monitor::_safepoint_check_never); +- def(JNIWeakAlloc_lock , PaddedMutex , nonleaf, true, Monitor::_safepoint_check_never); +- def(JNIWeakActive_lock , PaddedMutex , nonleaf-1, true, Monitor::_safepoint_check_never); ++ def(JNIWeakAlloc_lock , PaddedMutex , vmweak, true, Monitor::_safepoint_check_never); ++ def(JNIWeakActive_lock , PaddedMutex , vmweak-1, true, Monitor::_safepoint_check_never); + def(JNICritical_lock , PaddedMonitor, nonleaf, true, Monitor::_safepoint_check_always); // used for JNI critical regions + def(AdapterHandlerLibrary_lock , PaddedMutex , nonleaf, true, Monitor::_safepoint_check_always); + +diff --git a/src/hotspot/share/runtime/mutexLocker.hpp b/src/hotspot/share/runtime/mutexLocker.hpp +index 721b3e072..99112cb34 100644 +--- a/src/hotspot/share/runtime/mutexLocker.hpp ++++ b/src/hotspot/share/runtime/mutexLocker.hpp +@@ -181,7 +181,7 @@ void print_owned_locks_on_error(outputStream* st); + char *lock_name(Mutex *mutex); + + class MutexLocker: StackObj { +- private: ++ protected: + Monitor * _mutex; + public: + MutexLocker(Monitor * mutex) { +@@ -205,6 +205,38 @@ class MutexLocker: StackObj { + + }; + ++class MonitorLocker: public MutexLocker { ++ protected: ++ Monitor* as_monitor() const { ++ return static_cast(_mutex); ++ } ++ ++ public: ++ MonitorLocker(Monitor* monitor) : ++ MutexLocker(monitor) { ++ // Superclass constructor did locking ++ assert(monitor != NULL, "NULL monitor not allowed"); ++ } ++ ++ MonitorLocker(Monitor* monitor, Thread* thread) : ++ MutexLocker(monitor, thread) { ++ // Superclass constructor did locking ++ assert(monitor != NULL, "NULL monitor not allowed"); ++ } ++ ++ bool wait(long timeout = 0) { ++ return as_monitor()->wait(!Monitor::_no_safepoint_check_flag, timeout, !Monitor::_as_suspend_equivalent_flag); ++ } ++ ++ void notify_all() { ++ as_monitor()->notify_all(); ++ } ++ ++ void notify() { ++ as_monitor()->notify(); ++ } ++}; ++ + // for debugging: check that we're already owning this lock (or are at a safepoint) + #ifdef ASSERT + void assert_locked_or_safepoint(const Monitor * lock); +diff --git a/src/hotspot/share/runtime/sweeper.cpp b/src/hotspot/share/runtime/sweeper.cpp +index e92682b6e..82f25c50b 100644 +--- a/src/hotspot/share/runtime/sweeper.cpp ++++ b/src/hotspot/share/runtime/sweeper.cpp +@@ -46,6 +46,9 @@ + #include "runtime/vmThread.hpp" + #include "utilities/events.hpp" + #include "utilities/xmlstream.hpp" ++#if INCLUDE_JBOLT ++#include "jbolt/jBoltManager.hpp" ++#endif + + #ifdef ASSERT + +@@ -375,7 +378,7 @@ void NMethodSweeper::possibly_sweep() { + // allocations go to the non-profiled heap and we must be make sure that there is + // enough space. 
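The MonitorLocker added above layers wait/notify on top of MutexLocker's scoped locking: the base constructor acquires the monitor and the destructor releases it. A hedged usage sketch of that handshake pattern; the function names and the `_demo_ready` flag are placeholders, not names from the patch:

```cpp
#include "runtime/mutexLocker.hpp"

// Demo only: "ready" handshake on a Monitor created elsewhere (e.g. in mutex_init()).
static volatile bool _demo_ready = false;

void demo_wait_until_ready(Monitor* lock) {
  MonitorLocker ml(lock);        // base MutexLocker constructor acquires the lock
  while (!_demo_ready) {
    ml.wait(100);                // timed wait; releases the monitor while blocked
  }
}                                // released again when ml goes out of scope

void demo_signal_ready(Monitor* lock) {
  MonitorLocker ml(lock);
  _demo_ready = true;
  ml.notify_all();               // wake every thread parked in wait()
}
```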
+ double free_percent = 1 / CodeCache::reverse_free_ratio(CodeBlobType::MethodNonProfiled) * 100; +- if (free_percent <= StartAggressiveSweepingAt) { ++ if (free_percent <= StartAggressiveSweepingAt || (UseJBolt && JBoltManager::force_sweep())) { + do_stack_scanning(); + } + +diff --git a/src/hotspot/share/runtime/thread.cpp b/src/hotspot/share/runtime/thread.cpp +index d843651a4..be2ecb437 100644 +--- a/src/hotspot/share/runtime/thread.cpp ++++ b/src/hotspot/share/runtime/thread.cpp +@@ -138,6 +138,10 @@ + #if INCLUDE_JFR + #include "jfr/jfr.hpp" + #endif ++#if INCLUDE_JBOLT ++#include "jbolt/jBoltDcmds.hpp" ++#include "jbolt/jBoltManager.hpp" ++#endif // INCLUDE_JBOLT + + // Initialization after module runtime initialization + void universe_post_module_init(); // must happen after call_initPhase2 +@@ -3844,6 +3848,14 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) { + // Initialize Java-Level synchronization subsystem + ObjectMonitor::Initialize(); + ++#if INCLUDE_JBOLT ++ if (UseJBolt) { ++ JBoltManager::init_phase1(); ++ } else { ++ JBoltManager::check_arguments_not_set(); ++ } ++#endif // INCLUDE_JBOLT ++ + // Initialize global modules + jint status = init_globals(); + if (status != JNI_OK) { +@@ -4089,6 +4101,13 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) { + ShouldNotReachHere(); + } + ++#if INCLUDE_JBOLT ++ register_jbolt_dcmds(); ++ if (UseJBolt) { ++ JBoltManager::init_phase2(CATCH); ++ } ++#endif // INCLUDE_JBOLT ++ + return JNI_OK; + } + +diff --git a/src/hotspot/share/utilities/growableArray.hpp b/src/hotspot/share/utilities/growableArray.hpp +index 7f2873457..dee43669c 100644 +--- a/src/hotspot/share/utilities/growableArray.hpp ++++ b/src/hotspot/share/utilities/growableArray.hpp +@@ -30,6 +30,9 @@ + #include "utilities/debug.hpp" + #include "utilities/globalDefinitions.hpp" + #include "utilities/ostream.hpp" ++#if INCLUDE_JBOLT ++#include "utilities/sizes.hpp" ++#endif // INCLUDE_JBOLT + + // A growable array. 
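In the thread.cpp hunk above, register_jbolt_dcmds() runs once the VM is initialized, just before JBoltManager::init_phase2(CATCH). Its body is not part of this hunk; the sketch below only shows the standard HotSpot pattern such a helper would typically follow to make a diagnostic command reachable via jcmd. The JBoltDumpDCmd class and the demo function name are hypothetical, used purely for illustration:

```cpp
#include "services/diagnosticFramework.hpp"

// Hypothetical sketch: assumes a JBoltDumpDCmd class derived from DCmd exists.
static void demo_register_jbolt_dcmd() {
  uint32_t full_export = DCmd_Source_Internal | DCmd_Source_AttachAPI | DCmd_Source_MBean;
  DCmdFactory::register_DCmdFactory(
      new DCmdFactoryImpl<JBoltDumpDCmd>(full_export, true, false));
}
```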
+ +@@ -211,6 +214,10 @@ template class GrowableArray : public GenericGrowableArray { + // Does nothing for resource and arena objects + ~GrowableArray() { if (on_C_heap()) clear_and_deallocate(); } + ++#if INCLUDE_JBOLT ++ static ByteSize data_offset() { return byte_offset_of(GrowableArray, _data); } ++#endif ++ + void clear() { _len = 0; } + int length() const { return _len; } + int max_length() const { return _max; } +diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp +index 6605ab367..6dd187652 100644 +--- a/src/hotspot/share/utilities/macros.hpp ++++ b/src/hotspot/share/utilities/macros.hpp +@@ -119,6 +119,18 @@ + #define NOT_CDS_RETURN_(code) { return code; } + #endif // INCLUDE_CDS + ++#ifndef INCLUDE_JBOLT ++#define INCLUDE_JBOLT 1 ++#endif ++ ++#if INCLUDE_JBOLT ++#define JBOLT_ONLY(x) x ++#define NOT_JBOLT(x) ++#else ++#define JBOLT_ONLY(x) ++#define NOT_JBOLT(x) x ++#endif // INCLUDE_JBOLT ++ + #ifndef INCLUDE_MANAGEMENT + #define INCLUDE_MANAGEMENT 1 + #endif // INCLUDE_MANAGEMENT +diff --git a/test/hotspot/jtreg/compiler/codecache/cli/common/CodeCacheCLITestCase.java b/test/hotspot/jtreg/compiler/codecache/cli/common/CodeCacheCLITestCase.java +index 1c28a1fea..b5252cf63 100644 +--- a/test/hotspot/jtreg/compiler/codecache/cli/common/CodeCacheCLITestCase.java ++++ b/test/hotspot/jtreg/compiler/codecache/cli/common/CodeCacheCLITestCase.java +@@ -73,7 +73,7 @@ public class CodeCacheCLITestCase { + * Verifies that with disabled SegmentedCodeCache PrintCodeCache output + * contains only CodeCache's entry. + */ +- NON_SEGMENTED(options -> !options.segmented, EnumSet.of(BlobType.All), ++ NON_SEGMENTED(options -> !options.segmented, EnumSet.copyOf(CodeCacheOptions.NON_SEGMENTED_HEAPS), + CommandLineOptionTest.prepareBooleanFlag(SEGMENTED_CODE_CACHE, + false)), + /** +@@ -82,7 +82,7 @@ public class CodeCacheCLITestCase { + * profiled-nmethods heap and non-segmented CodeCache. + */ + NON_TIERED(ONLY_SEGMENTED, +- EnumSet.of(BlobType.NonNMethod, BlobType.MethodNonProfiled), ++ EnumSet.copyOf(CodeCacheOptions.SEGMENTED_HEAPS_WO_PROFILED), + CommandLineOptionTest.prepareBooleanFlag(TIERED_COMPILATION, + false)), + /** +@@ -91,7 +91,7 @@ public class CodeCacheCLITestCase { + * heaps only. + */ + TIERED_LEVEL_0(SEGMENTED_SERVER, +- EnumSet.of(BlobType.NonNMethod, BlobType.MethodNonProfiled), ++ EnumSet.copyOf(CodeCacheOptions.SEGMENTED_HEAPS_WO_PROFILED), + CommandLineOptionTest.prepareBooleanFlag(TIERED_COMPILATION, + true), + CommandLineOptionTest.prepareNumericFlag(TIERED_STOP_AT, 0)), +@@ -101,7 +101,7 @@ public class CodeCacheCLITestCase { + * heaps only. + */ + TIERED_LEVEL_1(SEGMENTED_SERVER, +- EnumSet.of(BlobType.NonNMethod, BlobType.MethodNonProfiled), ++ EnumSet.copyOf(CodeCacheOptions.SEGMENTED_HEAPS_WO_PROFILED), + CommandLineOptionTest.prepareBooleanFlag(TIERED_COMPILATION, + true), + CommandLineOptionTest.prepareNumericFlag(TIERED_STOP_AT, 1)), +@@ -110,7 +110,7 @@ public class CodeCacheCLITestCase { + * contain information about all three code heaps. 
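The macros.hpp hunk above turns JBolt into a regular conditional feature: INCLUDE_JBOLT defaults to 1, and JBOLT_ONLY/NOT_JBOLT keep or drop their argument accordingly (the earlier logTag.hpp change uses exactly this to register the jbolt log tag). A tiny illustrative use; the function and messages below are examples, not code from the patch:

```cpp
#include "logging/log.hpp"
#include "utilities/macros.hpp"

// Example only: report at startup whether JBolt support was compiled in.
static void demo_report_jbolt_build_state() {
  JBOLT_ONLY(log_info(jbolt)("JBolt support is compiled in");)
  NOT_JBOLT(log_info(init)("JBolt support is compiled out");)
}
```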
+ */ + TIERED_LEVEL_4(SEGMENTED_SERVER, +- EnumSet.complementOf(EnumSet.of(BlobType.All)), ++ EnumSet.copyOf(CodeCacheOptions.ALL_SEGMENTED_HEAPS), + CommandLineOptionTest.prepareBooleanFlag(TIERED_COMPILATION, + true), + CommandLineOptionTest.prepareNumericFlag(TIERED_STOP_AT, 4)); +diff --git a/test/hotspot/jtreg/compiler/codecache/cli/common/CodeCacheOptions.java b/test/hotspot/jtreg/compiler/codecache/cli/common/CodeCacheOptions.java +index d5e2f16c8..868d2796d 100644 +--- a/test/hotspot/jtreg/compiler/codecache/cli/common/CodeCacheOptions.java ++++ b/test/hotspot/jtreg/compiler/codecache/cli/common/CodeCacheOptions.java +@@ -33,20 +33,27 @@ import java.util.List; + public class CodeCacheOptions { + public static final String SEGMENTED_CODE_CACHE = "SegmentedCodeCache"; + +- private static final EnumSet NON_SEGMENTED_HEAPS ++ public static final EnumSet NON_SEGMENTED_HEAPS + = EnumSet.of(BlobType.All); +- private static final EnumSet ALL_SEGMENTED_HEAPS +- = EnumSet.complementOf(NON_SEGMENTED_HEAPS); +- private static final EnumSet SEGMENTED_HEAPS_WO_PROFILED ++ public static final EnumSet JBOLT_HEAPS ++ = EnumSet.of(BlobType.MethodJBoltHot, BlobType.MethodJBoltTmp); ++ public static final EnumSet ALL_SEGMENTED_HEAPS ++ = EnumSet.complementOf(union(NON_SEGMENTED_HEAPS, JBOLT_HEAPS)); ++ public static final EnumSet ALL_SEGMENTED_HEAPS_WITH_JBOLT ++ = union(ALL_SEGMENTED_HEAPS, JBOLT_HEAPS); ++ public static final EnumSet SEGMENTED_HEAPS_WO_PROFILED + = EnumSet.of(BlobType.NonNMethod, BlobType.MethodNonProfiled); +- private static final EnumSet ONLY_NON_METHODS_HEAP ++ public static final EnumSet ONLY_NON_METHODS_HEAP + = EnumSet.of(BlobType.NonNMethod); + + public final long reserved; + public final long nonNmethods; + public final long nonProfiled; + public final long profiled; ++ public final long jboltHot; ++ public final long jboltTmp; + public final boolean segmented; ++ public final boolean useJBolt; + + public static long mB(long val) { + return CodeCacheOptions.kB(val) * 1024L; +@@ -56,12 +63,21 @@ public class CodeCacheOptions { + return val * 1024L; + } + ++ public static > EnumSet union(EnumSet e1, EnumSet e2) { ++ EnumSet res = EnumSet.copyOf(e1); ++ res.addAll(e2); ++ return res; ++ } ++ + public CodeCacheOptions(long reserved) { + this.reserved = reserved; + this.nonNmethods = 0; + this.nonProfiled = 0; + this.profiled = 0; ++ this.jboltHot = 0; ++ this.jboltTmp = 0; + this.segmented = false; ++ this.useJBolt = false; + } + + public CodeCacheOptions(long reserved, long nonNmethods, long nonProfiled, +@@ -70,7 +86,25 @@ public class CodeCacheOptions { + this.nonNmethods = nonNmethods; + this.nonProfiled = nonProfiled; + this.profiled = profiled; ++ this.jboltHot = 0; ++ this.jboltTmp = 0; ++ this.segmented = true; ++ this.useJBolt = false; ++ } ++ ++ /** ++ * No tests for JBolt yet as the related VM options are experimental now. 
++ */ ++ public CodeCacheOptions(long reserved, long nonNmethods, long nonProfiled, ++ long profiled, long jboltHot, long jboltTmp) { ++ this.reserved = reserved; ++ this.nonNmethods = nonNmethods; ++ this.nonProfiled = nonProfiled; ++ this.profiled = profiled; ++ this.jboltHot = jboltHot; ++ this.jboltTmp = jboltTmp; + this.segmented = true; ++ this.useJBolt = true; + } + + public long sizeForHeap(BlobType heap) { +@@ -83,6 +117,10 @@ public class CodeCacheOptions { + return this.nonProfiled; + case MethodProfiled: + return this.profiled; ++ case MethodJBoltHot: ++ return this.jboltHot; ++ case MethodJBoltTmp: ++ return this.jboltTmp; + default: + throw new Error("Unknown heap: " + heap.name()); + } +@@ -107,14 +145,26 @@ public class CodeCacheOptions { + CommandLineOptionTest.prepareNumericFlag( + BlobType.MethodProfiled.sizeOptionName, profiled)); + } ++ ++ if (useJBolt) { ++ Collections.addAll(options, ++ CommandLineOptionTest.prepareNumericFlag( ++ BlobType.MethodJBoltHot.sizeOptionName, jboltHot), ++ CommandLineOptionTest.prepareNumericFlag( ++ BlobType.MethodJBoltTmp.sizeOptionName, jboltTmp)); ++ } ++ + return options.toArray(new String[options.size()]); + } + + public CodeCacheOptions mapOptions(EnumSet involvedCodeHeaps) { + if (involvedCodeHeaps.isEmpty() + || involvedCodeHeaps.equals(NON_SEGMENTED_HEAPS) +- || involvedCodeHeaps.equals(ALL_SEGMENTED_HEAPS)) { ++ || involvedCodeHeaps.equals(ALL_SEGMENTED_HEAPS_WITH_JBOLT)) { + return this; ++ } else if (involvedCodeHeaps.equals(ALL_SEGMENTED_HEAPS)) { ++ return new CodeCacheOptions(reserved, nonNmethods, ++ nonProfiled + jboltHot + jboltTmp, profiled); + } else if (involvedCodeHeaps.equals(SEGMENTED_HEAPS_WO_PROFILED)) { + return new CodeCacheOptions(reserved, nonNmethods, + profiled + nonProfiled, 0L); +diff --git a/test/hotspot/jtreg/compiler/codecache/jbolt/JBoltDumpModeTest.java b/test/hotspot/jtreg/compiler/codecache/jbolt/JBoltDumpModeTest.java +new file mode 100644 +index 000000000..f85c86542 +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/codecache/jbolt/JBoltDumpModeTest.java +@@ -0,0 +1,187 @@ ++/* ++ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/* ++ * @test ++ * @summary Test JBolt dump mode functions. 
++ * @library /test/lib ++ * @requires vm.flagless ++ * ++ * @run driver compiler.codecache.jbolt.JBoltDumpModeTest ++ */ ++ ++package compiler.codecache.jbolt; ++ ++import java.io.File; ++import java.io.IOException; ++import jdk.test.lib.process.OutputAnalyzer; ++import jdk.test.lib.process.ProcessTools; ++import jdk.test.lib.Utils; ++ ++public class JBoltDumpModeTest { ++ public static final String SRC_DIR = Utils.TEST_SRC; ++ public static final String ORDER_FILE = SRC_DIR + "/order.log"; ++ ++ private static void createOrderFile() { ++ try { ++ File order = new File(ORDER_FILE); ++ if (!order.exists()) { ++ order.createNewFile(); ++ } ++ } ++ catch (IOException e) { ++ e.printStackTrace(); ++ } ++ } ++ ++ private static void clearOrderFile() { ++ File order = new File(ORDER_FILE); ++ if (order.exists()) { ++ order.delete(); ++ } ++ } ++ ++ private static void OrderFileShouldExist() throws Exception { ++ File order = new File(ORDER_FILE); ++ if (order.exists()) { ++ order.delete(); ++ } ++ else { ++ throw new RuntimeException(ORDER_FILE + " doesn't exist as expect."); ++ } ++ } ++ ++ private static void OrderFileShouldNotExist() throws Exception { ++ File order = new File(ORDER_FILE); ++ if (order.exists()) { ++ throw new RuntimeException(ORDER_FILE + " exists while expect not."); ++ } ++ } ++ ++ private static void testNormalUse() throws Exception { ++ ProcessBuilder pb1 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:JBoltOrderFile=" + ORDER_FILE, ++ "-XX:+JBoltDumpMode", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ ProcessBuilder pb2 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:JBoltOrderFile=" + ORDER_FILE, ++ "-XX:+JBoltDumpMode", ++ "-XX:StartFlightRecording", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ ProcessBuilder pb3 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:JBoltOrderFile=" + ORDER_FILE, ++ "-XX:+JBoltDumpMode", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ clearOrderFile(); ++ ++ String stdout; ++ ++ OutputAnalyzer out1 = new OutputAnalyzer(pb1.start()); ++ stdout = out1.getStdout(); ++ if (!stdout.contains("JBolt in dump mode now, start a JFR recording named \"jbolt-jfr\".")) { ++ throw new RuntimeException(stdout); ++ } ++ out1.shouldHaveExitValue(0); ++ OrderFileShouldExist(); ++ ++ OutputAnalyzer out2 = new OutputAnalyzer(pb2.start()); ++ stdout = out2.getStdout(); ++ if (!stdout.contains("JBolt in dump mode now, start a JFR recording named \"jbolt-jfr\".")) { ++ throw new RuntimeException(stdout); ++ } ++ out2.shouldHaveExitValue(0); ++ OrderFileShouldExist(); ++ ++ createOrderFile(); ++ OutputAnalyzer out3 = new OutputAnalyzer(pb3.start()); ++ stdout = out3.getStdout(); ++ if (!stdout.contains("JBoltOrderFile to dump already exists and will be overwritten:")) { ++ throw new RuntimeException(stdout); ++ } ++ out3.shouldHaveExitValue(0); ++ OrderFileShouldExist(); ++ } ++ ++ private static void testUnabletoRun() throws Exception { ++ ProcessBuilder pb = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:JBoltOrderFile=" + ORDER_FILE, ++ "-XX:+JBoltDumpMode", ++ "-XX:-FlightRecorder", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ clearOrderFile(); ++ ++ String stdout; ++ OutputAnalyzer out = new OutputAnalyzer(pb.start()); ++ ++ stdout = out.getStdout(); ++ if (!stdout.contains("JBolt depends on JFR!")) { 
++ throw new RuntimeException(stdout); ++ } ++ OrderFileShouldNotExist(); ++ } ++ ++ private static void testFatalError() throws Exception { ++ ProcessBuilder pb = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:JBoltOrderFile=" + ORDER_FILE, ++ "-XX:+JBoltDumpMode", ++ "-XX:foo", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ clearOrderFile(); ++ ++ OutputAnalyzer out = new OutputAnalyzer(pb.start()); ++ ++ out.stderrShouldContain("Could not create the Java Virtual Machine"); ++ OrderFileShouldNotExist(); ++ } ++ ++ public static void main(String[] args) throws Exception { ++ testNormalUse(); ++ testUnabletoRun(); ++ testFatalError(); ++ } ++} +\ No newline at end of file +diff --git a/test/hotspot/jtreg/compiler/codecache/jbolt/JBoltReschedulingTest.java b/test/hotspot/jtreg/compiler/codecache/jbolt/JBoltReschedulingTest.java +new file mode 100644 +index 000000000..549ae3122 +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/codecache/jbolt/JBoltReschedulingTest.java +@@ -0,0 +1,167 @@ ++/* ++ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/* ++ * @test ++ * @summary Test JBolt timing rescheduling functions. 
++ * @library /test/lib ++ * @requires vm.flagless ++ * ++ * @run driver compiler.codecache.jbolt.JBoltReschedulingTest ++ */ ++ ++package compiler.codecache.jbolt; ++ ++import java.io.File; ++import java.io.IOException; ++import jdk.test.lib.process.OutputAnalyzer; ++import jdk.test.lib.process.ProcessTools; ++import jdk.test.lib.Utils; ++ ++public class JBoltReschedulingTest { ++ public static final int LONG_LENGTH = 1025; ++ ++ private static void testNormalUse() throws Exception { ++ ProcessBuilder pb1 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:JBoltRescheduling=07:30,14:30,21:30", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ ProcessBuilder pb2 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:JBoltRescheduling=00:30,01:30,02:30,03:30,03:30,04:30,05:30,06:30,07:30,08:30,09:30,10:30", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ String stdout; ++ ++ OutputAnalyzer out1 = new OutputAnalyzer(pb1.start()); ++ stdout = out1.getStdout(); ++ if (!stdout.contains("Set time trigger at 07:30") && !stdout.contains("Set time trigger at 14:30") && !stdout.contains("Set time trigger at 21:30")) { ++ throw new RuntimeException(stdout); ++ } ++ out1.shouldHaveExitValue(0); ++ ++ OutputAnalyzer out2 = new OutputAnalyzer(pb2.start()); ++ stdout = out2.getStdout(); ++ // 03:30 is duplicate and 10:30 above max time length(10) ++ if (!stdout.contains("Set time trigger at 09:30") || stdout.contains("Set time trigger at 10:30")) { ++ throw new RuntimeException(stdout); ++ } ++ out2.shouldHaveExitValue(0); ++ } ++ ++ private static void testErrorCases() throws Exception { ++ ProcessBuilder pb1 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:JBoltRescheduling=07:30,14:30,21:30", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ StringBuilder sb = new StringBuilder(LONG_LENGTH); ++ for (int i = 0; i < LONG_LENGTH; ++i) { ++ sb.append('a'); ++ } ++ String long_str = sb.toString(); ++ ProcessBuilder pb2 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:JBoltRescheduling=" + long_str, ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ ProcessBuilder pb3 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:JBoltRescheduling=12:303", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ ProcessBuilder pb4 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:JBoltRescheduling=1:30", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ ProcessBuilder pb5 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:JBoltRescheduling=12.30", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ ProcessBuilder pb6 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:JBoltRescheduling=24:61", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ OutputAnalyzer out1 = new OutputAnalyzer(pb1.start()); ++ ++ out1.stdoutShouldContain("Do not set VM option JBoltRescheduling without UseJBolt enabled."); ++ out1.shouldHaveExitValue(1); ++ ++ OutputAnalyzer out2 = new OutputAnalyzer(pb2.start()); ++ ++ out2.stdoutShouldContain("JBoltRescheduling is too long"); ++ out2.shouldHaveExitValue(1); ++ ++ OutputAnalyzer out3 = new OutputAnalyzer(pb3.start()); ++ ++ out3.stdoutShouldContain("Invalid time 
12:303 in JBoltRescheduling"); ++ out3.shouldHaveExitValue(1); ++ ++ OutputAnalyzer out4 = new OutputAnalyzer(pb4.start()); ++ ++ out4.stdoutShouldContain("Invalid time 1:30 in JBoltRescheduling"); ++ out4.shouldHaveExitValue(1); ++ ++ OutputAnalyzer out5 = new OutputAnalyzer(pb5.start()); ++ ++ out5.stdoutShouldContain("Invalid time 12.30 in JBoltRescheduling"); ++ out5.shouldHaveExitValue(1); ++ ++ OutputAnalyzer out6 = new OutputAnalyzer(pb6.start()); ++ ++ out6.stdoutShouldContain("Invalid time 24:61 in JBoltRescheduling"); ++ out6.shouldHaveExitValue(1); ++ } ++ ++ public static void main(String[] args) throws Exception { ++ testNormalUse(); ++ testErrorCases(); ++ } ++} +\ No newline at end of file +diff --git a/test/hotspot/jtreg/compiler/codecache/jbolt/JBoltVMOptionsTest.java b/test/hotspot/jtreg/compiler/codecache/jbolt/JBoltVMOptionsTest.java +new file mode 100644 +index 000000000..d8fddf9bf +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/codecache/jbolt/JBoltVMOptionsTest.java +@@ -0,0 +1,291 @@ ++/* ++ * Copyright (c) 2020, 2024, Huawei Technologies Co., Ltd. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++/* ++ * @test ++ * @summary Test JBolt VM options. 
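JBoltReschedulingTest above pins down what the option parser for -XX:JBoltRescheduling must accept: a comma-separated list of at most ten unique HH:MM trigger times, with duplicates dropped and values such as 1:30, 12.30, 12:303 and 24:61 rejected. A standalone sketch of that per-token validation rule as the test encodes it (this is not the JVM's actual parser, and the function name is illustrative):

```cpp
#include <cctype>
#include <cstring>

// Accept exactly "HH:MM" with 00-23 hours and 00-59 minutes.
static bool demo_is_valid_reschedule_time(const char* s) {
  if (s == NULL || std::strlen(s) != 5 || s[2] != ':') return false;
  if (!std::isdigit((unsigned char)s[0]) || !std::isdigit((unsigned char)s[1]) ||
      !std::isdigit((unsigned char)s[3]) || !std::isdigit((unsigned char)s[4])) return false;
  const int hh = (s[0] - '0') * 10 + (s[1] - '0');
  const int mm = (s[3] - '0') * 10 + (s[4] - '0');
  return hh <= 23 && mm <= 59;   // e.g. "07:30" passes, "24:61" is rejected
}
```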
++ * @library /test/lib ++ * @requires vm.flagless ++ * ++ * @run driver compiler.codecache.jbolt.JBoltVMOptionsTest ++ */ ++ ++package compiler.codecache.jbolt; ++ ++import java.io.File; ++import jdk.test.lib.process.OutputAnalyzer; ++import jdk.test.lib.process.ProcessTools; ++import jdk.test.lib.Utils; ++ ++public class JBoltVMOptionsTest { ++ public static final String SRC_DIR = Utils.TEST_SRC; ++ public static final String TEMP_FILE = SRC_DIR + "/tmp.log"; ++ ++ public static void main(String[] args) throws Exception { ++ test1(); ++ test2(); ++ test3(); ++ test4(); ++ } ++ ++ private static void clearTmpFile() { ++ File tmp = new File(TEMP_FILE); ++ if (tmp.exists()) { ++ tmp.delete(); ++ } ++ } ++ ++ private static void test1() throws Exception { ++ ProcessBuilder pb1 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:+JBoltDumpMode", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ProcessBuilder pb2 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:+JBoltLoadMode", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ProcessBuilder pb3 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:+JBoltLoadMode", ++ "-XX:+JBoltDumpMode", ++ "-XX:JBoltOrderFile=" + SRC_DIR + "/o1.log", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ProcessBuilder pb4 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:JBoltOrderFile=" + TEMP_FILE, ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ OutputAnalyzer out1 = new OutputAnalyzer(pb1.start()); ++ OutputAnalyzer out2 = new OutputAnalyzer(pb2.start()); ++ OutputAnalyzer out3 = new OutputAnalyzer(pb3.start()); ++ OutputAnalyzer out4 = new OutputAnalyzer(pb4.start()); ++ ++ String stdout; ++ ++ stdout = out1.getStdout(); ++ if (!stdout.contains("JBoltOrderFile is not set!")) { ++ throw new RuntimeException(stdout); ++ } ++ ++ stdout = out2.getStdout(); ++ if (!stdout.contains("JBoltOrderFile is not set!")) { ++ throw new RuntimeException(stdout); ++ } ++ ++ stdout = out3.getStdout(); ++ if (!stdout.contains("Do not set both JBoltDumpMode and JBoltLoadMode!")) { ++ throw new RuntimeException(stdout); ++ } ++ ++ stdout = out4.getStdout(); ++ if (!stdout.contains("JBoltOrderFile is ignored because it is in auto mode.")) { ++ throw new RuntimeException(stdout); ++ } ++ } ++ ++ private static void test2() throws Exception { ++ ProcessBuilder pb1 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:+PrintFlagsFinal", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ProcessBuilder pb2 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:+JBoltDumpMode", ++ "-XX:JBoltOrderFile=" + TEMP_FILE, ++ "-XX:+PrintFlagsFinal", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ProcessBuilder pb3 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:+JBoltLoadMode", ++ "-XX:JBoltOrderFile=" + SRC_DIR + "/o1.log", ++ "-XX:+PrintFlagsFinal", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ OutputAnalyzer out1 = new OutputAnalyzer(pb1.start()); ++ OutputAnalyzer out2 = new OutputAnalyzer(pb2.start()); ++ OutputAnalyzer out3 = new OutputAnalyzer(pb3.start()); ++ ++ String stdout; ++ ++ stdout = out1.getStdout().replaceAll(" +", ""); ++ if (!stdout.contains("JBoltDumpMode=false") || 
!stdout.contains("JBoltLoadMode=false")) { ++ throw new RuntimeException(stdout); ++ } ++ ++ stdout = out2.getStdout().replaceAll(" +", ""); ++ if (!stdout.contains("JBoltDumpMode=true") || !stdout.contains("JBoltLoadMode=false")) { ++ throw new RuntimeException(stdout); ++ } ++ ++ clearTmpFile(); ++ ++ stdout = out3.getStdout().replaceAll(" +", ""); ++ if (!stdout.contains("JBoltDumpMode=false") || !stdout.contains("JBoltLoadMode=true")) { ++ throw new RuntimeException(stdout); ++ } ++ } ++ ++ private static void test3() throws Exception { ++ ProcessBuilder pbF0 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:+JBoltLoadMode", ++ "-XX:JBoltOrderFile=" + TEMP_FILE, ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ProcessBuilder pbF1 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:+JBoltLoadMode", ++ "-XX:JBoltOrderFile=" + SRC_DIR + "/o1.log", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ProcessBuilder pbF2 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:+JBoltLoadMode", ++ "-XX:JBoltOrderFile=" + SRC_DIR + "/o2.log", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ProcessBuilder pbF3 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:+JBoltLoadMode", ++ "-XX:JBoltOrderFile=" + SRC_DIR + "/o3.log", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ProcessBuilder pbF4 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+UseJBolt", ++ "-XX:+JBoltLoadMode", ++ "-XX:JBoltOrderFile=" + SRC_DIR + "/o4.log", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ OutputAnalyzer outF0 = new OutputAnalyzer(pbF0.start()); ++ OutputAnalyzer outF1 = new OutputAnalyzer(pbF1.start()); ++ OutputAnalyzer outF2 = new OutputAnalyzer(pbF2.start()); ++ OutputAnalyzer outF3 = new OutputAnalyzer(pbF3.start()); ++ OutputAnalyzer outF4 = new OutputAnalyzer(pbF4.start()); ++ ++ String stdout; ++ ++ stdout = outF0.getStdout(); ++ if (!stdout.contains("JBoltOrderFile does not exist or cannot be accessed!")) { ++ throw new RuntimeException(stdout); ++ } ++ ++ stdout = outF1.getStdout(); ++ if (!stdout.contains("Wrong format of JBolt order line! line=\"X 123 aa bb cc\".")) { ++ throw new RuntimeException(stdout); ++ } ++ ++ stdout = outF2.getStdout(); ++ if (!stdout.contains("Wrong format of JBolt order line! 
line=\"M aa/bb/C dd ()V\".")) { ++ throw new RuntimeException(stdout); ++ } ++ ++ stdout = outF3.getStdout(); ++ if (!stdout.contains("Duplicated method: {aa/bb/CC dd ()V}!")) { ++ throw new RuntimeException(stdout); ++ } ++ ++ stdout = outF4.getStdout(); ++ if (stdout.contains("Error occurred during initialization of VM")) { ++ throw new RuntimeException(stdout); ++ } ++ outF4.shouldHaveExitValue(0); ++ ++ clearTmpFile(); ++ } ++ ++ private static void test4() throws Exception { ++ ProcessBuilder pb1 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+JBoltDumpMode", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ProcessBuilder pb2 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:+JBoltLoadMode", ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ProcessBuilder pb3 = ProcessTools.createJavaProcessBuilder( ++ "-XX:+UnlockExperimentalVMOptions", ++ "-XX:JBoltOrderFile=" + TEMP_FILE, ++ "-Xlog:jbolt*=trace", ++ "--version" ++ ); ++ ++ OutputAnalyzer out1 = new OutputAnalyzer(pb1.start()); ++ OutputAnalyzer out2 = new OutputAnalyzer(pb2.start()); ++ OutputAnalyzer out3 = new OutputAnalyzer(pb3.start()); ++ ++ String stdout; ++ ++ stdout = out1.getStdout(); ++ if (!stdout.contains("Do not set VM option JBoltDumpMode without UseJBolt enabled.")) { ++ throw new RuntimeException(stdout); ++ } ++ ++ stdout = out2.getStdout(); ++ if (!stdout.contains("Do not set VM option JBoltLoadMode without UseJBolt enabled.")) { ++ throw new RuntimeException(stdout); ++ } ++ ++ stdout = out3.getStdout(); ++ if (!stdout.contains("Do not set VM option JBoltOrderFile without UseJBolt enabled.")) { ++ throw new RuntimeException(stdout); ++ } ++ ++ clearTmpFile(); ++ } ++} +\ No newline at end of file +diff --git a/test/hotspot/jtreg/compiler/codecache/jbolt/o1.log b/test/hotspot/jtreg/compiler/codecache/jbolt/o1.log +new file mode 100644 +index 000000000..9d7e8fb8e +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/codecache/jbolt/o1.log +@@ -0,0 +1,2 @@ ++M 123 aa/bb/C dd ()V ++X 123 aa bb cc +\ No newline at end of file +diff --git a/test/hotspot/jtreg/compiler/codecache/jbolt/o2.log b/test/hotspot/jtreg/compiler/codecache/jbolt/o2.log +new file mode 100644 +index 000000000..ef348a6ab +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/codecache/jbolt/o2.log +@@ -0,0 +1,2 @@ ++M aa/bb/C dd ()V ++M 123 aa/bb/CC dd ()V +\ No newline at end of file +diff --git a/test/hotspot/jtreg/compiler/codecache/jbolt/o3.log b/test/hotspot/jtreg/compiler/codecache/jbolt/o3.log +new file mode 100644 +index 000000000..fe6906b47 +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/codecache/jbolt/o3.log +@@ -0,0 +1,4 @@ ++# this is a comment ++C ++M 123 aa/bb/CC dd ()V ++M 123 aa/bb/CC dd ()V +\ No newline at end of file +diff --git a/test/hotspot/jtreg/compiler/codecache/jbolt/o4.log b/test/hotspot/jtreg/compiler/codecache/jbolt/o4.log +new file mode 100644 +index 000000000..13e96dbab +--- /dev/null ++++ b/test/hotspot/jtreg/compiler/codecache/jbolt/o4.log +@@ -0,0 +1,12 @@ ++M 123 aa/bb/CC dd ()V ++# asdfadsfadfs ++C ++M 456 aa/bb/CC ddd ()V ++M 456 aa/bb/CCC dd ()V ++ ++C ++ ++ ++ ++ ++M 456 aa/bb/CCCCCC ddddddd ()V +diff --git a/test/jdk/java/lang/management/MemoryMXBean/MemoryTest.java b/test/jdk/java/lang/management/MemoryMXBean/MemoryTest.java +index 499297ad2..fd37b73d4 100644 +--- a/test/jdk/java/lang/management/MemoryMXBean/MemoryTest.java ++++ b/test/jdk/java/lang/management/MemoryMXBean/MemoryTest.java +@@ -94,7 +94,7 @@ public class MemoryTest { + 
expectedMaxNumPools[HEAP] = expectedNumPools; + + expectedMinNumPools[NONHEAP] = 2; +- expectedMaxNumPools[NONHEAP] = 5; ++ expectedMaxNumPools[NONHEAP] = 7; + + checkMemoryPools(); + checkMemoryManagers(); +diff --git a/test/lib/sun/hotspot/code/BlobType.java b/test/lib/sun/hotspot/code/BlobType.java +index 4b5a1e11d..4a7c87334 100644 +--- a/test/lib/sun/hotspot/code/BlobType.java ++++ b/test/lib/sun/hotspot/code/BlobType.java +@@ -46,8 +46,24 @@ public enum BlobType { + || type == BlobType.MethodNonProfiled; + } + }, ++ // Execution hot non-profiled nmethods ++ MethodJBoltHot(2, "CodeHeap 'jbolt hot nmethods'", "JBoltCodeHeapSize") { ++ @Override ++ public boolean allowTypeWhenOverflow(BlobType type) { ++ return super.allowTypeWhenOverflow(type) ++ || type == BlobType.MethodNonProfiled; ++ } ++ }, ++ // Execution tmp non-profiled nmethods ++ MethodJBoltTmp(3, "CodeHeap 'jbolt tmp nmethods'", "JBoltCodeHeapSize") { ++ @Override ++ public boolean allowTypeWhenOverflow(BlobType type) { ++ return super.allowTypeWhenOverflow(type) ++ || type == BlobType.MethodNonProfiled; ++ } ++ }, + // Non-nmethods like Buffers, Adapters and Runtime Stubs +- NonNMethod(2, "CodeHeap 'non-nmethods'", "NonNMethodCodeHeapSize") { ++ NonNMethod(4, "CodeHeap 'non-nmethods'", "NonNMethodCodeHeapSize") { + @Override + public boolean allowTypeWhenOverflow(BlobType type) { + return super.allowTypeWhenOverflow(type) +@@ -56,7 +72,7 @@ public enum BlobType { + } + }, + // All types (No code cache segmentation) +- All(3, "CodeCache", "ReservedCodeCacheSize"); ++ All(5, "CodeCache", "ReservedCodeCacheSize"); + + public final int id; + public final String sizeOptionName; +@@ -99,6 +115,10 @@ public enum BlobType { + // there is no MethodProfiled in non tiered world or pure C1 + result.remove(MethodProfiled); + } ++ if (!whiteBox.getBooleanVMFlag("UseJBolt") || whiteBox.getBooleanVMFlag("JBoltDumpMode")) { ++ result.remove(MethodJBoltHot); ++ result.remove(MethodJBoltTmp); ++ } + return result; + } + +-- +2.47.1 + diff --git a/add-sw_64-support.patch b/add-sw_64-support.patch new file mode 100644 index 0000000000000000000000000000000000000000..3d665e53cf3bc19b881c81b503f70812fbb991a5 --- /dev/null +++ b/add-sw_64-support.patch @@ -0,0 +1,88728 @@ +diff --git a/Changelog.md b/Changelog.md +new file mode 100644 +index 0000000000..506556df23 +--- /dev/null ++++ b/Changelog.md +@@ -0,0 +1,27 @@ ++swjdk11.0.15-sw1.3.0(20240328)版主要修改 ++1.8A与6B代码同源,通过读取cpuinfo自动适配8A ++2.实现部分Core4新增指令优化 ++ ++swjdk11.0.15-sw1.2.0(20240304)版主要修改: ++1.按照6B平台本地方法栈帧结构,实现NMT及JFR; ++2.解决地址动态patch非原子性引起的偶发sigill错,使用SafePatch选项控制,默认开启,对性能有约2%的影响; ++3.解决SPECjvm2008 sunflow偶发错; ++4.解决jdk stram NullPointerException错,添加2处memb指令(UseNecessaryMembar,使得6B平台SPECjbb2015的Max-Jops及critical-Jops下降约7%); ++5.解决test/jdk/java/lang/Math/FusedMultiplyAddTests.java在-Xcomp选项下的错误; ++6.增加ReservedCodeCacheSize大小到240M,防止发生性能异常下降; ++ ++swjdk11.0.15-sw1.1.0-SP.1(20230710)版主要修改: ++1.实现兼容8A的锁序列(commit id:1a11503de47d69d52702b357eaf6f3782399443b) ++ ++========================================================================== ++swjdk11.0.15-sw1.0.0版主要修改: ++ ++1.按照《申威基础软件版本标识规范v1.0.0-beta.4》要求修改java -version 格式; ++ ++2.解决使用gcc1030编译的jdk在自举及cpubench测试报段违例问题; ++ ++3.feature新增jni-check,cmsgc,jvmti,serivices; ++ ++4.默认使能UseCompressedOops; ++ ++5.实现UseCRC32Intrinsics优化; +diff --git a/README b/README +new file mode 100644 +index 0000000000..4f57f9aa9d +--- /dev/null ++++ b/README +@@ -0,0 +1,21 @@ ++Read READYJ.md for compilation guild! ++ ++Welcome to the JDK! 
++=================== ++ ++For information about building the JDK, including how to retrieve all ++of the source code, please see either of these files: ++ ++ * doc/building.html (html version) ++ * doc/building.md (markdown version) ++ ++See http://openjdk.java.net/ for more information about the OpenJDK ++Community and the JDK. ++ ++=================== ++ ++如果你不能在6B服务器上顺利执行`bash native_configure release`命令,观察生成的config.log,如果出现 unrecongnized command line option '-mgprel-size=32',说明编译使用的gcc版本较低。 ++ ++解决办法: ++ ++修改native_configure文件,删除2处 "-mgprel-size=32",删除1处 "-Wl,-no-relax" +diff --git a/READSW.md b/READSW.md +new file mode 100644 +index 0000000000..46cb3d9ace +--- /dev/null ++++ b/READSW.md +@@ -0,0 +1,12 @@ ++## build whole jdk ++copy a jdk to root dir, named like jdk-sw-c ++native compile: ++1.export PATH=./jdk-sw-c/bin:$PATH ++2.bash native_configure release ++3.make CONF=linux-sw64-normal-custom-release images ++ ++If the build environment no git, you must do "bash version_patch.sh" on the environment has git to get the git id, ++then copy the swjdk11u src to the target envirinment. ++ 1. bash native_configure release(slowdebug) ++ 2. make all ++ +diff --git a/READYJ.md b/READYJ.md +new file mode 100644 +index 0000000000..ef3681cc48 +--- /dev/null ++++ b/READYJ.md +@@ -0,0 +1,16 @@ ++## only build hotspot ++use cross compile: ++使用bash cross_configure slowdebug做配置 ++ ++copy a jdk to root dir, named like jdk-sw-c ++edit cross_compile to modify remote_ip, then ++使用bash cross_compile slowdebug jdk-sw-c 做编译 ++ ++## build whole jdk ++use native compile: ++copy a swjdk11 to the root of this repos and name it jdk-sw-c. ++>> export PATH=./jdk-sw-c/bin:$PATH ++>> bash native_configure release ++>> make CONF=linux-sw64-normal-custom-release images ++on x86: ++>> cp -r build/linux-sw64-normal-custom-release/images/jdk jdk-sw-c +diff --git a/UPGRADE_GUIDE.md b/UPGRADE_GUIDE.md +new file mode 100755 +index 0000000000..14850a4d79 +--- /dev/null ++++ b/UPGRADE_GUIDE.md +@@ -0,0 +1,49 @@ ++# 升级指南 ++ ++> 假设从版本a升级到版本b,参考x86代码。 ++ ++## S1: 建立x86的b版本和sw的a版本代码查看环境 ++ ++## S2: 在sw版本上如下操作 ++ ++1. git merge b ++ ++2. 可能有冲突,就解决冲突。这个过程中可能要阅读理解b版本的x86冲突代码 ++ ++3. 
解决后将sw版本与刚才的x86的b版本作目录级对比,主要关注三个目录:make、src和test。 ++如下文件目前的确存在差异,可能需要仔细检查: ++ ++src/hotspot/os/linux/os_linux.cpp ++src/hotspot/share/asm/codeBuffer.cpp ++src/hotspot/share/code/codeBlob.cpp ++src/hotspot/share/interpreter/abstractInterpreter.cpp src/hotspot/share/interpreter/abstractInterpreter.hpp src/hotspot/share/interpreter/interpreterRuntime.cpp src/hotspot/share/interpreter/interpreterRuntime.hpp src/hotspot/share/interpreter/templateInterpreterGenerator.cpp src/hotspot/share/interpreter/templateInterpreterGenerator.hpp src/hotspot/share/jfr/utilities/jfrBigEndian.hpp ++src/hotspot/share/oops/method.cpp ++src/hotspot/share/runtime/arguments.cpp src/hotspot/share/runtime/safepointMechanism.cpp src/hotspot/share/runtime/sharedRuntime.cpp src/hotspot/share/runtime/sharedRuntime.hpp src/hotspot/share/runtime/sharedRuntimeTrig.cpp src/hotspot/share/runtime/stubRoutines.cpp ++src/hotspot/share/runtime/thread.cpp ++src/hotspot/share/runtime/vm_version.cpp ++src/hotspot/share/utilities/macros.hpp ++ ++make/autoconf/build-aux/autoconf-config.guess ++make/autoconf/build-performance.m4 ++make/autoconf/flags-cflags.m4 ++make/autoconf/hotspot.m4 ++make/autoconf/jdk-version.m4 ++make/autoconf/platform.m4 ++make/CompileJavaModules.gmk ++make/conf/jib-profiles.js ++make/devkit/Tools.gmk ++make/gensrc/Gensrc-jdk.internal.vm.compiler.gm ++make/hotspot/ide/CreateVSProject.gmk ++make/hotspot/lib/JvmFeatures.gmk ++make/launcher/Launcher-jdk.aot.gmk ++make/RunTestsPrebuilt.gmk ++make/test/JtregGraalUnit.gmk ++ ++## S3: 在x86的b版本上做如下操作 ++ ++在git log中找到a版本,然后compare with current. ++ ++关注如下目录的改动,在基本理解x86升级目的的基础上对sw相应目录做相应升级: ++ ++src/hotspot/cpu/x86, src/hotspot/os_cpu/linux_x86 ++ +diff --git a/cross_compile b/cross_compile +new file mode 100755 +index 0000000000..2edf249335 +--- /dev/null ++++ b/cross_compile +@@ -0,0 +1,20 @@ ++#!/bin/bash ++level=${1?usage: $0 release/slowdebug images-jdk} ++dest=${2?usage: $0 release/slowdebug images-jdk} ++ ++#level=release ++#dest=images-jdk-release ++#level=slowdebug ++#dest=images-jdk ++ ++variant=custom ++remote_ip=172.16.130.191 ++#remote_ip=172.16.12.167 ++ ++#make LOG="debug" CONF=linux-sw64-normal-$variant-$level jdk &&\ ++make LOG="debug" CONF=linux-sw64-normal-$variant-$level hotspot && \ ++echo -e "\n\n>>>>>build success<<<<<\n\n" &&\ ++cp build/linux-sw64-normal-$variant-$level/support/modules_libs/java.base/server/libjvm.so $dest/lib/server/ && \ ++echo -e "\n\n>>>>>copy success<<<<<\n\n" && \ ++ping -c 1 -W 1 $remote_ip && \ ++ ssh lsp@$remote_ip "$(pwd)/$dest/bin/java -XX:+PrintCompilation -Xcomp -version" +diff --git a/cross_configure b/cross_configure +new file mode 100755 +index 0000000000..0a176921ef +--- /dev/null ++++ b/cross_configure +@@ -0,0 +1,36 @@ ++#!/bin/bash ++level=${1?usage: $0 release/slowdebug} ++#crosscompiler=swgcc710-ali-cross ++crosscompiler=swgcc710-6a-cross ++# for c version (i.e. 
9916) ++ # --with-jvm-variants JVM variants (separated by commas) to build ++ # (server,client,minimal,core,zero,custom) [server] ++ # --with-jvm-features "aot cds cmsgc compiler1 compiler2 epsilongc g1gc graal jfr \ ++ # jni-check jvmci jvmti management nmt parallelgc serialgc services vm-structs zgc" ++ # --enable-aot=no \ ++ # --enable-cds=no \ ++ # --disable-ccache \ ++ builddate=`date +%Y-%m-%d` ++ buildtag=sw1.3.0 ++ bash configure \ ++ --with-zlib=bundled \ ++ --with-native-debug-symbols=internal \ ++ --with-debug-level=$level \ ++ --with-jvm-variants=custom \ ++ --with-jvm-features=serialgc,g1gc,parallelgc,compiler2,management,nmt,jvmti,services,cmsgc,jfr\ ++ --enable-sjavac=no \ ++ --with-version-date=$builddate \ ++ --with-version-opt=$buildtag \ ++ --with-version-pre=no \ ++ --disable-javac-server \ ++ --with-extra-cflags=" -mieee -Wno-error=maybe-uninitialized -Wno-error=deprecated-declarations -Wno-error=type-limits -Wno-error=format-security -Wno-error=conversion-null -Wno-error=sign-compare -Wno-error=int-to-pointer-cast" \ ++ --with-extra-cxxflags="-mieee -Wno-error=maybe-uninitialized -Wno-error=deprecated-declarations -Wno-error=type-limits -Wno-error=format-security -Wno-error=conversion-null -Wno-error=sign-compare -Wno-error=int-to-pointer-cast" \ ++ --with-extra-ldflags=" -mieee" \ ++ --openjdk-target=sw_64-unknown-linux-gnu \ ++ --with-devkit=/usr/sw/$crosscompiler/usr/ \ ++ --x-includes=/usr/sw/$crosscompiler/usr/include \ ++ --x-libraries=/usr/sw/$crosscompiler/usr/lib \ ++ --with-freetype-include=/usr/sw/$crosscompiler/usr/include/freetype2 \ ++ --with-freetype-lib=/usr/sw/$crosscompiler/usr/lib/sw_64-linux-gnu \ ++ --disable-warnings-as-errors ++ +diff --git a/idegen.sh b/idegen.sh +new file mode 100644 +index 0000000000..faacaf6a2c +--- /dev/null ++++ b/idegen.sh +@@ -0,0 +1,6 @@ ++#!/bin/bash ++rep=$(pwd) ++echo $rep ++sed -i "s!/home/yj/work/swjdk/afu11u-yj!$rep!g" compile_commands.json ++git update-index --assume-unchanged compile_commands.json ++ +diff --git a/make/CompileJavaModules.gmk b/make/CompileJavaModules.gmk +index 46fb9b4219..b1ec753d94 100644 +--- a/make/CompileJavaModules.gmk ++++ b/make/CompileJavaModules.gmk +@@ -430,6 +430,7 @@ jdk.internal.vm.ci_ADD_JAVAC_FLAGS += -parameters -Xlint:-exports -XDstringConca + + jdk.internal.vm.compiler_ADD_JAVAC_FLAGS += -parameters -XDstringConcat=inline \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.aarch64=jdk.internal.vm.compiler \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.sw64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.amd64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code.site=jdk.internal.vm.compiler \ +@@ -437,6 +438,7 @@ jdk.internal.vm.compiler_ADD_JAVAC_FLAGS += -parameters -XDstringConcat=inline \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.common=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.aarch64=jdk.internal.vm.compiler \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.sw64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.amd64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.sparc=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.meta=jdk.internal.vm.compiler \ +@@ -456,6 +458,7 @@ jdk.internal.vm.compiler_EXCLUDES += \ + 
org.graalvm.compiler.api.directives.test \ + org.graalvm.compiler.api.test \ + org.graalvm.compiler.asm.aarch64.test \ ++ org.graalvm.compiler.asm.sw64.test \ + org.graalvm.compiler.asm.amd64.test \ + org.graalvm.compiler.asm.sparc.test \ + org.graalvm.compiler.asm.test \ +@@ -493,6 +496,7 @@ jdk.internal.vm.compiler_EXCLUDES += \ + + jdk.aot_ADD_JAVAC_FLAGS += -parameters -XDstringConcat=inline \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.aarch64=jdk.internal.vm.compiler,jdk.aot \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.sw64=jdk.internal.vm.compiler,jdk.aot \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.amd64=jdk.internal.vm.compiler,jdk.aot \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code=jdk.internal.vm.compiler,jdk.aot \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code.site=jdk.internal.vm.compiler,jdk.aot \ +@@ -500,6 +504,7 @@ jdk.aot_ADD_JAVAC_FLAGS += -parameters -XDstringConcat=inline \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.common=jdk.internal.vm.compiler,jdk.aot \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot=jdk.internal.vm.compiler,jdk.aot \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.aarch64=jdk.internal.vm.compiler,jdk.aot \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.sw64=jdk.internal.vm.compiler,jdk.aot \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.amd64=jdk.internal.vm.compiler,jdk.aot \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.sparc=jdk.internal.vm.compiler,jdk.aot \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.meta=jdk.internal.vm.compiler,jdk.aot \ +diff --git a/make/RunTestsPrebuilt.gmk b/make/RunTestsPrebuilt.gmk +index 918140fb5d..fb937b9c34 100644 +--- a/make/RunTestsPrebuilt.gmk ++++ b/make/RunTestsPrebuilt.gmk +@@ -225,6 +225,10 @@ endif + # Check number of cores and memory in MB + ifeq ($(OPENJDK_TARGET_OS), linux) + NUM_CORES := $(shell $(CAT) /proc/cpuinfo | $(GREP) -c processor) ++ # ZHJ20170103 for SW64 ++ if test "$NUM_CORES" -eq "0"; then ++ NUM_CORES=`cat /proc/cpuinfo | grep "cpus active" | awk '{print $4}'` ++ fi + MEMORY_SIZE := $(shell \ + $(EXPR) `$(CAT) /proc/meminfo | $(GREP) MemTotal | $(AWK) '{print $$2}'` / 1024 \ + ) +diff --git a/make/autoconf/build-aux/autoconf-config.guess b/make/autoconf/build-aux/autoconf-config.guess +index 15ee438926..05659616a5 100644 +--- a/make/autoconf/build-aux/autoconf-config.guess ++++ b/make/autoconf/build-aux/autoconf-config.guess +@@ -907,6 +907,9 @@ EOF + if test "$?" 
= 0 ; then LIBC="libc1" ; else LIBC="" ; fi + echo ${UNAME_MACHINE}-unknown-linux-gnu${LIBC} + exit ;; ++ sw_64:Linux:*:*) ++ echo ${UNAME_MACHINE}-unknown-linux-gnu ++ exit ;; + arm*:Linux:*:*) + eval $set_cc_for_build + if echo __ARM_EABI__ | $CC_FOR_BUILD -E - 2>/dev/null \ +diff --git a/make/autoconf/build-performance.m4 b/make/autoconf/build-performance.m4 +index 7892b4001a..b428f2721b 100644 +--- a/make/autoconf/build-performance.m4 ++++ b/make/autoconf/build-performance.m4 +@@ -32,6 +32,10 @@ AC_DEFUN([BPERF_CHECK_CORES], + if test -f /proc/cpuinfo; then + # Looks like a Linux (or cygwin) system + NUM_CORES=`cat /proc/cpuinfo | grep -c processor` ++ if test "$NUM_CORES" -eq "0"; then ++ # ZHJ20170103 for SW64 ++ NUM_CORES=`cat /proc/cpuinfo | grep "cpus active" | awk '{ print [$]4 }'` ++ fi + FOUND_CORES=yes + elif test -x /usr/sbin/psrinfo; then + # Looks like a Solaris system +diff --git a/make/autoconf/flags-cflags.m4 b/make/autoconf/flags-cflags.m4 +index 8ead0e22d6..be7586b76d 100644 +--- a/make/autoconf/flags-cflags.m4 ++++ b/make/autoconf/flags-cflags.m4 +@@ -289,9 +289,9 @@ AC_DEFUN([FLAGS_SETUP_OPTIMIZATION], + C_O_FLAG_NORM="-xO2 -Wc,-Qrm-s -Wc,-Qiselect-T0" + fi + elif test "x$TOOLCHAIN_TYPE" = xgcc; then +- C_O_FLAG_HIGHEST_JVM="-O3" +- C_O_FLAG_HIGHEST="-O3" +- C_O_FLAG_HI="-O3" ++ C_O_FLAG_HIGHEST_JVM="-O2" ++ C_O_FLAG_HIGHEST="-O2" ++ C_O_FLAG_HI="-O2" + C_O_FLAG_NORM="-O2" + C_O_FLAG_SIZE="-Os" + C_O_FLAG_DEBUG="-O0" +diff --git a/make/autoconf/hotspot.m4 b/make/autoconf/hotspot.m4 +index 2c52fd98c6..0289e6072e 100644 +--- a/make/autoconf/hotspot.m4 ++++ b/make/autoconf/hotspot.m4 +@@ -428,6 +428,7 @@ AC_DEFUN_ONCE([HOTSPOT_SETUP_JVM_FEATURES], + # Only enable jvmci on x86_64, sparcv9 and aarch64 + if test "x$OPENJDK_TARGET_CPU" = "xx86_64" || \ + test "x$OPENJDK_TARGET_CPU" = "xsparcv9" || \ ++ test "x$OPENJDK_TARGET_CPU" = "xsw64" || \ + test "x$OPENJDK_TARGET_CPU" = "xaarch64" ; then + AC_MSG_RESULT([yes]) + JVM_FEATURES_jvmci="jvmci" +diff --git a/make/autoconf/platform.m4 b/make/autoconf/platform.m4 +index 5d1d9efa39..fb732e25ea 100644 +--- a/make/autoconf/platform.m4 ++++ b/make/autoconf/platform.m4 +@@ -162,6 +162,12 @@ AC_DEFUN([PLATFORM_EXTRACT_VARS_FROM_CPU], + VAR_CPU_BITS=64 + VAR_CPU_ENDIAN=big + ;; ++ sw_64) ++ VAR_CPU=sw64 ++ VAR_CPU_ARCH=sw64 ++ VAR_CPU_BITS=64 ++ VAR_CPU_ENDIAN=little ++ ;; + *) + AC_MSG_ERROR([unsupported cpu $1]) + ;; +@@ -534,6 +540,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], + HOTSPOT_$1_CPU=ppc_64 + elif test "x$OPENJDK_$1_CPU" = xppc64le; then + HOTSPOT_$1_CPU=ppc_64 ++ elif test "x$OPENJDK_$1_CPU" = xsw64; then ++ HOTSPOT_$1_CPU=sw_64 + fi + AC_SUBST(HOTSPOT_$1_CPU) + +@@ -554,6 +562,8 @@ AC_DEFUN([PLATFORM_SETUP_LEGACY_VARS_HELPER], + HOTSPOT_$1_CPU_DEFINE=PPC64 + elif test "x$OPENJDK_$1_CPU" = xppc64le; then + HOTSPOT_$1_CPU_DEFINE=PPC64 ++ elif test "x$OPENJDK_$1_CPU" = xsw64; then ++ HOTSPOT_$1_CPU_DEFINE=SW64 + + # The cpu defines below are for zero, we don't support them directly. 
+ elif test "x$OPENJDK_$1_CPU" = xsparc; then +diff --git a/make/conf/jib-profiles.js b/make/conf/jib-profiles.js +index 3b7194e425..f4c18d9dc0 100644 +--- a/make/conf/jib-profiles.js ++++ b/make/conf/jib-profiles.js +@@ -233,7 +233,7 @@ var getJibProfilesCommon = function (input, data) { + common.main_profile_names = [ + "linux-x64", "linux-x86", "macosx-x64", "solaris-x64", + "solaris-sparcv9", "windows-x64", "windows-x86", "windows-aarch64", +- "linux-aarch64", "linux-arm32", "linux-arm64", "linux-arm-vfp-hflt", ++ "linux-aarch64", "linux-sw64", "linux-arm32", "linux-arm64", "linux-arm-vfp-hflt", + "linux-arm-vfp-hflt-dyn" + ]; + +@@ -469,6 +469,17 @@ var getJibProfilesProfiles = function (input, common, data) { + ], + }, + ++ "linux-sw64": { ++ target_os: "linux", ++ target_cpu: "sw64", ++ build_cpu: "sw64", ++ dependencies: ["devkit", "build_devkit", "cups"], ++ configure_args: [ ++ "--openjdk-target=sw_64-linux-gnu", "--with-freetype=bundled", ++ "--disable-warnings-as-errors", "--with-cpu-port=sw64", ++ ], ++ }, ++ + "linux-arm64": { + target_os: "linux", + target_cpu: "aarch64", +diff --git a/make/devkit/Tools.gmk b/make/devkit/Tools.gmk +index 6a716fe37e..371c627934 100644 +--- a/make/devkit/Tools.gmk ++++ b/make/devkit/Tools.gmk +@@ -297,6 +297,10 @@ PATHEXT = $(PREFIX)/bin: + + PATHPRE = PATH=$(PATHEXT)$(PATH) + NUM_CORES := $(shell cat /proc/cpuinfo | grep -c processor) ++# ZHJ20170103 for SW64 ++if test "$NUM_CORES" -eq "0"; then ++ NUM_CORES=`cat /proc/cpuinfo | grep "cpus active" | awk '{print $4}'` ++fi + BUILDPAR = -j$(NUM_CORES) + + # Default commands to when making +diff --git a/make/gensrc/Gensrc-jdk.internal.vm.compiler.gmk b/make/gensrc/Gensrc-jdk.internal.vm.compiler.gmk +index 4f9566a1cc..580b68b329 100644 +--- a/make/gensrc/Gensrc-jdk.internal.vm.compiler.gmk ++++ b/make/gensrc/Gensrc-jdk.internal.vm.compiler.gmk +@@ -39,12 +39,14 @@ PROC_SRC_SUBDIRS := \ + org.graalvm.compiler.code \ + org.graalvm.compiler.core \ + org.graalvm.compiler.core.aarch64 \ ++ org.graalvm.compiler.core.sw64 \ + org.graalvm.compiler.core.amd64 \ + org.graalvm.compiler.core.common \ + org.graalvm.compiler.core.sparc \ + org.graalvm.compiler.debug \ + org.graalvm.compiler.hotspot \ + org.graalvm.compiler.hotspot.aarch64 \ ++ org.graalvm.compiler.hotspot.sw64 \ + org.graalvm.compiler.hotspot.amd64 \ + org.graalvm.compiler.hotspot.sparc \ + org.graalvm.compiler.graph \ +@@ -56,6 +58,7 @@ PROC_SRC_SUBDIRS := \ + org.graalvm.compiler.nodes \ + org.graalvm.compiler.replacements \ + org.graalvm.compiler.replacements.aarch64 \ ++ org.graalvm.compiler.replacements.sw64 \ + org.graalvm.compiler.replacements.amd64 \ + org.graalvm.compiler.phases \ + org.graalvm.compiler.phases.common \ +@@ -82,6 +85,7 @@ PROCESSOR_PATH := $(call PathList, $(PROCESSOR_JARS)) + ADD_EXPORTS := \ + --add-modules jdk.internal.vm.ci \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.aarch64=jdk.internal.vm.compiler \ ++ --add-exports jdk.internal.vm.ci/jdk.vm.ci.sw64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.amd64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.code.site=jdk.internal.vm.compiler \ +@@ -89,6 +93,7 @@ ADD_EXPORTS := \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.common=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.aarch64=jdk.internal.vm.compiler \ ++ --add-exports 
jdk.internal.vm.ci/jdk.vm.ci.hotspot.sw64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.amd64=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.events=jdk.internal.vm.compiler \ + --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot.sparc=jdk.internal.vm.compiler \ +diff --git a/make/hotspot/ide/CreateVSProject.gmk b/make/hotspot/ide/CreateVSProject.gmk +index 2c6507d363..554c4fadc1 100644 +--- a/make/hotspot/ide/CreateVSProject.gmk ++++ b/make/hotspot/ide/CreateVSProject.gmk +@@ -68,6 +68,7 @@ ifeq ($(call isTargetOs, windows), true) + + IGNORED_PLATFORMS_ARGS := \ + -ignorePath aarch64 \ ++ -ignorePath sw64 \ + -ignorePath aix \ + -ignorePath arm \ + -ignorePath bsd \ +diff --git a/make/hotspot/lib/JvmFeatures.gmk b/make/hotspot/lib/JvmFeatures.gmk +index 7b86f83cc9..9c159de4da 100644 +--- a/make/hotspot/lib/JvmFeatures.gmk ++++ b/make/hotspot/lib/JvmFeatures.gmk +@@ -129,7 +129,7 @@ endif + ifneq ($(call check-jvm-feature, aot), true) + JVM_CFLAGS_FEATURES += -DINCLUDE_AOT=0 + JVM_EXCLUDE_FILES += \ +- compiledIC_aot_x86_64.cpp compiledIC_aot_aarch64.cpp \ ++ compiledIC_aot_x86_64.cpp compiledIC_aot_aarch64.cpp compiledIC_aot_sw64.cpp \ + compilerRuntime.cpp aotCodeHeap.cpp aotCompiledMethod.cpp \ + aotLoader.cpp compiledIC_aot.cpp + endif +diff --git a/make/launcher/Launcher-jdk.aot.gmk b/make/launcher/Launcher-jdk.aot.gmk +index 10717a5e1c..8ddcf274e5 100644 +--- a/make/launcher/Launcher-jdk.aot.gmk ++++ b/make/launcher/Launcher-jdk.aot.gmk +@@ -32,6 +32,7 @@ $(eval $(call SetupBuildLauncher, jaotc, \ + MAIN_CLASS := jdk.tools.jaotc.Main, \ + EXTRA_JAVA_ARGS := -XX:+UnlockExperimentalVMOptions -XX:+EnableJVMCI \ + --add-exports=jdk.internal.vm.ci/jdk.vm.ci.aarch64=$(call CommaList, jdk.internal.vm.compiler jdk.aot) \ ++ --add-exports=jdk.internal.vm.ci/jdk.vm.ci.sw64=$(call CommaList, jdk.internal.vm.compiler jdk.aot) \ + --add-exports=jdk.internal.vm.ci/jdk.vm.ci.amd64=$(call CommaList, jdk.internal.vm.compiler jdk.aot) \ + --add-exports=jdk.internal.vm.ci/jdk.vm.ci.code=$(call CommaList, jdk.internal.vm.compiler jdk.aot) \ + --add-exports=jdk.internal.vm.ci/jdk.vm.ci.code.site=$(call CommaList, jdk.internal.vm.compiler jdk.aot) \ +@@ -39,9 +40,10 @@ $(eval $(call SetupBuildLauncher, jaotc, \ + --add-exports=jdk.internal.vm.ci/jdk.vm.ci.common=$(call CommaList, jdk.internal.vm.compiler jdk.aot) \ + --add-exports=jdk.internal.vm.ci/jdk.vm.ci.hotspot=$(call CommaList, jdk.internal.vm.compiler jdk.aot) \ + , \ +- JAVA_ARGS := --add-exports=jdk.internal.vm.ci/jdk.vm.ci.hotspot.aarch64=$(call CommaList, jdk.internal.vm.compiler jdk.aot) \ ++ JAVA_ARGS := \ + --add-exports=jdk.internal.vm.ci/jdk.vm.ci.hotspot.amd64=$(call CommaList, jdk.internal.vm.compiler jdk.aot) \ + --add-exports=jdk.internal.vm.ci/jdk.vm.ci.hotspot.aarch64=$(call CommaList, jdk.internal.vm.compiler jdk.aot) \ ++ --add-exports=jdk.internal.vm.ci/jdk.vm.ci.hotspot.sw64=$(call CommaList, jdk.internal.vm.compiler jdk.aot) \ + --add-exports=jdk.internal.vm.ci/jdk.vm.ci.hotspot.sparc=$(call CommaList, jdk.internal.vm.compiler jdk.aot) \ + --add-exports=jdk.internal.vm.ci/jdk.vm.ci.meta=$(call CommaList, jdk.internal.vm.compiler jdk.aot) \ + --add-exports=jdk.internal.vm.ci/jdk.vm.ci.runtime=$(call CommaList, jdk.internal.vm.compiler jdk.aot) \ +diff --git a/make/test/JtregGraalUnit.gmk b/make/test/JtregGraalUnit.gmk +index 1c1ce33ae4..0ee77936c0 100644 +--- a/make/test/JtregGraalUnit.gmk ++++ b/make/test/JtregGraalUnit.gmk +@@ -62,6 +62,7 @@ ifeq 
($(INCLUDE_GRAAL), true) + $(SRC_DIR)/org.graalvm.compiler.api.directives.test/src \ + $(SRC_DIR)/org.graalvm.compiler.api.test/src \ + $(SRC_DIR)/org.graalvm.compiler.asm.aarch64.test/src \ ++ $(SRC_DIR)/org.graalvm.compiler.asm.sw64.test/src \ + $(SRC_DIR)/org.graalvm.compiler.asm.amd64.test/src \ + $(SRC_DIR)/org.graalvm.compiler.asm.sparc.test/src \ + $(SRC_DIR)/org.graalvm.compiler.asm.test/src \ +diff --git a/native_configure b/native_configure +new file mode 100755 +index 0000000000..b624c71993 +--- /dev/null ++++ b/native_configure +@@ -0,0 +1,28 @@ ++#!/bin/bash ++level=${1?usage: $0 release/slowdebug} ++ # --with-jvm-variants JVM variants (separated by commas) to build ++ # (server,client,minimal,core,zero,custom) [server] ++ # --with-jvm-features "aot cds cmsgc compiler1 compiler2 epsilongc g1gc graal jfr \ ++ # jni-check jvmci jvmti management nmt parallelgc serialgc services vm-structs zgc" ++ # --enable-aot=no \ ++ # --enable-cds=no \ ++ # --disable-ccache \ ++ builddate=`date +%Y-%m-%d` ++ buildtag=sw1.3.0 ++ bash configure \ ++ --with-freetype=bundled \ ++ --with-zlib=bundled \ ++ --with-native-debug-symbols=internal \ ++ --with-debug-level=$level \ ++ --with-jvm-variants=custom \ ++ --with-jvm-features=serialgc,vm-structs,parallelgc,compiler2,management,nmt,g1gc,cmsgc,jvmti,services,jni-check,jfr \ ++ --with-version-date=$builddate \ ++ --with-version-opt=$buildtag \ ++ --with-version-pre=no \ ++ --enable-sjavac=no \ ++ --disable-javac-server \ ++ --disable-warnings-as-errors \ ++ --with-extra-cflags=" -mieee -Wno-error=maybe-uninitialized -Wno-error=deprecated-declarations -Wno-error=type-limits -Wno-error=format-security -Wno-error=conversion-null -Wno-error=sign-compare -Wno-error=int-to-pointer-cast -mgprel-size=32" \ ++ --with-extra-cxxflags="-mieee -Wno-error=maybe-uninitialized -Wno-error=deprecated-declarations -Wno-error=type-limits -Wno-error=format-security -Wno-error=conversion-null -Wno-error=sign-compare -Wno-error=int-to-pointer-cast -mgprel-size=32" \ ++ --with-extra-ldflags=" -mieee -Wl,-no-relax" ++ +diff --git a/src/hotspot/cpu/sw64/abstractInterpreter_sw64.cpp b/src/hotspot/cpu/sw64/abstractInterpreter_sw64.cpp +new file mode 100644 +index 0000000000..1076afbb7e +--- /dev/null ++++ b/src/hotspot/cpu/sw64/abstractInterpreter_sw64.cpp +@@ -0,0 +1,161 @@ ++/* ++ * Copyright (c) 2003, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/constMethod.hpp" ++#include "oops/method.hpp" ++#include "runtime/frame.inline.hpp" ++#include "utilities/align.hpp" ++#include "utilities/debug.hpp" ++#include "utilities/macros.hpp" ++ ++ ++// asm based interpreter deoptimization helpers ++int AbstractInterpreter::size_activation(int max_stack, ++ int temps, ++ int extra_args, ++ int monitors, ++ int callee_params, ++ int callee_locals, ++ bool is_top_frame) { ++ // Note: This calculation must exactly parallel the frame setup ++ // in TemplateInterpreterGenerator::generate_fixed_frame. ++ ++ // fixed size of an interpreter frame: ++ int overhead = frame::sender_sp_offset - ++ frame::interpreter_frame_initial_sp_offset; ++ // Our locals were accounted for by the caller (or last_frame_adjust ++ // on the transistion) Since the callee parameters already account ++ // for the callee's params we only need to account for the extra ++ // locals. ++ int size = overhead + ++ (callee_locals - callee_params)*Interpreter::stackElementWords + ++ monitors * frame::interpreter_frame_monitor_size() + ++ temps* Interpreter::stackElementWords + extra_args; ++ ++ return size; ++} ++ ++void AbstractInterpreter::layout_activation(Method* method, ++ int tempcount, ++ int popframe_extra_args, ++ int moncount, ++ int caller_actual_parameters, ++ int callee_param_count, ++ int callee_locals, ++ frame* caller, ++ frame* interpreter_frame, ++ bool is_top_frame, ++ bool is_bottom_frame) { ++ // The frame interpreter_frame is guaranteed to be the right size, ++ // as determined by a previous call to the size_activation() method. ++ // It is also guaranteed to be walkable even though it is in a ++ // skeletal state ++ ++ int max_locals = method->max_locals() * Interpreter::stackElementWords; ++ int extra_locals = (method->max_locals() - method->size_of_parameters()) * ++ Interpreter::stackElementWords; ++ ++#ifdef ASSERT ++ assert(caller->sp() == interpreter_frame->sender_sp(), "Frame not properly walkable"); ++#endif ++ ++ interpreter_frame->interpreter_frame_set_method(method); ++ // NOTE the difference in using sender_sp and ++ // interpreter_frame_sender_sp interpreter_frame_sender_sp is ++ // the original sp of the caller (the unextended_sp) and ++ // sender_sp is fp+8/16 (32bit/64bit) XXX ++ intptr_t* locals = interpreter_frame->sender_sp() + max_locals - 1; ++ ++#ifdef ASSERT ++ if (caller->is_interpreted_frame()) { ++ assert(locals < caller->fp() + frame::interpreter_frame_initial_sp_offset, "bad placement"); ++ } ++#endif ++ ++ interpreter_frame->interpreter_frame_set_locals(locals); ++ BasicObjectLock* montop = interpreter_frame->interpreter_frame_monitor_begin(); ++ BasicObjectLock* monbot = montop - moncount; ++ interpreter_frame->interpreter_frame_set_monitor_end(monbot); ++ ++ // Set last_sp ++ intptr_t* esp = (intptr_t*) monbot - ++ tempcount*Interpreter::stackElementWords - ++ popframe_extra_args; ++ interpreter_frame->interpreter_frame_set_last_sp(esp); ++ ++ // All frames but the initial (oldest) interpreter frame we fill in have ++ // a value for sender_sp that allows walking the stack but isn't ++ // truly correct. Correct the value here. 
++ if (extra_locals != 0 && ++ interpreter_frame->sender_sp() == ++ interpreter_frame->interpreter_frame_sender_sp()) { ++ interpreter_frame->set_interpreter_frame_sender_sp(caller->sp() + ++ extra_locals); ++ } ++ *interpreter_frame->interpreter_frame_cache_addr() = ++ method->constants()->cache(); ++ *interpreter_frame->interpreter_frame_mirror_addr() = ++ method->method_holder()->java_mirror(); ++} ++ ++int AbstractInterpreter::BasicType_as_index(BasicType type) { ++ int i = 0; ++ switch (type) { ++ case T_BOOLEAN: i = 0; break; ++ case T_CHAR : i = 1; break; ++ case T_BYTE : i = 2; break; ++ case T_SHORT : i = 3; break; ++ case T_INT : i = 4; break; ++ case T_LONG : i = 5; break; ++ case T_VOID : i = 6; break; ++ case T_FLOAT : i = 7; break; ++ case T_DOUBLE : i = 8; break; ++ case T_OBJECT : i = 9; break; ++ case T_ARRAY : i = 9; break; ++ default : ShouldNotReachHere(); ++ } ++ assert(0 <= i && i < AbstractInterpreter::number_of_result_handlers, ++ "index out of bounds"); ++ return i; ++} ++ ++// How much stack a method activation needs in words. ++int AbstractInterpreter::size_top_interpreter_activation(Method* method) { ++ const int entry_size = frame::interpreter_frame_monitor_size(); ++ ++ // total overhead size: entry_size + (saved rbp thru expr stack ++ // bottom). be sure to change this if you add/subtract anything ++ // to/from the overhead area ++ const int overhead_size = ++ -(frame::interpreter_frame_initial_sp_offset) + entry_size; ++ ++ const int stub_code = frame::entry_frame_after_call_words; ++ ++ const int method_stack = (method->max_locals() + method->max_stack()) * ++ Interpreter::stackElementWords; ++ return (overhead_size + method_stack + stub_code); ++} +diff --git a/src/hotspot/cpu/sw64/ad_encode.m4 b/src/hotspot/cpu/sw64/ad_encode.m4 +new file mode 100644 +index 0000000000..cd68f185f2 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/ad_encode.m4 +@@ -0,0 +1,98 @@ ++dnl Copyright (c) 2014, Red Hat Inc. All rights reserved. ++dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++dnl ++dnl This code is free software; you can redistribute it and/or modify it ++dnl under the terms of the GNU General Public License version 2 only, as ++dnl published by the Free Software Foundation. ++dnl ++dnl This code is distributed in the hope that it will be useful, but WITHOUT ++dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++dnl FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++dnl version 2 for more details (a copy is included in the LICENSE file that ++dnl accompanied this code). ++dnl ++dnl You should have received a copy of the GNU General Public License version ++dnl 2 along with this work; if not, write to the Free Software Foundation, ++dnl Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++dnl ++dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++dnl or visit www.oracle.com if you need additional information or have any ++dnl questions. ++dnl ++dnl ++dnl Process this file with m4 ad_encode.m4 to generate the load/store ++dnl patterns used in sw64.ad. 
++dnl ++define(choose, `loadStore($1, &MacroAssembler::$3, $2, $4, ++ $5, $6, $7, $8);dnl ++ ++ %}')dnl ++define(access, ` ++ $3Register $1_reg = as_$3Register($$1$$reg); ++ $4choose(MacroAssembler(&cbuf), $1_reg,$2,$mem->opcode(), ++ as_Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp)')dnl ++define(load,` ++ enc_class sw64_enc_$2($1 dst, memory mem) %{dnl ++access(dst,$2,$3)')dnl ++load(iRegI,ldrsbw) ++load(iRegI,ldrsb) ++load(iRegI,ldrb) ++load(iRegL,ldrb) ++load(iRegI,ldrshw) ++load(iRegI,ldrsh) ++load(iRegI,ldrh) ++load(iRegL,ldrh) ++load(iRegI,ldrw) ++load(iRegL,ldrw) ++load(iRegL,ldrsw) ++load(iRegL,ldr) ++load(vRegF,ldrs,Float) ++load(vRegD,ldrd,Float) ++define(STORE,` ++ enc_class sw64_enc_$2($1 src, memory mem) %{dnl ++access(src,$2,$3,$4)')dnl ++define(STORE0,` ++ enc_class sw64_enc_$2`'0(memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ choose(_masm,zr,$2,$mem->opcode(), ++ as_$3Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp)')dnl ++STORE(iRegI,strb) ++STORE0(iRegI,strb) ++STORE(iRegI,strh) ++STORE0(iRegI,strh) ++STORE(iRegI,strw) ++STORE0(iRegI,strw) ++STORE(iRegL,str,, ++`// we sometimes get asked to store the stack pointer into the ++ // current thread -- we cannot do that directly on Sw64 ++ if (src_reg == r31_sp) { ++ MacroAssembler _masm(&cbuf); ++ assert(as_Register($mem$$base) == rthread, "unexpected store for sp"); ++ __ mov(rscratch2, sp); ++ src_reg = rscratch2; ++ } ++ ') ++STORE0(iRegL,str) ++STORE(vRegF,strs,Float) ++STORE(vRegD,strd,Float) ++ ++ enc_class sw64_enc_strw_immn(immN src, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ address con = (address)$src$$constant; ++ // need to do this the hard way until we can manage relocs ++ // for 32 bit constants ++ __ movoop(rscratch2, (jobject)con); ++ if (con) __ encode_heap_oop_not_null(rscratch2); ++ choose(_masm,rscratch2,strw,$mem->opcode(), ++ as_Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp) ++ ++ enc_class sw64_enc_strw_immnk(immN src, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ address con = (address)$src$$constant; ++ // need to do this the hard way until we can manage relocs ++ // for 32 bit constants ++ __ movoop(rscratch2, (jobject)con); ++ __ encode_klass_not_null(rscratch2); ++ choose(_masm,rscratch2,strw,$mem->opcode(), ++ as_Register($mem$$base),$mem$$index,$mem$$scale,$mem$$disp) ++ +diff --git a/src/hotspot/cpu/sw64/assembler_sw64.cpp b/src/hotspot/cpu/sw64/assembler_sw64.cpp +new file mode 100644 +index 0000000000..e748c76f48 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/assembler_sw64.cpp +@@ -0,0 +1,220 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include ++#include ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "interpreter/interpreter.hpp" ++ ++#ifndef PRODUCT ++const unsigned long Assembler::asm_bp = 0x00007fffee09ac88; ++#endif ++ ++#include "compiler/disassembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "assembler_sw64.hpp" ++//#include "immediate_sw64.hpp" ++ ++extern "C" void entry(CodeBuffer *cb); ++ ++#define __ _masm. ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#endif ++ ++#define BIND(label) bind(label); __ BLOCK_COMMENT(#label ":") ++ ++void entry(CodeBuffer *cb) { ++ ++ // { ++ // for (int i = 0; i < 256; i+=16) ++ // { ++ // printf("\"%20.20g\", ", unpack(i)); ++ // printf("\"%20.20g\", ", unpack(i+1)); ++ // } ++ // printf("\n"); ++ // } ++ ++ Assembler _masm(cb); ++ address entry = __ pc(); ++ ++ // Smoke test for assembler ++ ++} ++ ++#undef __ ++ ++#define ADDRESSEMIT(RegType, is_xx)\ ++void Address::emit(RegType ra, Assembler* as, int opcode) {\ ++ if (_mode == base_index_scale_disp) {\ ++ guarantee(_tmp != noreg, "we need a tmp reg here");\ ++ if (_scale == times_8) {\ ++ as->s8addl(_index, _base, _tmp); \ ++ } else if (_scale == times_4) {\ ++ as->s4addl(_index, _base, _tmp); \ ++ } else if (_scale == times_2) {\ ++ if (_tmp != _index) {\ ++ as->addl(_base, _index, _tmp); \ ++ as->addl(_tmp, _index, _tmp); \ ++ } else {\ ++ as->addl(_index, _index, _index); \ ++ as->addl(_base, _index, _index); \ ++ }\ ++ } else {\ ++ as->addl(_base, _index, _tmp);\ ++ }\ ++ as->emit_sw2_long(opcode| as->is_xx(ra) | as->is_mdisp(_disp) | as->is_rb(_tmp));\ ++ } else if (_mode == base_plus_disp) {\ ++ as->emit_sw2_long(opcode| as->is_xx(ra) | as->is_mdisp(_disp) | as->is_rb(_base));\ ++ } else {\ ++ ShouldNotReachHere();\ ++ }\ ++} ++ADDRESSEMIT(Register, is_ra) ++ADDRESSEMIT(FloatRegister, is_fa) ++#undef ADDRESSEMIT ++ ++// Convert the raw encoding form into the form expected by the constructor for ++// Address. An index of 30 (rsp) corresponds to having no index, so convert ++// that to noreg for the Address constructor. ++Address Address::make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc) { ++ RelocationHolder rspec; ++ if (disp_reloc != relocInfo::none) { ++ rspec = Relocation::spec_simple(disp_reloc); ++ } ++ bool valid_index = index != sp->encoding(); ++ if (valid_index) { ++ Address madr(as_Register(base), as_Register(index), (Address::ScaleFactor)scale, in_ByteSize(disp)); ++ madr._mode = base_index_scale_disp; ++ madr._rspec = rspec; ++ return madr; ++ } else { ++ Address madr(as_Register(base), in_ByteSize(disp)); ++ madr._mode = base_plus_disp; ++ madr._rspec = rspec; ++ return madr; ++ } ++} ++ ++int AbstractAssembler::code_fill_byte() { ++ return 0x00; ++} ++ ++// n.b. 
this is implemented in subclass MacroAssembler ++void Assembler::bang_stack_with_offset(int offset) { Unimplemented(); } ++ ++ ++// and now the routines called by the assembler which encapsulate the ++// above encode and decode functions ++ ++//uint32_t ++//asm_util::encode_logical_immediate(bool is32, uint64_t imm) ++//{ ++// ShouldNotReachHere(); ++// return encoding_for_logical_immediate(imm); ++//} ++ ++//unsigned Assembler::pack(double value) { ++// ShouldNotReachHere(); ++// float val = (float)value; ++// unsigned result = encoding_for_fp_immediate(val); ++// guarantee(unpack(result) == value, ++// "Invalid floating-point immediate operand"); ++// return result; ++//} ++ ++// Packed operands for Floating-point Move (immediate) ++ ++//static float unpack(unsigned value) { ++// ShouldNotReachHere(); ++// return 0; ++//} ++ ++AddressLiteral::AddressLiteral(address target, relocInfo::relocType rtype) { ++ _is_lval = false; ++ _target = target; ++ switch (rtype) { ++ case relocInfo::oop_type: ++ case relocInfo::metadata_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. ++ break; ++ case relocInfo::external_word_type: ++ _rspec = external_word_Relocation::spec(target); ++ break; ++ case relocInfo::internal_word_type: ++ _rspec = internal_word_Relocation::spec(target); ++ break; ++ case relocInfo::opt_virtual_call_type: ++ _rspec = opt_virtual_call_Relocation::spec(); ++ break; ++ case relocInfo::static_call_type: ++ _rspec = static_call_Relocation::spec(); ++ break; ++ case relocInfo::runtime_call_type: ++ _rspec = runtime_call_Relocation::spec(); ++ break; ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ _rspec = Relocation::spec_simple(rtype); ++ break; ++ case relocInfo::none: ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++} ++ ++#ifdef ASSERT ++void Assembler::check_relocation(RelocationHolder const& rspec, int format) { ++ address inst = inst_mark(); ++ assert(inst != NULL && inst <= pc(), "must point to beginning of instruction"); ++// address opnd; ++ ++ Relocation* r = rspec.reloc(); ++ if (r->type() == relocInfo::none) { ++ return; ++ } else if (r->is_call() || format == call32_operand) { ++ // assert(format == imm32_operand, "cannot specify a nonzero format"); ++// opnd = locate_operand(inst, call32_operand);// yj todo ++ } else if (r->is_data()) { ++// assert(format == imm_operand || format == disp32_operand ++// LP64_ONLY(|| format == narrow_oop_operand), "format ok"); ++// opnd = locate_operand(inst, (WhichOperand)format);// yj todo ++ } else { ++// assert(format == imm_operand, "cannot specify a format"); ++ return; ++ } ++// assert(opnd == pc(), "must put operand where relocs can find it"); ++} ++#endif // ASSERT +\ No newline at end of file +diff --git a/src/hotspot/cpu/sw64/assembler_sw64.hpp b/src/hotspot/cpu/sw64/assembler_sw64.hpp +new file mode 100644 +index 0000000000..8ca7c18414 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/assembler_sw64.hpp +@@ -0,0 +1,2005 @@ ++/* ++ * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_ASSEMBLER_SW64_HPP ++#define CPU_SW64_VM_ASSEMBLER_SW64_HPP ++ ++#include "asm/register.hpp" ++#include "runtime/vm_version.hpp" ++ ++// Define some macros to help SW64 Instructions' implementation. ++#define OP(x) (((x) & 0x3F) << 26) ++#define PCD(oo) (OP(oo)) ++#define OPMEM(oo) (OP(oo)) ++#define BRA(oo) (OP(oo)) ++#define OFP(oo,ff) (OP(oo) | (((ff) & 0xFF) << 5)) ++#define FMA(oo,ff) (OP(oo) | (((ff) & 0x3F) << 10)) ++#define MFC(oo,ff) (OP(oo) | ((ff) & 0xFFFF)) ++#define OPR(oo,ff) (OP(oo) | (((ff) & 0xFF) << 5)) ++#define OPRL(oo,ff) (OP(oo) | (((ff) & 0xFF) << 5)) ++#define TOPR(oo,ff) (OP(oo) | (((ff) & 0x07) << 10)) ++#define TOPRL(oo,ff) (OP(oo) | (((ff) & 0x07) << 10)) ++ ++#define ATMEM(oo,h) (OP(oo) | (((h) & 0xF) << 12)) ++#define PRIRET(oo,h) (OP(oo) | (((h) & 0x1) << 20)) ++#define EV6HWMEM(oo,ff) (OP(oo) | (((ff) & 0xF) << 12)) ++#define CSR(oo,ff) (OP(oo) | (((ff) & 0xFF) << 8)) ++ ++#define LOGX(oo,ff) (OP(oo) | (((ff) & 0x3F) << 10)) ++#define PSE_LOGX(oo,ff) (OP(oo) | (((ff) & 0x3F) << 10) | (((ff) >> 0x6) << 26 ) | 0x3E0 ) ++ ++REGISTER_DECLARATION(Register, V0, i0); ++REGISTER_DECLARATION(Register, T0, i1); ++REGISTER_DECLARATION(Register, T1, i2); ++REGISTER_DECLARATION(Register, T2, i3); ++REGISTER_DECLARATION(Register, T3, i4); ++REGISTER_DECLARATION(Register, T4, i5); ++REGISTER_DECLARATION(Register, T5, i6); ++REGISTER_DECLARATION(Register, T6, i7); ++REGISTER_DECLARATION(Register, T7, i8); ++REGISTER_DECLARATION(Register, S0, i9); ++REGISTER_DECLARATION(Register, S1, i10); ++REGISTER_DECLARATION(Register, S2, i11); ++REGISTER_DECLARATION(Register, S3, i12); ++REGISTER_DECLARATION(Register, S4, i13); ++REGISTER_DECLARATION(Register, S5, i14); ++REGISTER_DECLARATION(Register, FP, i15); ++REGISTER_DECLARATION(Register, A0, i16); ++REGISTER_DECLARATION(Register, A1, i17); ++REGISTER_DECLARATION(Register, A2, i18); ++REGISTER_DECLARATION(Register, A3, i19); ++REGISTER_DECLARATION(Register, A4, i20); ++REGISTER_DECLARATION(Register, A5, i21); ++REGISTER_DECLARATION(Register, T8, i22); ++REGISTER_DECLARATION(Register, T9, i23); ++REGISTER_DECLARATION(Register, T10, i24); ++REGISTER_DECLARATION(Register, T11, i25); ++REGISTER_DECLARATION(Register, RA, i26); ++REGISTER_DECLARATION(Register, T12, i27); ++REGISTER_DECLARATION(Register, AT, i28); ++REGISTER_DECLARATION(Register, GP, i29); ++REGISTER_DECLARATION(Register, SP, i30); ++REGISTER_DECLARATION(Register, R0, i31); ++ ++REGISTER_DECLARATION(FloatRegister, F0, f0); ++REGISTER_DECLARATION(FloatRegister, F1, f1); ++REGISTER_DECLARATION(FloatRegister, F2, 
f2); ++REGISTER_DECLARATION(FloatRegister, F3, f3); ++REGISTER_DECLARATION(FloatRegister, F4, f4); ++REGISTER_DECLARATION(FloatRegister, F5, f5); ++REGISTER_DECLARATION(FloatRegister, F6, f6); ++REGISTER_DECLARATION(FloatRegister, F7, f7); ++REGISTER_DECLARATION(FloatRegister, F8, f8); ++REGISTER_DECLARATION(FloatRegister, F9, f9); ++REGISTER_DECLARATION(FloatRegister, F10, f10); ++REGISTER_DECLARATION(FloatRegister, F11, f11); ++REGISTER_DECLARATION(FloatRegister, F12, f12); ++REGISTER_DECLARATION(FloatRegister, F13, f13); ++REGISTER_DECLARATION(FloatRegister, F14, f14); ++REGISTER_DECLARATION(FloatRegister, F15, f15); ++REGISTER_DECLARATION(FloatRegister, F16, f16); ++REGISTER_DECLARATION(FloatRegister, F17, f17); ++REGISTER_DECLARATION(FloatRegister, F18, f18); ++REGISTER_DECLARATION(FloatRegister, F19, f19); ++REGISTER_DECLARATION(FloatRegister, F20, f20); ++REGISTER_DECLARATION(FloatRegister, F21, f21); ++REGISTER_DECLARATION(FloatRegister, F22, f22); ++REGISTER_DECLARATION(FloatRegister, F23, f23); ++REGISTER_DECLARATION(FloatRegister, F24, f24); ++REGISTER_DECLARATION(FloatRegister, F25, f25); ++REGISTER_DECLARATION(FloatRegister, F26, f26); ++REGISTER_DECLARATION(FloatRegister, F27, f27); ++REGISTER_DECLARATION(FloatRegister, F28, f28); ++REGISTER_DECLARATION(FloatRegister, F29, f29); ++REGISTER_DECLARATION(FloatRegister, F30, f30); ++REGISTER_DECLARATION(FloatRegister, F31, f31); ++ ++////REGISTER_DECLARATION(Register, c_rarg0, i0); ++REGISTER_DECLARATION(Register, c_rarg0, A0); ++REGISTER_DECLARATION(Register, c_rarg1, A1); ++REGISTER_DECLARATION(Register, c_rarg2, A2); ++REGISTER_DECLARATION(Register, c_rarg3, A3); ++REGISTER_DECLARATION(Register, c_rarg4, A4); ++REGISTER_DECLARATION(Register, c_rarg5, A5); ++ ++REGISTER_DECLARATION(FloatRegister, c_farg0, F16); ++REGISTER_DECLARATION(FloatRegister, c_farg1, F17); ++REGISTER_DECLARATION(FloatRegister, c_farg2, F18); ++REGISTER_DECLARATION(FloatRegister, c_farg3, F19); ++REGISTER_DECLARATION(FloatRegister, c_farg4, F20); ++REGISTER_DECLARATION(FloatRegister, c_farg5, F21); ++ ++// Symbolically name the register arguments used by the Java calling convention. ++// We have control over the convention for java so we can do what we please. ++// What pleases us is to offset the java calling convention so that when ++// we call a suitable jni method the arguments are lined up and we don't ++// have to do much shuffling. 
A suitable jni method is non-static and a ++// small number of arguments ++// ++// |--------------------------------------------------------------------| ++// | c_rarg0 c_rarg1 c_rarg2 c_rarg3 c_rarg4 c_rarg5 c_rarg6 c_rarg7 | ++// |--------------------------------------------------------------------| ++// | r0 r1 r2 r3 r4 r5 r6 r7 | ++// |--------------------------------------------------------------------| ++// | j_rarg7 j_rarg0 j_rarg1 j_rarg2 j_rarg3 j_rarg4 j_rarg5 j_rarg6 | ++// |--------------------------------------------------------------------| ++ ++ ++REGISTER_DECLARATION(Register, j_rarg0, c_rarg1); ++REGISTER_DECLARATION(Register, j_rarg1, c_rarg2); ++REGISTER_DECLARATION(Register, j_rarg2, c_rarg3); ++REGISTER_DECLARATION(Register, j_rarg3, c_rarg4); ++REGISTER_DECLARATION(Register, j_rarg4, c_rarg5); ++REGISTER_DECLARATION(Register, j_rarg5, c_rarg0); ++ ++// Java floating args are passed as per C ++ ++REGISTER_DECLARATION(FloatRegister, j_farg0, F16); ++REGISTER_DECLARATION(FloatRegister, j_farg1, F17); ++REGISTER_DECLARATION(FloatRegister, j_farg2, F18); ++REGISTER_DECLARATION(FloatRegister, j_farg3, F19); ++REGISTER_DECLARATION(FloatRegister, j_farg4, F20); ++REGISTER_DECLARATION(FloatRegister, j_farg5, F21); ++ ++// registers used to hold VM data either temporarily within a method ++// or across method calls ++ ++// volatile (caller-save) registers ++ ++// r8 is used for indirect result location return ++// we use it and r9 as scratch registers ++REGISTER_DECLARATION(Register, rscratch1, T5); ++REGISTER_DECLARATION(Register, rscratch2, T6); ++REGISTER_DECLARATION(Register, rscratch3, T11); ++REGISTER_DECLARATION(Register, rscratch4, AT); ++//TODO:need delete, we should not use rscratch1_GP & rscratch2_AT, we should use rcc or rscratch4 to replace jzy ++REGISTER_DECLARATION(Register, rscratch1_GP, GP); ++REGISTER_DECLARATION(Register, rscratch2_AT, AT); ++ ++ ++// non-volatile (callee-save) registers are r16-29 ++// of which the following are dedicated global state ++ ++// link register ++REGISTER_DECLARATION(Register, lr, RA); ++// frame pointer ++REGISTER_DECLARATION(Register, rfp, FP); ++ ++REGISTER_DECLARATION(Register, rbcp, S0); ++REGISTER_DECLARATION(Register, rlocals, S1); ++REGISTER_DECLARATION(Register, rthread, S2); ++REGISTER_DECLARATION(Register, rmethod, S3); ++REGISTER_DECLARATION(Register, rsender, S4); ++REGISTER_DECLARATION(Register, r12_heapbase, S5); ++ ++REGISTER_DECLARATION(Register, rdispatch, T8); ++REGISTER_DECLARATION(Register, rnext, T10); ++REGISTER_DECLARATION(Register, rmonitors, T11); ++//REGISTER_DECLARATION(Register, rcpool, T12); //??? 
++REGISTER_DECLARATION(Register, pv, T12); // as target procedure, maybe be used as temp register ++ ++REGISTER_DECLARATION(Register, esp, SP); ++REGISTER_DECLARATION(Register, rcc, GP); ++ ++REGISTER_DECLARATION(Register, FSR, V0); ++REGISTER_DECLARATION(Register, SSR, T4); ++REGISTER_DECLARATION(FloatRegister, FSF, f0); ++REGISTER_DECLARATION(FloatRegister, SSF, f1); ++REGISTER_DECLARATION(FloatRegister, FTF, f14); ++REGISTER_DECLARATION(FloatRegister, FcmpRES, f29); //TODO:need delete jzy ++REGISTER_DECLARATION(FloatRegister, fcc, f29); ++REGISTER_DECLARATION(FloatRegister, fscratch1, f28); ++REGISTER_DECLARATION(FloatRegister, fzero, f31); ++ ++// x86 GPR simulation ++REGISTER_DECLARATION(Register, rax, V0); ++REGISTER_DECLARATION(Register, rdi, A0); ++REGISTER_DECLARATION(Register, rsi, A1); ++REGISTER_DECLARATION(Register, rdx, A2); ++REGISTER_DECLARATION(Register, rcx, A3); ++REGISTER_DECLARATION(Register, r8, A4); ++REGISTER_DECLARATION(Register, r9, A5); ++REGISTER_DECLARATION(Register, rbx, S3); ++REGISTER_DECLARATION(Register, rbp, FP); ++REGISTER_DECLARATION(Register, r12, S5); ++REGISTER_DECLARATION(Register, r13, S0); ++REGISTER_DECLARATION(Register, r14, S1); ++REGISTER_DECLARATION(Register, r15, S2); ++REGISTER_DECLARATION(Register, r10, T5); ++REGISTER_DECLARATION(Register, r11, T6); ++REGISTER_DECLARATION(Register, rsp, SP); ++#define OPT_SAFEPOINT 1 ++ ++#define assert_cond(ARG1) assert(ARG1, #ARG1) ++ ++class Assembler; ++ ++class ArrayAddress; ++ ++// Addressing modes ++class Address { ++public: ++ enum ScaleFactor { ++ no_scale = -1, ++ times_1 = 0, ++ times_2 = 1, ++ times_4 = 2, ++ times_8 = 3, ++ times_ptr = times_8 ++ }; ++ ++ static ScaleFactor times(int size) { ++ assert(size >= 1 && size <= 8 && is_power_of_2(size), "bad scale size"); ++ if (size == 8) return times_8; ++ if (size == 4) return times_4; ++ if (size == 2) return times_2; ++ return times_1; ++ } ++ ++ static int scale_size(ScaleFactor scale) { ++ assert(scale != no_scale, ""); ++ assert(((1 << (int)times_1) == 1 && ++ (1 << (int)times_2) == 2 && ++ (1 << (int)times_4) == 4 && ++ (1 << (int)times_8) == 8), ""); ++ return (1 << (int)scale); ++ } ++ ++ enum mode { base_plus_disp, base_index_scale_disp }; ++ ++ private: ++ Register _base; ++ Register _index; ++ Register _tmp; ++ ScaleFactor _scale; ++ long _offset; ++ int _disp;//why int not long? jzy ++ enum mode _mode; ++ ++ RelocationHolder _rspec; ++ ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of ++ // the item. We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to a ++ // register to reach it. Otherwise if near we can do PC-relative ++ // addressing. ++ address _target; ++ ++ public: ++ Address() ++ : _base(noreg), ++ _disp(0) { ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, int disp = 0) ++ : _base (base), ++ _index(index), ++ _scale(scale), ++ _disp (disp), ++ _mode (base_index_scale_disp), ++ _tmp (noreg) { ++ assert(!index->is_valid() == (scale == Address::no_scale), ++ "inconsistent address"); ++ } ++ ++ Address(Register base, RegisterOrConstant index, ScaleFactor scale = times_1, int disp = 0) ++ : _base (base), ++ _index(index.register_or_noreg()), ++ _scale(scale), ++ _disp (disp + (index.constant_or_zero() * scale_size(scale))), ++ _mode (index.is_constant() ? 
base_plus_disp : base_index_scale_disp), ++ _tmp (noreg){ ++ if (!index.is_register()) scale = Address::no_scale; ++ assert(!_index->is_valid() == (scale == Address::no_scale), ++ "inconsistent address"); ++ } ++ ++ Address(Register base, int disp = 0) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(disp), ++ _tmp(noreg), ++ _mode(base_plus_disp){ ++ } ++ ++ void emit(Register ra, Assembler* as, int opcode); ++ void emit(FloatRegister ra, Assembler* as, int opcode); ++ ++#ifdef ASSERT ++ Address(Register base, ByteSize disp) ++ : _base(base), ++ _index(noreg), ++ _scale(no_scale), ++ _disp(in_bytes(disp)), ++ _tmp(noreg), ++ _mode(base_plus_disp){ ++ } ++ ++ Address(Register base, Register index, ScaleFactor scale, ByteSize disp) ++ : _base(base), ++ _index(index), ++ _scale(scale), ++ _disp(in_bytes(disp)), ++ _mode(base_index_scale_disp), ++ _tmp(noreg){ ++ assert(!index->is_valid() == (scale == Address::no_scale), ++ "inconsistent address"); ++ } ++ ++ Address(Register base, RegisterOrConstant index, ScaleFactor scale, ByteSize disp) ++ : _base (base), ++ _index(index.register_or_noreg()), ++ _scale(scale), ++ _disp (in_bytes(disp) + (index.constant_or_zero() * scale_size(scale))), ++ _mode (base_index_scale_disp), ++ _tmp (noreg) { ++ if (!index.is_register()) scale = Address::no_scale; ++ assert(!_index->is_valid() == (scale == Address::no_scale), ++ "inconsistent address"); ++ } ++#endif // ASSERT ++ ++ // accessors ++ bool uses(Register reg) const { return _base == reg || _index == reg; } ++ Register base() const { return _base; } ++ int disp() const { return _disp; } ++ Register index() const { return _index; } ++ ScaleFactor scale() const { return _scale; } ++ void setTmp(Register reg) { ++ _tmp = reg; ++ } ++ long offset() const { ++ return _offset; ++ } ++ mode getMode() const { ++ return _mode; ++ } ++ address target() const { return _target; } ++ const RelocationHolder& rspec() const { return _rspec; } ++ static Address make_raw(int base, int index, int scale, int disp, relocInfo::relocType disp_reloc); ++ ++ private: ++ ++ RelocationHolder rspec_from_rtype(relocInfo::relocType rtype, address addr) { ++ switch (rtype) { ++ case relocInfo::external_word_type: ++ return external_word_Relocation::spec(addr); ++ case relocInfo::internal_word_type: ++ return internal_word_Relocation::spec(addr); ++ case relocInfo::opt_virtual_call_type: ++ return opt_virtual_call_Relocation::spec(); ++ case relocInfo::static_call_type: ++ return static_call_Relocation::spec(); ++ case relocInfo::runtime_call_type: ++ return runtime_call_Relocation::spec(); ++ case relocInfo::poll_type: ++ case relocInfo::poll_return_type: ++ return Relocation::spec_simple(rtype); ++ case relocInfo::none: ++ case relocInfo::oop_type: ++ // Oops are a special case. Normally they would be their own section ++ // but in cases like icBuffer they are literals in the code stream that ++ // we don't have a section for. We use none so that we get a literal address ++ // which is always patchable. 
++ return RelocationHolder(); ++ default: ++ ShouldNotReachHere(); ++ return RelocationHolder(); ++ } ++ } ++ ++public: ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class LIR_Assembler; // base/index/scale/disp ++}; ++ ++class Argument { ++ private: ++ int _number; ++ public: ++ enum { ++ n_register_parameters = 6, // 6 integer registers used to pass parameters ++ n_float_register_parameters = 6, // 6 float registers used to pass parameters ++ ++ n_int_register_parameters_c = 6, // r0, r1, ... r7 (c_rarg0, c_rarg1, ...) ++ n_float_register_parameters_c = 6, // v0, v1, ... v7 (c_farg0, c_farg1, ... ) ++ n_int_register_parameters_j = 6, // r1, ... r7, r0 (rj_rarg0, j_rarg1, ... ++ n_float_register_parameters_j = 6 // v0, v1, ... v7 (j_farg0, j_farg1, .. ++ }; ++ ++ Argument(int number):_number(number){ } ++ ++ int number()const {return _number;} ++ bool is_Register()const {return _number < n_register_parameters;} ++ bool is_FloatRegister()const {return _number < n_float_register_parameters;} ++ ++ Register as_Register()const { ++ assert(is_Register(), "must be a register argument"); ++ return ::as_Register(A0->encoding() + _number); ++ } ++ FloatRegister as_FloatRegister()const { ++ assert(is_FloatRegister(), "must be a float register argument"); ++ return ::as_FloatRegister(F16->encoding() + _number); ++ } ++ ++ Address as_caller_address()const {return Address(esp, (number() - n_register_parameters) * wordSize);} ++}; ++ ++class AddressLiteral { ++ friend class ArrayAddress; ++ RelocationHolder _rspec; ++ // Typically we use AddressLiterals we want to use their rval ++ // However in some situations we want the lval (effect address) of the item. ++ // We provide a special factory for making those lvals. ++ bool _is_lval; ++ ++ // If the target is far we'll need to load the ea of this to ++ // a register to reach it. Otherwise if near we can do rip ++ // relative addressing. ++ ++ address _target; ++ ++ protected: ++ // creation ++ AddressLiteral() ++ : _is_lval(false), ++ _target(NULL) ++ {} ++ ++ public: ++ ++ ++ AddressLiteral(address target, relocInfo::relocType rtype); ++ ++ AddressLiteral(address target, RelocationHolder const& rspec) ++ : _rspec(rspec), ++ _is_lval(false), ++ _target(target) ++ {} ++ ++ AddressLiteral addr() { ++ AddressLiteral ret = *this; ++ ret._is_lval = true; ++ return ret; ++ } ++ ++ ++ private: ++ ++ address target() { return _target; } ++ bool is_lval() { return _is_lval; } ++ ++ relocInfo::relocType reloc() const { return _rspec.type(); } ++ const RelocationHolder& rspec() const { return _rspec; } ++ ++ friend class Assembler; ++ friend class MacroAssembler; ++ friend class Address; ++ friend class LIR_Assembler; ++}; ++ ++// Convience classes ++class RuntimeAddress: public AddressLiteral { ++ ++ public: ++ ++ RuntimeAddress(address target) : AddressLiteral(target, relocInfo::runtime_call_type) {} ++ ++}; ++ ++class ExternalAddress: public AddressLiteral { ++ private: ++ static relocInfo::relocType reloc_for_target(address target) { ++ // Sometimes ExternalAddress is used for values which aren't ++ // exactly addresses, like the card table base. ++ // external_word_type can't be used for values in the first page ++ // so just skip the reloc in that case. ++ return external_word_Relocation::can_be_relocated(target) ? 
relocInfo::external_word_type : relocInfo::none; ++ } ++ ++ public: ++ ++ ExternalAddress(address target) : AddressLiteral(target, reloc_for_target(target)) {} ++ ++}; ++ ++class InternalAddress: public AddressLiteral { ++ ++ public: ++ ++ InternalAddress(address target) : AddressLiteral(target, relocInfo::internal_word_type) {} ++}; ++ ++// x86 can do array addressing as a single operation since disp can be an absolute ++// address amd64 can't. We create a class that expresses the concept but does extra ++// magic on amd64 to get the final result ++ ++class ArrayAddress { ++ private: ++ ++ AddressLiteral _base; ++ Address _index; ++ ++ public: ++ ++ ArrayAddress() {}; ++ ArrayAddress(AddressLiteral base, Address index): _base(base), _index(index) {}; ++ AddressLiteral base() { return _base; } ++ Address index() { return _index; } ++ ++}; ++ ++class Assembler : public AbstractAssembler { ++ friend class AbstractAssembler; // for the non-virtual hack ++ ++#ifndef PRODUCT ++ static const unsigned long asm_bp; ++ ++ void emit_long(jint x) { ++ if ((unsigned long)pc() == asm_bp) ++ asm volatile ("nop"); ++ AbstractAssembler::emit_int32(x); ++ } ++#else ++ void emit_long(jint x) { ++ AbstractAssembler::emit_int32(x); ++ } ++#endif ++ ++public: ++ enum Condition { ++ zero = 0x4, ++ notZero = 0x5, ++ equal = 0x4, ++ notEqual = 0x5, ++ less = 0xc, ++ lessEqual = 0xe, ++ greater = 0xf, ++ greaterEqual = 0xd, ++ below = 0x2, ++ belowEqual = 0x6, ++ above = 0x7, ++ aboveEqual = 0x3, ++ overflow = 0x0, ++ noOverflow = 0x1, ++ carrySet = 0x2, ++ carryClear = 0x3, ++ positive = 0x9, ++ negative = 0x8, ++ notNegative = 0x10, ++ success = 0xa, ++ failed = 0xb, ++// // Conditional branch (immediate) ++// EQ, NE, HS, CS=HS, LO, CC=LO, MI, PL, VS, VC, HI, LS, GE, LT, GT, LE, AL, NV ++ }; ++ enum ConditionLength { ++ bitl = 64, ++ bitw = 32, ++ bith = 16, ++ bitb = 8 ++ }; ++ ++ enum WhichOperand { ++ imm_operand = 0, // embedded 32-bit|64-bit immediate operand ++ disp32_operand = 1, // embedded 32-bit displacement or address ++ call32_operand = 2, // embedded 32-bit self-relative displacement ++ narrow_oop_operand = 3, // embedded 32-bit immediate narrow oop ++ _WhichOperand_limit = 4 ++ }; ++ enum { instruction_size = 4 }; ++ ++ // The maximum range of a branch is fixed for the Sw64 ++ // architecture. In debug mode we shrink it in order to test ++ // trampolines, but not so small that branches in the interpreter ++ // are out of range. 
++ static const unsigned long branch_range = NOT_DEBUG(128 * M) DEBUG_ONLY(2 * M); ++ ++ static bool reachable_from_branch_at(address branch, address target) { ++ return uabs(target - branch) < branch_range; ++ } ++ ++ // Floating-point Move (immediate) ++private: ++ unsigned pack(double value); ++ ++public: ++ ++ Assembler(CodeBuffer* code) : AbstractAssembler(code) { ++#ifdef CHECK_DELAY ++ delay_state = no_delay; ++#endif ++ } ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ ShouldNotCallThis(); ++ return RegisterOrConstant(); ++ } ++ ++ // Stack overflow checking ++ virtual void bang_stack_with_offset(int offset); ++ ++ static bool operand_valid_for_logical_immediate(bool is32, uint64_t imm); ++ static bool operand_valid_for_add_sub_immediate(long imm); ++ static bool operand_valid_for_float_immediate(double imm); ++ ++ void emit_data64(jlong data, relocInfo::relocType rtype, int format = 0); ++ void emit_data64(jlong data, RelocationHolder const& rspec, int format = 0); ++ ++public: ++ enum ops_mem { ++ op_call = OPMEM(0x01), ++ op_ret = OPMEM(0x02), ++ op_jmp = OPMEM(0x03), ++ op_ldwe = OPMEM(0x09), op_fillcs = op_ldwe, ++ op_ldse = OPMEM(0x0A), op_e_fillcs = op_ldse, ++ op_ldde = OPMEM(0x0B), op_fillcs_e = op_ldde, ++ op_vlds = OPMEM(0x0C), op_e_fillde = op_vlds, ++ op_vldd = OPMEM(0x0D), ++ op_vsts = OPMEM(0x0E), ++ op_vstd = OPMEM(0x0F), ++ op_ldbu = OPMEM(0x20), op_flushd = op_ldbu, ++ op_ldhu = OPMEM(0x21), op_evictdg = op_ldhu, ++ op_ldw = OPMEM(0x22), op_s_fillcs = op_ldw, ++ op_ldl = OPMEM(0x23), op_s_fillde = op_ldl, ++ op_ldl_u = OPMEM(0x24), op_evictdl = op_ldl_u, ++ op_flds = OPMEM(0x26), op_fillde = op_flds, ++ op_fldd = OPMEM(0x27), op_fillde_e = op_fldd, ++ op_stb = OPMEM(0x28), ++ op_sth = OPMEM(0x29), ++ op_stw = OPMEM(0x2A), ++ op_stl = OPMEM(0x2B), ++ op_stl_u = OPMEM(0x2C), ++ op_fsts = OPMEM(0x2E), ++ op_fstd = OPMEM(0x2F), ++ op_ldi = OPMEM(0x3E), ++ op_ldih = OPMEM(0x3F) ++ }; ++ ++ enum ops_atmem { ++ op_lldw = ATMEM(0x08, 0x0), ++ op_lldl = ATMEM(0x08, 0x1), ++ op_ldw_inc = ATMEM(0x08, 0x2), //SW2F ++ op_ldl_inc = ATMEM(0x08, 0x3), //SW2F ++ op_ldw_dec = ATMEM(0x08, 0x4), //SW2F ++ op_ldl_dec = ATMEM(0x08, 0x5), //SW2F ++ op_ldw_set = ATMEM(0x08, 0x6), //SW2F ++ op_ldl_set = ATMEM(0x08, 0x7), //SW2F ++ op_lstw = ATMEM(0x08, 0x8), ++ op_lstl = ATMEM(0x08, 0x9), ++ op_ldw_nc = ATMEM(0x08, 0xA), ++ op_ldl_nc = ATMEM(0x08, 0xB), ++ op_ldd_nc = ATMEM(0x08, 0xC), ++ op_stw_nc = ATMEM(0x08, 0xD), ++ op_stl_nc = ATMEM(0x08, 0xE), ++ op_std_nc = ATMEM(0x08, 0xF), ++ op_vldw_u = ATMEM(0x1C, 0x0), ++ op_vstw_u = ATMEM(0x1C, 0x1), ++ op_vlds_u = ATMEM(0x1C, 0x2), ++ op_vsts_u = ATMEM(0x1C, 0x3), ++ op_vldd_u = ATMEM(0x1C, 0x4), ++ op_vstd_u = ATMEM(0x1C, 0x5), ++ op_vstw_ul = ATMEM(0x1C, 0x8), ++ op_vstw_uh = ATMEM(0x1C, 0x9), ++ op_vsts_ul = ATMEM(0x1C, 0xA), ++ op_vsts_uh = ATMEM(0x1C, 0xB), ++ op_vstd_ul = ATMEM(0x1C, 0xC), ++ op_vstd_uh = ATMEM(0x1C, 0xD), ++ op_vldd_nc = ATMEM(0x1C, 0xE), ++ op_vstd_nc = ATMEM(0x1C, 0xF), ++ op_ldbu_a = ATMEM(0x1E, 0x0), //SW8A ++ op_ldhu_a = ATMEM(0x1E, 0x1), //SW8A ++ op_ldw_a = ATMEM(0x1E, 0x2), //SW8A ++ op_ldl_a = ATMEM(0x1E, 0x3), //SW8A ++ op_flds_a = ATMEM(0x1E, 0x4), //SW8A ++ op_fldd_a = ATMEM(0x1E, 0x5), //SW8A ++ op_stb_a = ATMEM(0x1E, 0x6), //SW8A ++ op_sth_a = ATMEM(0x1E, 0x7), //SW8A ++ op_stw_a = ATMEM(0x1E, 0x8), //SW8A ++ op_stl_a = ATMEM(0x1E, 0x9), //SW8A ++ op_fsts_a = ATMEM(0x1E, 0xA), //SW8A ++ op_fstd_a = ATMEM(0x1E, 0xB) //SW8A ++ }; ++ ++ 
enum ops_ev6hwmem { ++ op_pri_ld = EV6HWMEM(0x25, 0x0), ++ op_pri_st = EV6HWMEM(0x2D, 0x0), ++ }; ++ ++ enum ops_opr { ++ op_addw = OPR(0x10, 0x00), ++ op_subw = OPR(0x10, 0x01), ++ op_s4addw = OPR(0x10, 0x02), ++ op_s4subw = OPR(0x10, 0x03), ++ op_s8addw = OPR(0x10, 0x04), ++ op_s8subw = OPR(0x10, 0x05), ++ op_addl = OPR(0x10, 0x08), ++ op_subl = OPR(0x10, 0x09), ++ op_s4addl = OPR(0x10, 0x0A), ++ op_s4subl = OPR(0x10, 0x0B), ++ op_s8addl = OPR(0x10, 0x0C), ++ op_s8subl = OPR(0x10, 0x0D), ++ op_mulw = OPR(0x10, 0x10), ++ op_divw = OPR(0x10, 0x11), //SW8A ++ op_udivw = OPR(0x10, 0x12), //SW8A ++ op_remw = OPR(0x10, 0x13), //SW8A ++ op_uremw = OPR(0x10, 0x14), //SW8A ++ op_mull = OPR(0x10, 0x18), ++ op_umulh = OPR(0x10, 0x19), ++ op_divl = OPR(0x10, 0x1A), //SW8A ++ op_udivl = OPR(0x10, 0x1B), //SW8A ++ op_reml = OPR(0x10, 0x1C), //SW8A ++ op_ureml = OPR(0x10, 0x1D), //SW8A ++ op_addpi = OPR(0x10, 0x1E), //SW8A ++ op_addpis = OPR(0x10, 0x1F), //SW8A ++ op_cmpeq = OPR(0x10, 0x28), ++ op_cmplt = OPR(0x10, 0x29), ++ op_cmple = OPR(0x10, 0x2A), ++ op_cmpult = OPR(0x10, 0x2B), ++ op_cmpule = OPR(0x10, 0x2C), ++ op_sbt = OPR(0x10, 0x2D), //SW8A ++ op_cbt = OPR(0x10, 0x2E), //SW8A ++ op_and = OPR(0x10, 0x38), ++ op_bic = OPR(0x10, 0x39), ++ op_bis = OPR(0x10, 0x3A), ++ op_ornot = OPR(0x10, 0x3B), ++ op_xor = OPR(0x10, 0x3C), ++ op_eqv = OPR(0x10, 0x3D), ++ op_inslb = OPR(0x10, 0x40), //0x10.40~0x10.47 ++ op_inslh = OPR(0x10, 0x41), ++ op_inslw = OPR(0x10, 0x42), ++ op_insll = OPR(0x10, 0x43), ++ op_inshb = OPR(0x10, 0x44), ++ op_inshh = OPR(0x10, 0x45), ++ op_inshw = OPR(0x10, 0x46), ++ op_inshl = OPR(0x10, 0x47), ++ op_slll = OPR(0x10, 0x48), ++ op_srll = OPR(0x10, 0x49), ++ op_sral = OPR(0x10, 0x4A), ++ op_roll = OPR(0x10, 0x4B), //SW8A ++ op_sllw = OPR(0x10, 0x4C), //SW8A ++ op_srlw = OPR(0x10, 0x4D), //SW8A ++ op_sraw = OPR(0x10, 0x4E), //SW8A ++ op_rolw = OPR(0x10, 0x4F), //SW8A ++ op_extlb = OPR(0x10, 0x50), //0x10.50~0x10.57 ++ op_extlh = OPR(0x10, 0x51), ++ op_extlw = OPR(0x10, 0x52), ++ op_extll = OPR(0x10, 0x53), ++ op_exthb = OPR(0x10, 0x54), ++ op_exthh = OPR(0x10, 0x55), ++ op_exthw = OPR(0x10, 0x56), ++ op_exthl = OPR(0x10, 0x57), ++ op_ctpop = OPR(0x10, 0x58), ++ op_ctlz = OPR(0x10, 0x59), ++ op_cttz = OPR(0x10, 0x5A), ++ op_revbh = OPR(0x10, 0x5B), //SW8A ++ op_revbw = OPR(0x10, 0x5C), //SW8A ++ op_revbl = OPR(0x10, 0x5D), //SW8A ++ op_casw = OPR(0x10, 0x5E), //SW8A ++ op_casl = OPR(0x10, 0x5F), //SW8A ++ op_masklb = OPR(0x10, 0x60), //0x10.60~0x10.67 ++ op_masklh = OPR(0x10, 0x61), ++ op_masklw = OPR(0x10, 0x62), ++ op_maskll = OPR(0x10, 0x63), ++ op_maskhb = OPR(0x10, 0x64), ++ op_maskhh = OPR(0x10, 0x65), ++ op_maskhw = OPR(0x10, 0x66), ++ op_maskhl = OPR(0x10, 0x67), ++ op_zap = OPR(0x10, 0x68), ++ op_zapnot = OPR(0x10, 0x69), ++ op_sextb = OPR(0x10, 0x6A), ++ op_sexth = OPR(0x10, 0x6B), ++ op_cmpgeb = OPR(0x10, 0x6C), //0x10.6C ++ op_fimovs = OPR(0x10, 0x70), ++ op_fimovd = OPR(0x10, 0x78), ++ op_cmovdl = OFP(0x10, 0x72), ++ op_cmovdl_g = OFP(0x10, 0x74), ++ op_cmovdl_p = OFP(0x10, 0x7A), ++ op_cmovdl_z = OFP(0x10, 0x7C), ++ op_cmovdl_n = OFP(0x10, 0x80), ++ op_cmovdlu = OFP(0x10, 0x81), ++ op_cmovdlu_g= OFP(0x10, 0x82), ++ op_cmovdlu_p= OFP(0x10, 0x83), ++ op_cmovdlu_z= OFP(0x10, 0x84), ++ op_cmovdlu_n= OFP(0x10, 0x85), ++ op_cmovdw = OFP(0x10, 0x8B), ++ op_cmovdw_g = OFP(0x10, 0x8C), ++ op_cmovdw_p = OFP(0x10, 0x8D), ++ op_cmovdw_z = OFP(0x10, 0x8E), ++ op_cmovdw_n = OFP(0x10, 0x8F), ++ op_cmovdwu = OFP(0x10, 0x86), ++ op_cmovdwu_g= OFP(0x10, 0x87), ++ op_cmovdwu_p= 
OFP(0x10, 0x88), ++ op_cmovdwu_z= OFP(0x10, 0x89), ++ op_cmovdwu_n= OFP(0x10, 0x8A), ++ op_seleq = TOPR(0x11, 0x0), ++ op_selge = TOPR(0x11, 0x1), ++ op_selgt = TOPR(0x11, 0x2), ++ op_selle = TOPR(0x11, 0x3), ++ op_sellt = TOPR(0x11, 0x4), ++ op_selne = TOPR(0x11, 0x5), ++ op_sellbc = TOPR(0x11, 0x6), ++ op_sellbs = TOPR(0x11, 0x7) ++ }; ++ ++ enum ops_oprl{ ++ op_addw_l = OPRL(0x12, 0x00), ++ op_subw_l = OPRL(0x12, 0x01), ++ op_s4addw_l = OPRL(0x12, 0x02), ++ op_s4subw_l = OPRL(0x12, 0x03), ++ op_s8addw_l = OPRL(0x12, 0x04), ++ op_s8subw_l = OPRL(0x12, 0x05), ++ op_addl_l = OPRL(0x12, 0x08), ++ op_subl_l = OPRL(0x12, 0x09), ++ op_s4addl_l = OPRL(0x12, 0x0A), ++ op_s4subl_l = OPRL(0x12, 0x0B), ++ op_s8addl_l = OPRL(0x12, 0x0C), ++ op_s8subl_l = OPRL(0x12, 0x0D), ++ op_mulw_l = OPRL(0x12, 0x10), ++ op_mull_l = OPRL(0x12, 0x18), ++ op_umulh_l = OPRL(0x12, 0x19), ++ op_cmpeq_l = OPRL(0x12, 0x28), ++ op_cmplt_l = OPRL(0x12, 0x29), ++ op_cmple_l = OPRL(0x12, 0x2A), ++ op_cmpult_l = OPRL(0x12, 0x2B), ++ op_cmpule_l = OPRL(0x12, 0x2C), ++ op_sbt_l = OPRL(0x12, 0x2D), //SW8A ++ op_cbt_l = OPRL(0x12, 0x2E), //SW8A ++ op_and_l = OPRL(0x12, 0x38), ++ op_bic_l = OPRL(0x12, 0x39), ++ op_bis_l = OPRL(0x12, 0x3A), ++ op_ornot_l = OPRL(0x12, 0x3B), ++ op_xor_l = OPRL(0x12, 0x3C), ++ op_eqv_l = OPRL(0x12, 0x3D), ++ op_inslb_l = OPRL(0x12, 0x40), //0x12.40~0x12.47 ++ op_inslh_l = OPRL(0x12, 0x41), ++ op_inslw_l = OPRL(0x12, 0x42), ++ op_insll_l = OPRL(0x12, 0x43), ++ op_inshb_l = OPRL(0x12, 0x44), ++ op_inshh_l = OPRL(0x12, 0x45), ++ op_inshw_l = OPRL(0x12, 0x46), ++ op_inshl_l = OPRL(0x12, 0x47), ++ op_slll_l = OPRL(0x12, 0x48), ++ op_srll_l = OPRL(0x12, 0x49), ++ op_sral_l = OPRL(0x12, 0x4A), ++ op_roll_l = OPRL(0x12, 0x4B), //SW8A ++ op_sllw_l = OPRL(0x12, 0x4C), //SW8A ++ op_srlw_l = OPRL(0x12, 0x4D), //SW8A ++ op_sraw_l = OPRL(0x12, 0x4E), //SW8A ++ op_rolw_l = OPRL(0x12, 0x4F), //SW8A ++ op_extlb_l = OPRL(0x12, 0x50), //0x12.50~0x12.57 ++ op_extlh_l = OPRL(0x12, 0x51), ++ op_extlw_l = OPRL(0x12, 0x52), ++ op_extll_l = OPRL(0x12, 0x53), ++ op_exthb_l = OPRL(0x12, 0x54), ++ op_exthh_l = OPRL(0x12, 0x55), ++ op_exthw_l = OPRL(0x12, 0x56), ++ op_exthl_l = OPRL(0x12, 0x57), ++ op_masklb_l = OPRL(0x12, 0x60), //0x12.60~0x12.67 ++ op_masklh_l = OPRL(0x12, 0x61), ++ op_masklw_l = OPRL(0x12, 0x62), ++ op_maskll_l = OPRL(0x12, 0x63), ++ op_maskhb_l = OPRL(0x12, 0x64), ++ op_maskhh_l = OPRL(0x12, 0x65), ++ op_maskhw_l = OPRL(0x12, 0x66), ++ op_maskhl_l = OPRL(0x12, 0x67), ++ op_zap_l = OPRL(0x12, 0x68), ++ op_zapnot_l = OPRL(0x12, 0x69), ++ op_sextb_l = OPRL(0x12, 0x6A), ++ op_sexth_l = OPRL(0x12, 0x6B), ++ op_cmpgeb_l = OPRL(0x12, 0x6C), //0x12.6C ++ op_seleq_l = TOPRL(0x13, 0x0), ++ op_selge_l = TOPRL(0x13, 0x1), ++ op_selgt_l = TOPRL(0x13, 0x2), ++ op_selle_l = TOPRL(0x13, 0x3), ++ op_sellt_l = TOPRL(0x13, 0x4), ++ op_selne_l = TOPRL(0x13, 0x5), ++ op_sellbc_l = TOPRL(0x13, 0x6), ++ op_sellbs_l = TOPRL(0x13, 0x7) ++ }; ++ ++ enum ops_bra { ++ op_br = BRA(0x04), ++ op_bsr = BRA(0x05), ++ op_beq = BRA(0x30), ++ op_bne = BRA(0x31), ++ op_blt = BRA(0x32), ++ op_ble = BRA(0x33), ++ op_bgt = BRA(0x34), ++ op_bge = BRA(0x35), ++ op_blbc = BRA(0x36), ++ op_blbs = BRA(0x37), ++ op_fbeq = BRA(0x38), ++ op_fbne = BRA(0x39), ++ op_fblt = BRA(0x3A), ++ op_fble = BRA(0x3B), ++ op_fbgt = BRA(0x3C), ++ op_fbge = BRA(0x3D), ++ op_lbr = BRA(0x1D), //SW8A ++ }; ++ ++ enum ops_fp { ++ op_fadds = OFP(0x18, 0x00), ++ op_faddd = OFP(0x18, 0x01), ++ op_fsubs = OFP(0x18, 0x02), ++ op_fsubd = OFP(0x18, 0x03), ++ op_fmuls = OFP(0x18, 
0x04), ++ op_fmuld = OFP(0x18, 0x05), ++ op_fdivs = OFP(0x18, 0x06), ++ op_fdivd = OFP(0x18, 0x07), ++ op_fsqrts = OFP(0x18, 0x08), ++ op_fsqrtd = OFP(0x18, 0x09), ++ op_fcmpeq = OFP(0x18, 0x10), ++ op_fcmple = OFP(0x18, 0x11), ++ op_fcmplt = OFP(0x18, 0x12), ++ op_fcmpun = OFP(0x18, 0x13), ++ op_fcvtsd = OFP(0x18, 0x20), ++ op_fcvtds = OFP(0x18, 0x21), ++ op_fcvtdl_g = OFP(0x18, 0x22), //lx_fcvtdl ++ op_fcvtdl_p = OFP(0x18, 0x23), ++ op_fcvtdl_z = OFP(0x18, 0x24), ++ op_fcvtdl_n = OFP(0x18, 0x25), //lx_fcvtdl ++ op_fcvtdl = OFP(0x18, 0x27), ++ op_fcvtwl = OFP(0x18, 0x28), ++ op_fcvtlw = OFP(0x18, 0x29), ++ op_fcvtls = OFP(0x18, 0x2D), ++ op_fcvtld = OFP(0x18, 0x2F), ++ op_fcpys = OFP(0x18, 0x30), ++ op_fcpyse = OFP(0x18, 0x31), ++ op_fcpysn = OFP(0x18, 0x32), ++ op_ifmovs = OFP(0x18, 0x40), ++ op_ifmovd = OFP(0x18, 0x41), ++ op_cmovls = OFP(0x18, 0x48), ++ op_cmovld = OFP(0x18, 0x4A), ++ op_cmovuls = OFP(0x18, 0x4C), ++ op_cmovuld = OFP(0x18, 0x4E), ++ op_cmovws = OFP(0x18, 0x49), ++ op_cmovwd = OFP(0x18, 0x4B), ++ op_cmovuws = OFP(0x18, 0x4D), ++ op_cmovuwd = OFP(0x18, 0x4F), ++ op_rfpcr = OFP(0x18, 0x50), ++ op_wfpcr = OFP(0x18, 0x51), ++ op_setfpec0 = OFP(0x18, 0x54), ++ op_setfpec1 = OFP(0x18, 0x55), ++ op_setfpec2 = OFP(0x18, 0x56), ++ op_setfpec3 = OFP(0x18, 0x57), ++ op_frecs = OFP(0x18, 0x58), //SW8A ++ op_frecd = OFP(0x18, 0x59), //SW8A ++ op_fris = OFP(0x18, 0x5A), //SW8A ++ op_fris_g = OFP(0x18, 0x5B), //SW8A ++ op_fris_p = OFP(0x18, 0x5C), //SW8A ++ op_fris_z = OFP(0x18, 0x5D), //SW8A ++ op_fris_n = OFP(0x18, 0x5F), //SW8A ++ op_frid = OFP(0x18, 0x60), //SW8A ++ op_frid_g = OFP(0x18, 0x61), //SW8A ++ op_frid_p = OFP(0x18, 0x62), //SW8A ++ op_frid_z = OFP(0x18, 0x63), //SW8A ++ op_frid_n = OFP(0x18, 0x64), //SW8A ++ op_vaddw = OFP(0x1A, 0x00), ++ op_vsubw = OFP(0x1A, 0x01), ++ op_vcmpgew = OFP(0x1A, 0x02), ++ op_vcmpeqw = OFP(0x1A, 0x03), ++ op_vcmplew = OFP(0x1A, 0x04), ++ op_vcmpltw = OFP(0x1A, 0x05), ++ op_vcmpulew = OFP(0x1A, 0x06), ++ op_vcmpultw = OFP(0x1A, 0x07), ++ op_vsllw = OFP(0x1A, 0x08), ++ op_vsrlw = OFP(0x1A, 0x09), ++ op_vsraw = OFP(0x1A, 0x0A), ++ op_vrolw = OFP(0x1A, 0x0B), ++ op_sllow = OFP(0x1A, 0x0C), ++ op_srlow = OFP(0x1A, 0x0D), ++ op_vaddl = OFP(0x1A, 0x0E), ++ op_vsubl = OFP(0x1A, 0x0F), ++ op_vsllb = OFP(0x1A, 0x10), //SW8A ++ op_vsrlb = OFP(0x1A, 0x11), //SW8A ++ op_vsrab = OFP(0x1A, 0x12), //SW8A ++ op_vrolb = OFP(0x1A, 0x13), //SW8A ++ op_vsllh = OFP(0x1A, 0x14), //SW8A ++ op_vsrlh = OFP(0x1A, 0x15), //SW8A ++ op_vsrah = OFP(0x1A, 0x16), //SW8A ++ op_vrolh = OFP(0x1A, 0x17), //SW8A ++ op_ctpopow = OFP(0x1A, 0x18), ++ op_ctlzow = OFP(0x1A, 0x19), ++ op_vslll = OFP(0x1A, 0x1A), //SW8A ++ op_vsrll = OFP(0x1A, 0x1B), //SW8A ++ op_vsral = OFP(0x1A, 0x1C), //SW8A ++ op_vroll = OFP(0x1A, 0x1D), //SW8A ++ op_vmaxb = OFP(0x1A, 0x1E), //SW8A ++ op_vminb = OFP(0x1A, 0x1F), //SW8A ++ op_vucaddw = OFP(0x1A, 0x40), ++ op_vucsubw = OFP(0x1A, 0x41), ++ op_vucaddh = OFP(0x1A, 0x42), ++ op_vucsubh = OFP(0x1A, 0x43), ++ op_vucaddb = OFP(0x1A, 0x44), ++ op_vucsubb = OFP(0x1A, 0x45), ++ op_sraow = OFP(0x1A, 0x46), //SW8A ++ op_vsumw = OFP(0x1A, 0x47), //SW8A ++ op_vsuml = OFP(0x1A, 0x48), //SW8A ++ op_vsm4r = OFP(0x1A, 0x49), //SW8A, ENCRYPT ++ op_vbinvw = OFP(0x1A, 0x4A), //SW8A, ENCRYPT ++ op_vcmpueqb = OFP(0x1A, 0x4B), //SW8A ++ op_vcmpugtb = OFP(0x1A, 0x4C), //SW8A ++ op_vsm3msw = OFP(0x1A, 0x4D), //SW8A, ENCRYPT ++ op_vmaxh = OFP(0x1A, 0x50), //SW8A ++ op_vminh = OFP(0x1A, 0x51), //SW8A ++ op_vmaxw = OFP(0x1A, 0x52), //SW8A ++ op_vminw = OFP(0x1A, 0x53), //SW8A ++ 
op_vmaxl = OFP(0x1A, 0x54), //SW8A ++ op_vminl = OFP(0x1A, 0x55), //SW8A ++ op_vumaxb = OFP(0x1A, 0x56), //SW8A ++ op_vuminb = OFP(0x1A, 0x57), //SW8A ++ op_vumaxh = OFP(0x1A, 0x58), //SW8A ++ op_vuminh = OFP(0x1A, 0x59), //SW8A ++ op_vumaxw = OFP(0x1A, 0x5A), //SW8A ++ op_vuminw = OFP(0x1A, 0x5B), //SW8A ++ op_vumaxl = OFP(0x1A, 0x5C), //SW8A ++ op_vuminl = OFP(0x1A, 0x5D), //SW8A ++ op_vadds = OFP(0x1A, 0x80), ++ op_vaddd = OFP(0x1A, 0x81), ++ op_vsubs = OFP(0x1A, 0x82), ++ op_vsubd = OFP(0x1A, 0x83), ++ op_vmuls = OFP(0x1A, 0x84), ++ op_vmuld = OFP(0x1A, 0x85), ++ op_vdivs = OFP(0x1A, 0x86), ++ op_vdivd = OFP(0x1A, 0x87), ++ op_vsqrts = OFP(0x1A, 0x88), ++ op_vsqrtd = OFP(0x1A, 0x89), ++ op_vfcmpeq = OFP(0x1A, 0x8C), ++ op_vfcmple = OFP(0x1A, 0x8D), ++ op_vfcmplt = OFP(0x1A, 0x8E), ++ op_vfcmpun = OFP(0x1A, 0x8F), ++ op_vcpys = OFP(0x1A, 0x90), ++ op_vcpyse = OFP(0x1A, 0x91), ++ op_vcpysn = OFP(0x1A, 0x92), ++ op_vsums = OFP(0x1A, 0x93), //SW8A ++ op_vsumd = OFP(0x1A, 0x94), //SW8A ++ op_vfcvtsd = OFP(0x1A, 0x95), //SW8A ++ op_vfcvtds = OFP(0x1A, 0x96), //SW8A ++ op_vfcvtls = OFP(0x1A, 0x99), //SW8A ++ op_vfcvtld = OFP(0x1A, 0x9A), //SW8A ++ op_vfcvtdl = OFP(0x1A, 0x9B), //SW8A ++ op_vfcvtdl_g = OFP(0x1A, 0x9C), //SW8A ++ op_vfcvtdl_p = OFP(0x1A, 0x9D), //SW8A ++ op_vfcvtdl_z = OFP(0x1A, 0x9E), //SW8A ++ op_vfcvtdl_n = OFP(0x1A, 0x9F), //SW8A ++ op_vfris = OFP(0x1A, 0xA0), //SW8A ++ op_vfris_g = OFP(0x1A, 0xA1), //SW8A ++ op_vfris_p = OFP(0x1A, 0xA2), //SW8A ++ op_vfris_z = OFP(0x1A, 0xA3), //SW8A ++ op_vfris_n = OFP(0x1A, 0xA4), //SW8A ++ op_vfrid = OFP(0x1A, 0xA5), //SW8A ++ op_vfrid_g = OFP(0x1A, 0xA6), //SW8A ++ op_vfrid_p = OFP(0x1A, 0xA7), //SW8A ++ op_vfrid_z = OFP(0x1A, 0xA8), //SW8A ++ op_vfrid_n = OFP(0x1A, 0xA9), //SW8A ++ op_vfrecs = OFP(0x1A, 0xAA), //SW8A ++ op_vfrecd = OFP(0x1A, 0xAB), //SW8A ++ op_vmaxs = OFP(0x1A, 0xAC), //SW8A ++ op_vmins = OFP(0x1A, 0xAD), //SW8A ++ op_vmaxd = OFP(0x1A, 0xAE), //SW8A ++ op_vmind = OFP(0x1A, 0xAF), //SW8A ++ }; ++ ++ enum ops_fpl { ++ op_vaddw_l = OFP(0x1A, 0x20), ++ op_vsubw_l = OFP(0x1A, 0x21), ++ op_vcmpgew_l = OFP(0x1A, 0x22), ++ op_vcmpeqw_l = OFP(0x1A, 0x23), ++ op_vcmplew_l = OFP(0x1A, 0x24), ++ op_vcmpltw_l = OFP(0x1A, 0x25), ++ op_vcmpulew_l = OFP(0x1A, 0x26), ++ op_vcmpultw_l = OFP(0x1A, 0x27), ++ op_vsllw_l = OFP(0x1A, 0x28), ++ op_vsrlw_l = OFP(0x1A, 0x29), ++ op_vsraw_l = OFP(0x1A, 0x2A), ++ op_vrolw_l = OFP(0x1A, 0x2B), ++ op_sllow_l = OFP(0x1A, 0x2C), ++ op_srlow_l = OFP(0x1A, 0x2D), ++ op_vaddl_l = OFP(0x1A, 0x2E), ++ op_vsubl_l = OFP(0x1A, 0x2F), ++ op_vsllb_l = OFP(0x1A, 0x30), //SW8A ++ op_vsrlb_l = OFP(0x1A, 0x31), //SW8A ++ op_vsrab_l = OFP(0x1A, 0x32), //SW8A ++ op_vrolb_l = OFP(0x1A, 0x33), //SW8A ++ op_vsllh_l = OFP(0x1A, 0x34), //SW8A ++ op_vsrlh_l = OFP(0x1A, 0x35), //SW8A ++ op_vsrah_l = OFP(0x1A, 0x36), //SW8A ++ op_vrolh_l = OFP(0x1A, 0x37), //SW8A ++ op_vslll_l = OFP(0x1A, 0x3A), //SW8A ++ op_vsrll_l = OFP(0x1A, 0x3B), //SW8A ++ op_vsral_l = OFP(0x1A, 0x3C), //SW8A ++ op_vroll_l = OFP(0x1A, 0x3D), //SW8A ++ op_vucaddw_l = OFP(0x1A, 0x60), ++ op_vucsubw_l = OFP(0x1A, 0x61), ++ op_vucaddh_l = OFP(0x1A, 0x62), ++ op_vucsubh_l = OFP(0x1A, 0x63), ++ op_vucaddb_l = OFP(0x1A, 0x64), ++ op_vucsubb_l = OFP(0x1A, 0x65), ++ op_sraow_l = OFP(0x1A, 0x66), //SW8A ++ op_vsm4key_l = OFP(0x1A, 0x68), //SW8A, ENCRYPT ++ op_vcmpueqb_l = OFP(0x1A, 0x6B), //SW8A ++ op_vcmpugtb_l = OFP(0x1A, 0x6C), //SW8A ++ op_vfcvtsh_l = OFP(0x1B, 0x35), //SW8A ++ op_vfcvths_l = OFP(0x1B, 0x36) //SW8A ++ }; ++ ++ enum ops_fma { ++ op_fmas = 
FMA(0x19, 0x00), ++ op_fmad = FMA(0x19, 0x01), ++ op_fmss = FMA(0x19, 0x02), ++ op_fmsd = FMA(0x19, 0x03), ++ op_fnmas = FMA(0x19, 0x04), ++ op_fnmad = FMA(0x19, 0x05), ++ op_fnmss = FMA(0x19, 0x06), ++ op_fnmsd = FMA(0x19, 0x07), ++ op_fseleq = FMA(0x19, 0x10), ++ op_fselne = FMA(0x19, 0x11), ++ op_fsellt = FMA(0x19, 0x12), ++ op_fselle = FMA(0x19, 0x13), ++ op_fselgt = FMA(0x19, 0x14), ++ op_fselge = FMA(0x19, 0x15), ++ op_vmas = FMA(0x1B, 0x00), ++ op_vmad = FMA(0x1B, 0x01), ++ op_vmss = FMA(0x1B, 0x02), ++ op_vmsd = FMA(0x1B, 0x03), ++ op_vnmas = FMA(0x1B, 0x04), ++ op_vnmad = FMA(0x1B, 0x05), ++ op_vnmss = FMA(0x1B, 0x06), ++ op_vnmsd = FMA(0x1B, 0x07), ++ op_vfseleq = FMA(0x1B, 0x10), ++ op_vfsellt = FMA(0x1B, 0x12), ++ op_vfselle = FMA(0x1B, 0x13), ++ op_vseleqw = FMA(0x1B, 0x18), ++ op_vsellbcw = FMA(0x1B, 0x19), ++ op_vselltw = FMA(0x1B, 0x1A), ++ op_vsellew = FMA(0x1B, 0x1B), ++ op_vcpyw = FMA(0x1B, 0x24), ++ op_vcpyf = FMA(0x1B, 0x25), ++ op_vconw = FMA(0x1B, 0x26), ++ op_vshfw = FMA(0x1B, 0x27), ++ op_vcons = FMA(0x1B, 0x28), ++ op_vcond = FMA(0x1B, 0x29), ++ op_vinsectlh = FMA(0x1B, 0x2C), //SW8A ++ op_vinsectlw = FMA(0x1B, 0x2D), //SW8A ++ op_vinsectll = FMA(0x1B, 0x2E), //SW8A ++ op_vinsectlb = FMA(0x1B, 0x2F), //SW8A ++ op_vshfqb = FMA(0x1B, 0x31), //SW8A ++ op_vcpyb = FMA(0x1B, 0x32), //SW8A ++ op_vcpyh = FMA(0x1B, 0x33) //SW8A ++ }; ++ ++ enum ops_fmal { ++ op_vinsw_l = FMA(0x1B, 0x20), ++ op_vinsf_l = FMA(0x1B, 0x21), ++ op_vextw_l = FMA(0x1B, 0x22), ++ op_vextf_l = FMA(0x1B, 0x23), ++ op_vinsb_l = FMA(0x1B, 0x2A), //SW8A ++ op_vinsh_l = FMA(0x1B, 0x2B), //SW8A ++ op_vshfq_l = FMA(0x1B, 0x30), //SW8A ++ op_vsm3r_l = FMA(0x1B, 0x34), //SW8A, ENCRYPT ++ op_vseleqw_l = FMA(0x1B, 0x38), ++ op_vsellbcw_l = FMA(0x1B, 0x39), ++ op_vselltw_l = FMA(0x1B, 0x3A), ++ op_vsellew_l = FMA(0x1B, 0x3B) ++ }; ++ ++ enum ops_extra { ++ op_sys_call = PCD(0x00), ++ op_memb = MFC(0x06, 0x0000), ++ op_imemb = MFC(0x06, 0x0001), //SW8A ++ op_wmemb = MFC(0x06, 0x0002), //SW8A ++ op_rtc = MFC(0x06, 0x0020), ++ op_rcid = MFC(0x06, 0x0040), ++ op_halt = MFC(0x06, 0x0080), ++ op_rd_f = MFC(0x06, 0x1000), //SW2F ++ op_wr_f = MFC(0x06, 0x1020), //SW2F ++ op_rtid = MFC(0x06, 0x1040), ++ op_csrws = CSR(0x06, 0xFC), //SW8A ++ op_csrwc = CSR(0x06, 0xFD), //SW8A ++ op_csrr = CSR(0x06, 0xFE), ++ op_csrw = CSR(0x06, 0xFF), ++ op_pri_ret = PRIRET(0x07, 0x0), ++ op_vlog = LOGX(0x14, 0x00), ++ op_vbisw = PSE_LOGX(0x14, 0x30), ++ op_vxorw = PSE_LOGX(0x14, 0x3c), ++ op_vandw = PSE_LOGX(0x14, 0xc0), ++ op_veqvw = PSE_LOGX(0x14, 0xc3), ++ op_vornotw = PSE_LOGX(0x14, 0xf3), ++ op_vbicw = PSE_LOGX(0x14, 0xfc), ++ op_dpfhr = ATMEM(0x1E, 0xE), //SW6B ++ op_dpfhw = ATMEM(0x1E, 0xF), //SW6B ++ }; ++ ++ // compute inverse of simm ++ static int inv_simm(int x, int nbits) { ++ return (int)(x << (32 - nbits)) >> (32 - nbits); ++ } ++ ++ static int inv_simm16( int x ) { return inv_simm(x, 16); } //ZHJ20110307 modified ++ ++ // inverse of u_field ++ static int inv_u_field(int x, int hi_bit, int lo_bit) { ++ juint r = juint(x) >> lo_bit; ++ r &= fmask( hi_bit, lo_bit); ++ return int(r); ++ } ++ ++ static int sw2_op(int inst) {return (int)(inst & OP(-1)); } ++ static int sw2_arith_op(int inst) {return (int)(inst & OPR(-1, -1)); } ++ static int sw2_mfc_op(int inst) {return (int)(inst & MFC(-1, -1)); } ++ ++ static Register sw2_ra( int x ) { return as_Register(inv_u_field(x, 25, 21)); } ++ static Register sw2_rb( int x ) { return as_Register(inv_u_field(x, 20, 16)); } ++ static Register sw2_rc( int x ) { return 
as_Register(inv_u_field(x, 4, 0)); } ++ static int sw2_mdisp( int x ) { return inv_simm16(x); } ++ ++ static int fmask(uint32_t hi_bit, uint32_t lo_bit) { ++ assert( hi_bit >= lo_bit && hi_bit < 32, "bad bits"); ++ return (1 << ( hi_bit-lo_bit + 1 )) - 1; ++ } ++ ++#ifdef ASSERT ++ static int u_field(int x, int hi_bit, int lo_bit) { ++ assert( ( x & ~fmask(hi_bit, lo_bit)) == 0, ++ "value out of range"); ++ int r = x << lo_bit; ++ assert( inv_u_field(r, hi_bit, lo_bit) == x, "just checking"); ++ return r; ++ } ++#else ++ // make sure this is inlined as it will reduce code size significantly ++ #define u_field(x, hi_bit, lo_bit) ((x) << (lo_bit)) ++#endif ++ ++ static int opcode(int insn) { return (insn>>26)&0x3f; } ++ static int rs(int insn) { return (insn>>21)&0x1f; } ++ static int rt(int insn) { return (insn>>16)&0x1f; } ++ static int imm_off(int insn) { return (short)bitfield(insn, 0, 16); } ++ ++ // the plain int register fields. ++ static int is_ra (Register ra) { return u_field ( ra->encoding(), 25, 21 ); }; ++ static int is_rb (Register rb) { return u_field ( rb->encoding(), 20, 16 ); }; ++ static int is_rc (Register rc) { return u_field ( rc->encoding(), 4, 0 ); }; ++ /* for the third operand of ternary operands integer insn. */ ++ static int is_r3 (Register r3) { return u_field ( r3->encoding(), 9, 5 ); }; ++ /* th th fields for dpfhr and dpfhw instructions */ ++ static int is_th (int th) { return u_field ( th, 25, 21 ); }; ++ ++ //the plain fp register fields. ++ static int is_fa (FloatRegister fa) { return u_field ( fa->encoding(), 25, 21 ); }; ++ static int is_fb (FloatRegister fb) { return u_field ( fb->encoding(), 20, 16 ); }; ++ static int is_fc (FloatRegister fc) { return u_field ( fc->encoding(), 4, 0 ); }; ++ /* the plain fp register fields */ ++ static int is_f3 (FloatRegister f3) { return u_field ( f3->encoding(), 9, 5 ); }; ++ ++ static void assert_signed_range(intptr_t x, int nbits) { ++ assert(nbits == 32 || (-(1 << nbits-1) <= x && x < ( 1 << nbits-1)), ++ "value out of range"); ++ } ++ ++ // signed immediate, in low bits, nbits long ++ static int simm(int x, int nbits) { ++ assert_signed_range(x, nbits); ++ return x & (( 1 << nbits ) - 1); ++ } ++ static int simm2(int64_t val, int msb, int lsb) { ++ int nbits = msb - lsb + 1; ++ int64_t chk = val >> (nbits - 1); ++ guarantee (chk == -1 || chk == 0, "Field too big for insn"); ++ unsigned uval = val; ++ unsigned mask = checked_cast(right_n_bits(nbits)); ++ uval &= mask; ++ uval <<= lsb; ++ return uval; ++ } ++ inline void check_delay() { ++# ifdef CHECK_DELAY ++ guarantee(delay_state != at_delay_slot, "must say delayed() when filling delay slot"); ++ delay_state = no_delay; ++# endif ++ } ++ ++ void emit_sw2_long(int); // shadows AbstractAssembler::emit_long ++ ++ void nop(int i = 1) { assert(i > 0, "count > 0"); for (; i > 0 ; i--) emit_sw2_long( op_ldi | is_ra(R0) ); } ++ ++ /* the unsigned 8-bit literal of operate format insns. */ ++ static int is_lit (int lit) { return u_field ( lit ,20, 13 ); }; ++ ++ /* the signed 13-bit literal of operate format insns. */ ++ static int is_apint (int apint) { return simm2 ( apint, 25, 13 ); }; ++ ++ /* the signed 16-bit displacement of memory format insns. from here ++ we can't tell what relocation should be used, so don't use a default. */ ++ static int is_mdisp (int mdisp) { return simm ( mdisp ,16 ); }; ++ ++ /* the signed "23-bit" aligned displacement of branch format insns. 
*/ ++ static int is_bdisp (int bdisp) { return simm ( bdisp ,21 ); }; ++ ++ /* the 26-bit palcode function */ ++ static int is_palfn (int palfn) { return simm ( palfn, 26 ); }; ++ /* the optional signed "16-bit" aligned displacement of the jmp/jsr hint */ ++ static int is_jmphint (int jmphint) { return simm ( jmphint, 16); }; ++ ++ /* the optional hint to ret/jsr_coroutine */ ++ static int is_rethint (int rethint) { return simm ( rethint, 16); }; ++ /* the 12-bit displacement for the ev[46] hw_{ return u_field (ld,st} (pal1b/pal1f) insns. */ ++ static int is_ev6hwdisp (int ev6hwdisp) { return simm ( ev6hwdisp, 12 ); }; ++ ++ /* sw2 simd settle instruction lit */ ++ static int is_fmalit (int fmalit) { return u_field ( fmalit ,9 ,5 ); };//v1.1 ++ ++ static int is_rpiindex (int rpiindex) { return u_field ( rpiindex ,7, 0 ); }; ++ ++ static int is_atmdisp ( int atmdisp ) { return u_field ( atmdisp, 10, 0 ); }; ++ ++ static int is_vlog_h ( int vlog ) { return u_field ( (vlog & 0xff) >>6 , 27, 26 ); }; ++ static int is_vlog_l ( int vlog ) { return u_field ( vlog & 0x3f , 15, 10 ); }; ++ ++ void flush() { ++#ifdef CHECK_DELAY ++ guarantee( delay_state == no_delay, "ending code with a delay slot"); ++#endif ++ AbstractAssembler::flush(); ++ } ++ ++ void assert_not_delayed() { ++#ifdef CHECK_DELAY ++ assert_not_delayed("next instruction should not be a delay slot"); ++#endif ++ } ++ ++ void assert_not_delayed(const char* msg) { ++#ifdef CHECK_DELAY ++ if(delay_state != no_delay){ ++ tty->print_cr("%s:%d, pc: %lx", __func__, __LINE__, pc()); ++ } ++ assert(delay_state == no_delay, msg); ++#endif ++ } ++ ++protected: ++#ifdef ASSERT ++ void check_relocation(RelocationHolder const& rspec, int format); ++#endif ++ ++ // instruction only in sw2, including sw2f, sw4a, sw6a ++ static void sw2_only() { assert( VM_Version::sw2only(), "This instruction only works on sw2f, sw4a or sw6a"); } ++ // instruction only in sw3, including sw6b ++ static void sw3_only() { assert( VM_Version::sw3only(), "This instruction only works on sw6b"); } ++ static void sw4_only() { assert( VM_Version::sw4only(), "This instruction only works on sw6b"); } ++ ++public: ++ // SW64 common helper functions ++ static bool operand_valid_for_simple_type_instruction_immediate(int imm) { return is_lit(imm); } ++ static bool operand_valid_for_storage_type_instruction_immediate(int imm) { return is_simm16(imm); } ++ ++ // SW64 Generic instructions ++ void sys_call_b( int palfn ); ++ void sys_call ( int palfn ); ++ void call ( Register ra, Register rb, int jmphint ); ++ void ret ( Register ra, Register rb, int rethint ); ++ void jmp ( Register ra, Register rb, int jmphint ); ++ void br ( Register ra, int bdisp ); ++ void bsr ( Register ra, int bdisp ); ++ void memb ( void ); ++ void imemb ( void ); ++ void wmemb ( void ); ++ void rtc ( Register ra, Register rb ); ++ void rcid ( Register ra); ++ void halt ( void); ++ void rd_f ( Register ra ); //SW2F ++ void wr_f ( Register ra ); //SW2F ++ void rtid ( Register ra); ++ void csrws ( Register ra, int rpiindex ); ++ void csrwc ( Register ra, int rpiindex ); ++ void csrr ( Register ra, int rpiindex ); ++ void csrw ( Register ra, int rpiindex ); ++ void pri_ret ( Register ra ); ++ void lldw ( Register ra, int atmdisp, Register rb ); ++ void lldl ( Register ra, int atmdisp, Register rb ); ++ void ldw_inc ( Register ra, int atmdisp, Register rb ); //SW2F ++ void ldl_inc ( Register ra, int atmdisp, Register rb ); //SW2F ++ void ldw_dec ( Register ra, int atmdisp, Register rb ); //SW2F ++ void ldl_dec ( 
Register ra, int atmdisp, Register rb ); //SW2F ++ void ldw_set ( Register ra, int atmdisp, Register rb ); //SW2F ++ void ldl_set ( Register ra, int atmdisp, Register rb ); //SW2F ++ void lstw ( Register ra, int atmdisp, Register rb ); ++ void lstl ( Register ra, int atmdisp, Register rb ); ++ void ldw_nc ( Register ra, int atmdisp, Register rb ); ++ void ldl_nc ( Register ra, int atmdisp, Register rb ); ++ void ldd_nc ( Register ra, int atmdisp, Register rb ); ++ void stw_nc ( Register ra, int atmdisp, Register rb ); ++ void stl_nc ( Register ra, int atmdisp, Register rb ); ++ void std_nc ( Register ra, int atmdisp, Register rb ); ++ void ldwe ( FloatRegister fa, int mdisp, Register rb ); ++ void ldse ( FloatRegister fa, int mdisp, Register rb ); ++ void ldde ( FloatRegister fa, int mdisp, Register rb ); ++ void vlds ( FloatRegister fa, int mdisp, Register rb ); ++ void vldd ( FloatRegister fa, int mdisp, Register rb ); ++ void vsts ( FloatRegister fa, int mdisp, Register rb ); ++ void vstd ( FloatRegister fa, int mdisp, Register rb ); ++ ++ void addw ( Register ra, Register rb, Register rc ); ++ void addw ( Register ra, int lit, Register rc ); ++ void subw ( Register ra, Register rb, Register rc ); ++ void subw ( Register ra, int lit, Register rc ); ++ void s4addw ( Register ra, Register rb, Register rc ); ++ void s4addw ( Register ra, int lit, Register rc ); ++ void s4subw ( Register ra, Register rb, Register rc ); ++ void s4subw ( Register ra, int lit, Register rc ); ++ void s8addw ( Register ra, Register rb, Register rc ); ++ void s8addw ( Register ra, int lit, Register rc ); ++ void s8subw ( Register ra, Register rb, Register rc ); ++ void s8subw ( Register ra, int lit, Register rc ); ++ void addl ( Register ra, Register rb, Register rc ); ++ void addl ( Register ra, int lit, Register rc ); ++ void subl ( Register ra, Register rb, Register rc ); ++ void subl ( Register ra, int lit, Register rc ); ++ void s4addl ( Register ra, Register rb, Register rc ); ++ void s4addl ( Register ra, int lit, Register rc ); ++ void s4subl ( Register ra, Register rb, Register rc ); ++ void s4subl ( Register ra, int lit, Register rc ); ++ void s8addl ( Register ra, Register rb, Register rc ); ++ void s8addl ( Register ra, int lit, Register rc ); ++ void s8subl ( Register ra, Register rb, Register rc ); ++ void s8subl ( Register ra, int lit, Register rc ); ++ void mulw ( Register ra, Register rb, Register rc ); ++ void mulw ( Register ra, int lit, Register rc ); ++ void divw ( Register ra, Register rb, Register rc ); //SW6B ++ void udivw ( Register ra, Register rb, Register rc ); //SW6B ++ void remw ( Register ra, Register rb, Register rc ); //SW6B ++ void uremw ( Register ra, Register rb, Register rc ); //SW6B ++ void mull ( Register ra, Register rb, Register rc ); ++ void mull ( Register ra, int lit, Register rc ); ++ void umulh ( Register ra, Register rb, Register rc ); ++ void umulh ( Register ra, int lit, Register rc ); ++ void divl ( Register ra, Register rb, Register rc ); //SW6B ++ void udivl ( Register ra, Register rb, Register rc ); //SW6B ++ void reml ( Register ra, Register rb, Register rc ); //SW6B ++ void ureml ( Register ra, Register rb, Register rc ); //SW6B ++ void addpi ( int apint, Register rc ); //SW6B ++ void addpis ( int apint, Register rc ); //SW6B ++ ++ void cmpeq ( Register ra, Register rb, Register rc ); ++ void cmpeq ( Register ra, int lit, Register rc ); ++ void cmplt ( Register ra, Register rb, Register rc ); ++ void cmplt ( Register ra, int lit, Register rc ); ++ void cmple 
( Register ra, Register rb, Register rc ); ++ void cmple ( Register ra, int lit, Register rc ); ++ void cmpult ( Register ra, Register rb, Register rc ); ++ void cmpult ( Register ra, int lit, Register rc ); ++ void cmpule ( Register ra, Register rb, Register rc ); ++ void cmpule ( Register ra, int lit, Register rc ); ++ void sbt ( Register ra, Register rb, Register rc ); ++ void sbt ( Register ra, int lit, Register rc ); ++ void cbt ( Register ra, Register rb, Register rc ); ++ void cbt ( Register ra, int lit, Register rc ); ++ void and_ins ( Register ra, Register rb, Register rc ); ++ void and_ins ( Register ra, int lit, Register rc ); ++ void bic ( Register ra, Register rb, Register rc ); ++ void bic ( Register ra, int lit, Register rc ); ++ void bis ( Register ra, Register rb, Register rc ); ++ void bis ( Register ra, int lit, Register rc ); ++ void ornot ( Register ra, Register rb, Register rc ); ++ void ornot ( Register ra, int lit, Register rc ); ++ void xor_ins ( Register ra, Register rb, Register rc ); ++ void xor_ins ( Register ra, int lit, Register rc ); ++ void eqv ( Register ra, Register rb, Register rc ); ++ void eqv ( Register ra, int lit, Register rc ); ++ void inslb ( Register ra, Register rb, Register rc ); ++ void inslb ( Register ra, int lit, Register rc ); ++ void inslh ( Register ra, Register rb, Register rc ); ++ void inslh ( Register ra, int lit, Register rc ); ++ void inslw ( Register ra, Register rb, Register rc ); ++ void inslw ( Register ra, int lit, Register rc ); ++ void insll ( Register ra, Register rb, Register rc ); ++ void insll ( Register ra, int lit, Register rc ); ++ void inshb ( Register ra, Register rb, Register rc ); ++ void inshb ( Register ra, int lit, Register rc ); ++ void inshh ( Register ra, Register rb, Register rc ); ++ void inshh ( Register ra, int lit, Register rc ); ++ void inshw ( Register ra, Register rb, Register rc ); ++ void inshw ( Register ra, int lit, Register rc ); ++ void inshl ( Register ra, Register rb, Register rc ); ++ void inshl ( Register ra, int lit, Register rc ); ++ void slll ( Register ra, Register rb, Register rc ); ++ void slll ( Register ra, int lit, Register rc ); ++ void srll ( Register ra, Register rb, Register rc ); ++ void srll ( Register ra, int lit, Register rc ); ++ void sral ( Register ra, Register rb, Register rc ); ++ void sral ( Register ra, int lit, Register rc ); ++ void roll ( Register ra, Register rb, Register rc ); ++ void roll ( Register ra, int lit, Register rc ); ++ void sllw ( Register ra, Register rb, Register rc ); ++ void sllw ( Register ra, int lit, Register rc ); ++ void srlw ( Register ra, Register rb, Register rc ); ++ void srlw ( Register ra, int lit, Register rc ); ++ void sraw ( Register ra, Register rb, Register rc ); ++ void sraw ( Register ra, int lit, Register rc ); ++ void rolw ( Register ra, Register rb, Register rc ); ++ void rolw ( Register ra, int lit, Register rc ); ++ void extlb ( Register ra, Register rb, Register rc ); ++ void extlb ( Register ra, int lit, Register rc ); ++ void extlh ( Register ra, Register rb, Register rc ); ++ void extlh ( Register ra, int lit, Register rc ); ++ void extlw ( Register ra, Register rb, Register rc ); ++ void extlw ( Register ra, int lit, Register rc ); ++ void extll ( Register ra, Register rb, Register rc ); ++ void extll ( Register ra, int lit, Register rc ); ++ void exthb ( Register ra, Register rb, Register rc ); ++ void exthb ( Register ra, int lit, Register rc ); ++ void exthh ( Register ra, Register rb, Register rc ); ++ void exthh ( 
Register ra, int lit, Register rc ); ++ void exthw ( Register ra, Register rb, Register rc ); ++ void exthw ( Register ra, int lit, Register rc ); ++ void exthl ( Register ra, Register rb, Register rc ); ++ void exthl ( Register ra, int lit, Register rc ); ++ void ctpop ( Register rb, Register rc ); ++ void ctlz ( Register rb, Register rc ); ++ void cttz ( Register rb, Register rc ); ++ void revbh ( Register rb, Register rc ); ++ void revbw ( Register rb, Register rc ); ++ void revbl ( Register rb, Register rc ); ++ void casw ( Register ra, Register rb, Register rc ); ++ void casl ( Register ra, Register rb, Register rc ); ++ void masklb ( Register ra, Register rb, Register rc ); ++ void masklb ( Register ra, int lit, Register rc ); ++ void masklh ( Register ra, Register rb, Register rc ); ++ void masklh ( Register ra, int lit, Register rc ); ++ void masklw ( Register ra, Register rb, Register rc ); ++ void masklw ( Register ra, int lit, Register rc ); ++ void maskll ( Register ra, Register rb, Register rc ); ++ void maskll ( Register ra, int lit, Register rc ); ++ void maskhb ( Register ra, Register rb, Register rc ); ++ void maskhb ( Register ra, int lit, Register rc ); ++ void maskhh ( Register ra, Register rb, Register rc ); ++ void maskhh ( Register ra, int lit, Register rc ); ++ void maskhw ( Register ra, Register rb, Register rc ); ++ void maskhw ( Register ra, int lit, Register rc ); ++ void maskhl ( Register ra, Register rb, Register rc ); ++ void maskhl ( Register ra, int lit, Register rc ); ++ void zap ( Register ra, Register rb, Register rc ); ++ void zap ( Register ra, int lit, Register rc ); ++ void zapnot ( Register ra, Register rb, Register rc ); ++ void zapnot ( Register ra, int lit, Register rc ); ++ void sextb ( Register rb, Register rc); ++ void sextb ( int lit, Register rc ); ++ void sexth ( Register rb, Register rc ); ++ void sexth ( int lit, Register rc ); ++ void cmpgeb ( Register ra, Register rb, Register rc ); ++ void cmpgeb ( Register ra, int lit, Register rc ); ++ void fimovs ( FloatRegister fa, Register rc ); // For sw4a SQData ++ void fimovd ( FloatRegister fa, Register rc ); // For sw4a SQData ++ void seleq ( Register ra, Register rb,Register r3, Register rc ); ++ void seleq ( Register ra, int lit, Register r3,Register rc ); ++ void selge ( Register ra, Register rb,Register r3, Register rc ); ++ void selge ( Register ra, int lit, Register r3,Register rc ); ++ void selgt ( Register ra, Register rb,Register r3, Register rc ); ++ void selgt ( Register ra, int lit, Register r3,Register rc ); ++ void selle ( Register ra, Register rb,Register r3, Register rc ); ++ void selle ( Register ra, int lit, Register r3,Register rc ); ++ void sellt ( Register ra, Register rb,Register r3, Register rc ); ++ void sellt ( Register ra, int lit, Register r3,Register rc ); ++ void selne ( Register ra, Register rb,Register r3, Register rc ); ++ void selne ( Register ra, int lit, Register r3,Register rc ); ++ void sellbc ( Register ra, Register rb,Register r3, Register rc ); ++ void sellbc ( Register ra, int lit, Register r3,Register rc ); ++ void sellbs ( Register ra, Register rb,Register r3, Register rc ); ++ void sellbs ( Register ra, int lit, Register r3,Register rc ); ++ ++ void vlog ( int vlog, FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ); ++ void vbisw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vxorw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vandw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); 
++ void veqvw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vornotw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vbicw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ ++ void fadds ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void faddd ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void fsubs ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void fsubd ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void fmuls ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void fmuld ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void fdivs ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void fdivd ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void fsqrts ( FloatRegister fb, FloatRegister fc ); ++ void fsqrtd ( FloatRegister fb, FloatRegister fc ); ++ void fcmpeq ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void fcmple ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void fcmplt ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void fcmpun ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void fcvtsd ( FloatRegister fb, FloatRegister fc ); ++ void fcvtds ( FloatRegister fb, FloatRegister fc ); ++ void fcvtdl_g ( FloatRegister fb, FloatRegister fc ); //lx_fcvtdl ++ void fcvtdl_p ( FloatRegister fb, FloatRegister fc ); ++ void fcvtdl_z ( FloatRegister fb, FloatRegister fc ); ++ void fcvtdl_n ( FloatRegister fb, FloatRegister fc ); //lx_fcvtdl ++ void fcvtdl ( FloatRegister fb, FloatRegister fc ); ++ void fcvtwl ( FloatRegister fb, FloatRegister fc ); ++ void fcvtlw ( FloatRegister fb, FloatRegister fc ); ++ void fcvtls ( FloatRegister fb, FloatRegister fc ); ++ void fcvtld ( FloatRegister fb, FloatRegister fc ); ++ void fcpys ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void fcpyse ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void fcpysn ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void ifmovs ( Register ra, FloatRegister fc ); // For sw4a SQData ++ void ifmovd ( Register ra, FloatRegister fc ); // For sw4a SQData ++ //cmov ++ void cmovdl ( Register rc, FloatRegister fb ); ++ void cmovdl_g ( Register rc, FloatRegister fb ); ++ void cmovdl_p ( Register rc, FloatRegister fb ); ++ void cmovdl_z ( Register rc, FloatRegister fb ); ++ void cmovdl_n ( Register rc, FloatRegister fb ); ++ ++ void cmovdlu ( Register rc, FloatRegister fb ); ++ void cmovdlu_g ( Register rc, FloatRegister fb ); ++ void cmovdlu_p ( Register rc, FloatRegister fb ); ++ void cmovdlu_z ( Register rc, FloatRegister fb ); ++ void cmovdlu_n ( Register rc, FloatRegister fb ); ++ ++ void cmovdw ( Register rc, FloatRegister fb ); ++ void cmovdw_g ( Register rc, FloatRegister fb ); ++ void cmovdw_p ( Register rc, FloatRegister fb ); ++ void cmovdw_z ( Register rc, FloatRegister fb ); ++ void cmovdw_n ( Register rc, FloatRegister fb ); ++ ++ void cmovdwu ( Register rc, FloatRegister fb ); ++ void cmovdwu_g ( Register rc, FloatRegister fb ); ++ void cmovdwu_p ( Register rc, FloatRegister fb ); ++ void cmovdwu_z ( Register rc, FloatRegister fb ); ++ void cmovdwu_n ( Register rc, FloatRegister fb ); ++ ++ void cmovls ( FloatRegister fc, Register rb ); ++ void cmovld ( FloatRegister fc, Register rb ); ++ void cmovuls ( FloatRegister fc, Register rb ); ++ void cmovuld ( FloatRegister fc, Register rb ); ++ void cmovws ( FloatRegister fc, Register rb ); ++ void cmovwd ( FloatRegister fc, Register rb 
); ++ void cmovuws ( FloatRegister fc, Register rb ); ++ void cmovuwd ( FloatRegister fc, Register rb ); ++ ++ void rfpcr ( FloatRegister fa); ++ void wfpcr ( FloatRegister fa); ++ void setfpec0 (); ++ void setfpec1 (); ++ void setfpec2 (); ++ void setfpec3 (); ++ void frecs ( FloatRegister fa, FloatRegister fc ); ++ void frecd ( FloatRegister fa, FloatRegister fc ); ++ void fris ( FloatRegister fb, FloatRegister fc ); ++ void fris_g ( FloatRegister fb, FloatRegister fc ); ++ void fris_p ( FloatRegister fb, FloatRegister fc ); ++ void fris_z ( FloatRegister fb, FloatRegister fc ); ++ void fris_n ( FloatRegister fb, FloatRegister fc ); ++ void frid ( FloatRegister fb, FloatRegister fc ); ++ void frid_g ( FloatRegister fb, FloatRegister fc ); ++ void frid_p ( FloatRegister fb, FloatRegister fc ); ++ void frid_z ( FloatRegister fb, FloatRegister fc ); ++ void frid_n ( FloatRegister fb, FloatRegister fc ); ++ void fmas ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void fmad ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void fmss ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void fmsd ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void fnmas ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void fnmad ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void fnmss ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void fnmsd ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void fseleq ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void fselne ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void fsellt ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void fselle ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void fselgt ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void fselge ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ ++ void vaddw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vaddw ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vsubw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsubw ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vcmpgew ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vcmpgew ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vcmpeqw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vcmpeqw ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vcmplew ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vcmplew ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vcmpltw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vcmpltw ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vcmpulew ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vcmpulew ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vcmpultw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vcmpultw ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vsllw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsllw ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vsrlw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsrlw ( FloatRegister fa, int lit, FloatRegister 
fc ); ++ void vsraw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsraw ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vrolw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vrolw ( FloatRegister fa, int lit, FloatRegister fc ); ++ void sllow ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void sllow ( FloatRegister fa, int lit, FloatRegister fc ); ++ void srlow ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void srlow ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vaddl ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vaddl ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vsubl ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsubl ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vsllb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsllb ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vsrlb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsrlb ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vsrab ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsrab ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vrolb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vrolb ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vsllh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsllh ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vsrlh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsrlh ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vsrah ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsrah ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vrolh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vrolh ( FloatRegister fa, int lit, FloatRegister fc ); ++ void ctpopow ( FloatRegister fa, FloatRegister fc ); ++ void ctlzow ( FloatRegister fa, FloatRegister fc ); ++ void vslll ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vslll ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vsrll ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsrll ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vsral ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsral ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vroll ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vroll ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vmaxb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vminb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ ++ void vucaddw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vucaddw ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vucsubw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vucsubw ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vucaddh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vucaddh ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vucsubh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vucsubh ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vucaddb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vucaddb ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vucsubb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vucsubb ( FloatRegister fa, int lit, FloatRegister fc ); ++ void sraow ( 
FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void sraow ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vsumw ( FloatRegister fa, FloatRegister fc ); ++ void vsuml ( FloatRegister fa, FloatRegister fc ); ++ void vcmpueqb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vcmpueqb ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vcmpugtb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vcmpugtb ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vmaxh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vminh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vmaxw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vminw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vmaxl ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vminl ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vumaxb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vuminb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vumaxh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vuminh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vumaxw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vuminw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vumaxl ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vuminl ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ ++ void vsm3msw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsm4key ( FloatRegister fa, int lit, FloatRegister fc ); ++ void vsm4r ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vbinvw ( FloatRegister fb, FloatRegister fc ); ++ ++ void vadds ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vaddd ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsubs ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsubd ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vmuls ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vmuld ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vdivs ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vdivd ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsqrts ( FloatRegister fb, FloatRegister fc ); ++ void vsqrtd ( FloatRegister fb, FloatRegister fc ); ++ void vfcmpeq ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vfcmple ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vfcmplt ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vfcmpun ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vcpys ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vfmov ( FloatRegister fa, FloatRegister fc ); ++ void vcpyse ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vcpysn ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vsums ( FloatRegister fa, FloatRegister fc ); ++ void vsumd ( FloatRegister fa, FloatRegister fc ); ++ void vfrecs ( FloatRegister fa, FloatRegister fc ); ++ void vfrecd ( FloatRegister fa, FloatRegister fc ); ++ void vfcvtsd ( FloatRegister fb, FloatRegister fc ); ++ void vfcvtds ( FloatRegister fb, FloatRegister fc ); ++ void vfcvtls ( FloatRegister fb, FloatRegister fc ); ++ void vfcvtld ( FloatRegister fb, FloatRegister fc ); ++ void vfcvtdl ( FloatRegister fb, FloatRegister fc ); ++ 
void vfcvtdl_g ( FloatRegister fb, FloatRegister fc ); ++ void vfcvtdl_p ( FloatRegister fb, FloatRegister fc ); ++ void vfcvtdl_z ( FloatRegister fb, FloatRegister fc ); ++ void vfcvtdl_n ( FloatRegister fb, FloatRegister fc ); ++ void vfris ( FloatRegister fb, FloatRegister fc ); ++ void vfris_g ( FloatRegister fb, FloatRegister fc ); ++ void vfris_p ( FloatRegister fb, FloatRegister fc ); ++ void vfris_z ( FloatRegister fb, FloatRegister fc ); ++ void vfris_n ( FloatRegister fb, FloatRegister fc ); ++ void vfrid ( FloatRegister fb, FloatRegister fc ); ++ void vfrid_g ( FloatRegister fb, FloatRegister fc ); ++ void vfrid_p ( FloatRegister fb, FloatRegister fc ); ++ void vfrid_z ( FloatRegister fb, FloatRegister fc ); ++ void vfrid_n ( FloatRegister fb, FloatRegister fc ); ++ void vmaxs ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vmins ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vmaxd ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vmind ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ ++ void vmas ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void vmad ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void vmss ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void vmsd ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void vnmas ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void vnmad ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void vnmss ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void vnmsd ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void vfseleq ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void vfsellt ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void vfselle ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void vseleqw ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void vseleqw ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); ++ void vsellbcw ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void vsellbcw ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); ++ void vselltw ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void vselltw ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); ++ void vsellew ( FloatRegister fa, FloatRegister fb, FloatRegister f3, FloatRegister fc ); ++ void vsellew ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); ++ void vinsw ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); ++ void vinsf ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); ++ void vextw ( FloatRegister fa, int fmalit, FloatRegister fc); ++ void vextf ( FloatRegister fa, int fmalit, FloatRegister fc); ++ void vcpyw ( FloatRegister fa, FloatRegister fc); ++ void vcpyf ( FloatRegister fa, FloatRegister fc); ++ void vconw ( FloatRegister va, FloatRegister vb, FloatRegister fc, FloatRegister vd ); ++ void vshfw ( FloatRegister va, FloatRegister vb, FloatRegister fc, FloatRegister vd ); ++ void vcons ( FloatRegister va, FloatRegister vb, FloatRegister fc, FloatRegister vd ); ++ void vcond ( FloatRegister va, FloatRegister vb, FloatRegister fc, FloatRegister vd ); ++ void 
vinsb ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); ++ void vinsh ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); ++ void vinsectlh ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vinsectlw ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vinsectll ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vinsectlb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vshfq ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); ++ void vshfqb ( FloatRegister fa, FloatRegister fb, FloatRegister fc ); ++ void vcpyb ( FloatRegister fa, FloatRegister fc ); ++ void vcpyh ( FloatRegister fa, FloatRegister fc ); ++ void vsm3r ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); ++ void vfcvtsh ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); ++ void vfcvths ( FloatRegister fa, FloatRegister fb, int fmalit, FloatRegister fc ); ++ ++ void vldw_u ( FloatRegister fa, int atmdisp, Register rb ); ++ void vstw_u ( FloatRegister fa, int atmdisp, Register rb ); ++ void vlds_u ( FloatRegister fa, int atmdisp, Register rb ); ++ void vsts_u ( FloatRegister fa, int atmdisp, Register rb ); ++ void vldd_u ( FloatRegister fa, int atmdisp, Register rb ); ++ void vstd_u ( FloatRegister fa, int atmdisp, Register rb ); ++ void vstw_ul ( FloatRegister fa, int atmdisp, Register rb ); ++ void vstw_uh ( FloatRegister fa, int atmdisp, Register rb ); ++ void vsts_ul ( FloatRegister fa, int atmdisp, Register rb ); ++ void vsts_uh ( FloatRegister fa, int atmdisp, Register rb ); ++ void vstd_ul ( FloatRegister fa, int atmdisp, Register rb ); ++ void vstd_uh ( FloatRegister fa, int atmdisp, Register rb ); ++ void lbr ( int palfn ); ++ void ldbu_a ( Register ra, int atmdisp, Register rb ); ++ void ldhu_a ( Register ra, int atmdisp, Register rb ); ++ void ldw_a ( Register ra, int atmdisp, Register rb ); ++ void ldl_a ( Register ra, int atmdisp, Register rb ); ++ void stb_a ( Register ra, int atmdisp, Register rb ); ++ void sth_a ( Register ra, int atmdisp, Register rb ); ++ void stw_a ( Register ra, int atmdisp, Register rb ); ++ void stl_a ( Register ra, int atmdisp, Register rb ); ++ void flds_a ( FloatRegister fa, int atmdisp, Register rb ); ++ void fldd_a ( FloatRegister fa, int atmdisp, Register rb ); ++ void fsts_a ( FloatRegister fa, int atmdisp, Register rb ); ++ void fstd_a ( FloatRegister fa, int atmdisp, Register rb ); ++ void dpfhr ( int th, int atmdisp, Register rb ); ++ void dpfhw ( int th, int atmdisp, Register rb ); ++ void ldbu ( Register ra, int mdisp, Register rb ); ++ void ldhu ( Register ra, int mdisp, Register rb ); ++ void ldw ( Register ra, int mdisp, Register rb ); ++ void ldl ( Register ra, int mdisp, Register rb ); ++ void ldl_u ( Register ra, int mdisp, Register rb ); ++ void pri_ld ( Register ra, int ev6hwdisp, Register rb ); ++ void flds ( FloatRegister fa, int mdisp, Register rb ); ++ void fldd ( FloatRegister fa, int mdisp, Register rb ); ++ void stb ( Register ra, int mdisp, Register rb ); ++ void sth ( Register ra, int mdisp, Register rb ); ++ void stw ( Register ra, int mdisp, Register rb ); ++ void stl ( Register ra, int mdisp, Register rb ); ++ void stl_u ( Register ra, int mdisp, Register rb ); ++ void pri_st ( Register ra, int ev6hwdisp, Register rb ); ++ void fsts ( FloatRegister fa, int mdisp, Register rb ); ++ void fstd ( FloatRegister fa, int mdisp, Register rb ); ++ void beq ( Register ra, int bdisp ); ++ void bne ( Register ra, int 
bdisp ); ++ void blt ( Register ra, int bdisp ); ++ void ble ( Register ra, int bdisp ); ++ void bgt ( Register ra, int bdisp ); ++ void bge ( Register ra, int bdisp ); ++ void blbc ( Register ra, int bdisp ); ++ void blbs ( Register ra, int bdisp ); ++ void fbeq ( FloatRegister fa, int bdisp ); ++ void fbne ( FloatRegister fa, int bdisp ); ++ void fblt ( FloatRegister fa, int bdisp ); ++ void fble ( FloatRegister fa, int bdisp ); ++ void fbgt ( FloatRegister fa, int bdisp ); ++ void fbge ( FloatRegister fa, int bdisp ); ++ void ldi ( Register ra, int mdisp, Register rb ); ++ void ldih ( Register ra, int mdisp, Register rb ); ++ ++ // cache control instruction ++ void s_fillcs ( int mdisp, Register rb ); ++ void s_fillde ( int mdisp, Register rb ); ++ void fillde ( int mdisp, Register rb ); ++ void fillde_e ( int mdisp, Register rb ); ++ void fillcs ( int mdisp, Register rb ); ++ void fillcs_e ( int mdisp, Register rb ); ++ void e_fillcs ( int mdisp, Register rb ); ++ void e_fillde ( int mdisp, Register rb ); ++ void flushd ( int mdisp, Register rb ); ++ void evictdl ( int mdisp, Register rb ); ++ void evictdg ( int mdisp, Register rb ); ++ ++ //jzy just for compiling, maybe delete in future ++ static address locate_operand(address inst, WhichOperand which) { assert(false, "unimplement locate_operand:jzy"); return inst;} ++ static address locate_next_instruction(address inst) { assert(false, "unimplement locate_next_instruction:jzy"); return inst;} ++ static bool is_polling_page_far() { assert(false, "unimplement is_polling_page_far:jzy");; return false; } ++ void clflush(Address addr) { assert(false, "unimplement clflush:jzy"); } ++}; ++ ++// Invert a condition ++inline const Assembler::Condition operator~(const Assembler::Condition cond) { ++ return Assembler::Condition(int(cond) ^ 1); ++} ++ ++class BiasedLockingCounters; ++ ++extern "C" void das(uint64_t start, int len); ++ ++#endif // CPU_SW64_VM_ASSEMBLER_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/assembler_sw64.inline.hpp b/src/hotspot/cpu/sw64/assembler_sw64.inline.hpp +new file mode 100644 +index 0000000000..d5f5355f8c +--- /dev/null ++++ b/src/hotspot/cpu/sw64/assembler_sw64.inline.hpp +@@ -0,0 +1,1227 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_SW64_VM_ASSEMBLER_SW64_INLINE_HPP ++#define CPU_SW64_VM_ASSEMBLER_SW64_INLINE_HPP ++ ++#include "asm/assembler.inline.hpp" ++#include "asm/codeBuffer.hpp" ++#include "code/codeCache.hpp" ++ ++inline void Assembler::emit_sw2_long(int x) { ++ AbstractAssembler::emit_int32(x); ++} ++ ++inline void Assembler::sys_call_b( int palfn ) ++ { emit_sw2_long( op_sys_call | is_palfn(palfn) ); } ++inline void Assembler::sys_call( int palfn ) ++ { sys_call_b(palfn); /* emit_sw2_long( op_sys_call | ( 0x1 << 25 ) | is_palfn(palfn) );*/ } ++ ++inline void Assembler::call( Register ra, Register rb, int jmphint ) ++ { emit_sw2_long( op_call | is_ra(ra) | is_rb(rb) | is_jmphint(jmphint) ); } ++inline void Assembler::ret( Register ra, Register rb, int rethint ) ++ { emit_sw2_long( op_ret | is_ra(ra) | is_rb(rb) | is_rethint(rethint) ); } ++inline void Assembler::jmp( Register ra, Register rb, int jmphint ) ++ { emit_sw2_long( op_jmp | is_ra(ra) | is_rb(rb) | is_jmphint(jmphint) ); } ++inline void Assembler::br( Register ra, int bdisp ) ++ { emit_sw2_long( op_br | is_ra(ra) | is_bdisp(bdisp) ); } ++inline void Assembler::bsr( Register ra, int bdisp ) ++ { emit_sw2_long( op_bsr | is_ra(ra) | is_bdisp(bdisp) ); } ++inline void Assembler::memb( void ) ++ { emit_sw2_long( op_memb); } ++inline void Assembler::imemb( void ) ++ { sw3_only(); emit_sw2_long( op_imemb); } ++inline void Assembler::wmemb( void ) ++ { sw4_only(); emit_sw2_long( op_wmemb); } ++inline void Assembler::rtc( Register ra, Register rb ) ++ { emit_sw2_long( op_rtc | is_ra(ra) | is_rb(rb) ); } ++inline void Assembler::rcid( Register ra ) ++ { emit_sw2_long( op_rcid | is_ra(ra) ); } ++inline void Assembler::halt( void ) ++ { emit_sw2_long( op_halt ); } ++inline void Assembler::rd_f( Register ra ) ++ { sw2_only(); emit_sw2_long( op_rd_f | is_ra(ra) | is_rb(R0) ); } ++inline void Assembler::wr_f( Register ra ) ++ { sw2_only(); emit_sw2_long( op_wr_f | is_ra(ra) | is_rb(R0) ); } ++inline void Assembler::rtid( Register ra ) ++ { emit_sw2_long( op_rtid | is_ra(ra) ); } ++inline void Assembler::csrws( Register ra, int rpiindex ) ++{ sw4_only(); emit_sw2_long( op_csrws | is_ra(ra) | is_rpiindex(rpiindex) ); } ++inline void Assembler::csrwc( Register ra, int rpiindex ) ++{ sw4_only(); emit_sw2_long( op_csrwc | is_ra(ra) | is_rpiindex(rpiindex) ); } ++inline void Assembler::csrr( Register ra, int rpiindex ) ++ { emit_sw2_long( op_csrr | is_ra(ra) | is_rpiindex(rpiindex) ); } ++inline void Assembler::csrw( Register ra, int rpiindex ) ++ { emit_sw2_long( op_csrw | is_ra(ra) | is_rpiindex(rpiindex) ); } ++inline void Assembler::pri_ret( Register ra ) ++ { emit_sw2_long( op_pri_ret | is_ra(ra) ); } ++ ++inline void Assembler::lldw( Register ra, int atmdisp, Register rb ) ++ { emit_sw2_long( op_lldw | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::lldl( Register ra, int atmdisp, Register rb ) ++ { emit_sw2_long( op_lldl | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++ ++inline void Assembler::ldw_inc( Register ra, int atmdisp, Register rb ) ++ { sw2_only(); emit_sw2_long( op_ldw_inc | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::ldl_inc( Register ra, int atmdisp, Register rb ) ++ { sw2_only(); emit_sw2_long( op_ldl_inc | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::ldw_dec( Register ra, int atmdisp, Register rb ) ++ { sw2_only(); emit_sw2_long( op_ldw_dec | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::ldl_dec( Register 
ra, int atmdisp, Register rb ) ++ { sw2_only(); emit_sw2_long( op_ldl_dec | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::ldw_set( Register ra, int atmdisp, Register rb ) ++ { sw2_only(); emit_sw2_long( op_ldw_set | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::ldl_set( Register ra, int atmdisp, Register rb ) ++ { sw2_only(); emit_sw2_long( op_ldl_set | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++ ++inline void Assembler::lstw( Register ra, int atmdisp, Register rb ) ++ { emit_sw2_long( op_lstw | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::lstl( Register ra, int atmdisp, Register rb ) ++ { emit_sw2_long( op_lstl | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::ldw_nc( Register ra, int atmdisp, Register rb ) ++ { emit_sw2_long( op_ldw_nc | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::ldl_nc( Register ra, int atmdisp, Register rb ) ++ { emit_sw2_long( op_ldl_nc | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::ldd_nc( Register ra, int atmdisp, Register rb ) ++ { emit_sw2_long( op_ldd_nc | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::stw_nc( Register ra, int atmdisp, Register rb ) ++ { emit_sw2_long( op_stw_nc | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::stl_nc( Register ra, int atmdisp, Register rb ) ++ { emit_sw2_long( op_stl_nc | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::std_nc( Register ra, int atmdisp, Register rb ) ++ { emit_sw2_long( op_std_nc | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++ ++inline void Assembler::ldwe( FloatRegister fa, int mdisp, Register rb ) ++ { emit_sw2_long( op_ldwe | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::ldse( FloatRegister fa, int mdisp, Register rb ) ++ { emit_sw2_long( op_ldse | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::ldde( FloatRegister fa, int mdisp, Register rb ) ++ { emit_sw2_long( op_ldde | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::vlds( FloatRegister fa, int mdisp, Register rb ) ++ { emit_sw2_long( op_vlds | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::vldd( FloatRegister fa, int mdisp, Register rb ) ++ { emit_sw2_long( op_vldd | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::vsts( FloatRegister fa, int mdisp, Register rb ) ++ { emit_sw2_long( op_vsts | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::vstd( FloatRegister fa, int mdisp, Register rb ) ++ { emit_sw2_long( op_vstd | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } ++ ++inline void Assembler::addw( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_addw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::addw( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_addw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::subw( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_subw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::subw( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_subw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::s4addw( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_s4addw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::s4addw( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_s4addw_l | is_ra(ra) 
| is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::s4subw( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_s4subw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::s4subw( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_s4subw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::s8addw( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_s8addw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::s8addw( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_s8addw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::s8subw( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_s8subw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::s8subw( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_s8subw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++ ++inline void Assembler::addl( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_addl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::addl( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_addl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::subl( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_subl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::subl( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_subl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::s4addl( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_s4addl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::s4addl( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_s4addl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::s4subl( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_s4subl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::s4subl( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_s4subl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::s8addl( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_s8addl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::s8addl( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_s8addl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::s8subl( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_s8subl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::s8subl( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_s8subl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++ ++inline void Assembler::mulw( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_mulw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::mulw( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_mulw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++ ++inline void Assembler::divw( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_divw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::udivw( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_udivw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::remw( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_remw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::uremw( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_uremw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++ ++inline void Assembler::mull( Register ra, 
Register rb, Register rc ) ++ { emit_sw2_long( op_mull | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::mull( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_mull_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::umulh( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_umulh | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::umulh( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_umulh_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++ ++inline void Assembler::divl( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_divl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::udivl( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_udivl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::reml( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_reml | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::ureml( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_ureml | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++ ++inline void Assembler::addpi( int apint, Register rc ) ++ { sw4_only(); emit_sw2_long( op_addpi | is_apint(apint) | is_rc(rc) ); } ++inline void Assembler::addpis( int apint, Register rc ) ++ { sw4_only(); emit_sw2_long( op_addpis | is_apint(apint) | is_rc(rc) ); } ++ ++inline void Assembler::cmpeq( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_cmpeq | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::cmpeq( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_cmpeq_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::cmplt( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_cmplt | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::cmplt( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_cmplt_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::cmple( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_cmple | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::cmple( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_cmple_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::cmpult( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_cmpult | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::cmpult( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_cmpult_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::cmpule( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_cmpule | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::cmpule( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_cmpule_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++ ++inline void Assembler::sbt( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_sbt | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::sbt( Register ra, int lit, Register rc ) ++ { sw4_only(); emit_sw2_long( op_sbt_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::cbt( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_cbt | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::cbt( Register ra, int lit, Register rc ) ++ { sw4_only(); emit_sw2_long( op_cbt_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++ ++inline void Assembler::and_ins( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_and | 
is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::and_ins( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_and_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::bic( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_bic | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::bic( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_bic_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::bis( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_bis | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::bis( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_bis_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::ornot( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_ornot | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::ornot( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_ornot_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::xor_ins( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_xor | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::xor_ins( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_xor_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::eqv( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_eqv | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::eqv( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_eqv_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++ ++inline void Assembler::inslb( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_inslb | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::inslb( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_inslb_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::inslh( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_inslh | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::inslh( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_inslh_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::inslw( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_inslw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::inslw( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_inslw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::insll( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_insll | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::insll( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_insll_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::inshb( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_inshb | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::inshb( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_inshb_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::inshh( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_inshh | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::inshh( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_inshh_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::inshw( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_inshw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::inshw( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_inshw_l | is_ra(ra) | is_lit(lit) | 
is_rc(rc) ); } ++inline void Assembler::inshl( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_inshl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::inshl( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_inshl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++ ++inline void Assembler::slll( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_slll | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::slll( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_slll_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::srll( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_srll | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::srll( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_srll_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::sral( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_sral | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::sral( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_sral_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::roll( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_roll | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::roll( Register ra, int lit, Register rc ) ++ { sw4_only(); emit_sw2_long( op_roll_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::sllw( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_sllw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::sllw( Register ra, int lit, Register rc ) ++ { sw4_only(); emit_sw2_long( op_sllw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::srlw( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_srlw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::srlw( Register ra, int lit, Register rc ) ++ { sw4_only(); emit_sw2_long( op_srlw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::sraw( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_sraw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::sraw( Register ra, int lit, Register rc ) ++ { sw4_only(); emit_sw2_long( op_sraw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::rolw( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_rolw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::rolw( Register ra, int lit, Register rc ) ++ { sw4_only(); emit_sw2_long( op_rolw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++ ++inline void Assembler::extlb( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_extlb | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::extlb( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_extlb_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::extlh( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_extlh | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::extlh( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_extlh_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::extlw( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_extlw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::extlw( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_extlw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::extll( 
Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_extll | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::extll( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_extll_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::exthb( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_exthb | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::exthb( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_exthb_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::exthh( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_exthh | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::exthh( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_exthh_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::exthw( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_exthw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::exthw( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_exthw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::exthl( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_exthl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::exthl( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_exthl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++ ++inline void Assembler::ctpop( Register rb, Register rc ) ++ { emit_sw2_long( op_ctpop | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::ctlz( Register rb, Register rc ) ++ { emit_sw2_long( op_ctlz | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::cttz( Register rb, Register rc ) ++ { emit_sw2_long( op_cttz | is_rb(rb) | is_rc(rc) ); } ++ ++inline void Assembler::revbh( Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_revbh | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::revbw( Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_revbw | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::revbl( Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_revbl | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::casw( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_casw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::casl( Register ra, Register rb, Register rc ) ++ { sw4_only(); emit_sw2_long( op_casl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++ ++inline void Assembler::masklb( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_masklb | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::masklb( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_masklb_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::masklh( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_masklh | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::masklh( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_masklh_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::masklw( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_masklw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::masklw( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_masklw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::maskll( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_maskll | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::maskll( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_maskll_l | 
is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::maskhb( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_maskhb | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::maskhb( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_maskhb_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::maskhh( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_maskhh | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::maskhh( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_maskhh_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::maskhw( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_maskhw | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::maskhw( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_maskhw_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::maskhl( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_maskhl | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::maskhl( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_maskhl_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++ ++inline void Assembler::zap( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_zap | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::zap( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_zap_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::zapnot( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_zapnot | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::zapnot( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_zapnot_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::sextb( Register rb, Register rc) ++ { emit_sw2_long( op_sextb | is_ra(R0) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::sextb( int lit, Register rc ) ++ { emit_sw2_long( op_sextb_l | is_ra(R0) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::sexth( Register rb, Register rc ) ++ { emit_sw2_long( op_sexth | is_ra(R0) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::sexth( int lit, Register rc ) ++ { emit_sw2_long( op_sexth_l | is_ra(R0) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::cmpgeb( Register ra, Register rb, Register rc ) ++ { emit_sw2_long( op_cmpgeb | is_ra(ra) | is_rb(rb) | is_rc(rc) ); } ++inline void Assembler::cmpgeb( Register ra, int lit, Register rc ) ++ { emit_sw2_long( op_cmpgeb_l | is_ra(ra) | is_lit(lit) | is_rc(rc) ); } ++inline void Assembler::fimovs( FloatRegister fa, Register rc ) // For sw4a SQData ++ { emit_sw2_long( op_fimovs | is_fa(fa) | is_rc(rc) ); } ++inline void Assembler::fimovd( FloatRegister fa, Register rc ) // For sw4a SQData ++ { emit_sw2_long( op_fimovd | is_fa(fa) | is_rc(rc) ); } ++ ++inline void Assembler::seleq( Register ra, Register rb,Register r3, Register rc ) ++ { emit_sw2_long( op_seleq | is_ra(ra) | is_rb(rb) | is_r3(r3) | is_rc(rc) ); } ++inline void Assembler::seleq( Register ra, int lit, Register r3,Register rc ) ++ { emit_sw2_long( op_seleq_l | is_ra(ra) | is_lit(lit) | is_r3(r3) | is_rc(rc) ); } ++inline void Assembler::selge( Register ra, Register rb,Register r3, Register rc ) ++ { emit_sw2_long( op_selge | is_ra(ra) | is_rb(rb) | is_r3(r3) | is_rc(rc) ); } ++inline void Assembler::selge( Register ra, int lit, Register r3,Register rc ) ++ { emit_sw2_long( op_selge_l | is_ra(ra) | is_lit(lit) | is_r3(r3) | is_rc(rc) ); } ++inline void Assembler::selgt( 
Register ra, Register rb,Register r3, Register rc ) ++ { emit_sw2_long( op_selgt | is_ra(ra) | is_rb(rb) | is_r3(r3) | is_rc(rc) ); } ++inline void Assembler::selgt( Register ra, int lit, Register r3,Register rc ) ++ { emit_sw2_long( op_selgt_l | is_ra(ra) | is_lit(lit) | is_r3(r3) | is_rc(rc) ); } ++inline void Assembler::selle( Register ra, Register rb,Register r3, Register rc ) ++ { emit_sw2_long( op_selle | is_ra(ra) | is_rb(rb) | is_r3(r3) | is_rc(rc) ); } ++inline void Assembler::selle( Register ra, int lit, Register r3,Register rc ) ++ { emit_sw2_long( op_selle_l | is_ra(ra) | is_lit(lit) | is_r3(r3) | is_rc(rc) ); } ++inline void Assembler::sellt( Register ra, Register rb,Register r3, Register rc ) ++ { emit_sw2_long( op_sellt | is_ra(ra) | is_rb(rb) | is_r3(r3) | is_rc(rc) ); } ++inline void Assembler::sellt( Register ra, int lit, Register r3,Register rc ) ++ { emit_sw2_long( op_sellt_l | is_ra(ra) | is_lit(lit) | is_r3(r3) | is_rc(rc) ); } ++inline void Assembler::selne( Register ra, Register rb,Register r3, Register rc ) ++ { emit_sw2_long( op_selne | is_ra(ra) | is_rb(rb) | is_r3(r3) | is_rc(rc) ); } ++inline void Assembler::selne( Register ra, int lit, Register r3,Register rc ) ++ { emit_sw2_long( op_selne_l | is_ra(ra) | is_lit(lit) | is_r3(r3) | is_rc(rc) ); } ++inline void Assembler::sellbc( Register ra, Register rb,Register r3, Register rc ) ++ { emit_sw2_long( op_sellbc | is_ra(ra) | is_rb(rb) | is_r3(r3) | is_rc(rc) ); } ++inline void Assembler::sellbc( Register ra, int lit, Register r3,Register rc ) ++ { emit_sw2_long( op_sellbc_l | is_ra(ra) | is_lit(lit) | is_r3(r3) | is_rc(rc) ); } ++inline void Assembler::sellbs( Register ra, Register rb,Register r3, Register rc ) ++ { emit_sw2_long( op_sellbs | is_ra(ra) | is_rb(rb) | is_r3(r3) | is_rc(rc) ); } ++inline void Assembler::sellbs( Register ra, int lit, Register r3,Register rc ) ++ { emit_sw2_long( op_sellbs_l | is_ra(ra) | is_lit(lit) | is_r3(r3) | is_rc(rc) ); } ++ ++inline void Assembler::vlog( int vlog ,FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vlog | is_vlog_h(vlog) | is_vlog_l(vlog) | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vbisw( FloatRegister fa , FloatRegister fb , FloatRegister fc ) ++ { emit_sw2_long( op_vbisw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vxorw( FloatRegister fa , FloatRegister fb , FloatRegister fc ) ++ { emit_sw2_long( op_vxorw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vandw( FloatRegister fa , FloatRegister fb , FloatRegister fc ) ++ { emit_sw2_long( op_vandw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::veqvw( FloatRegister fa , FloatRegister fb , FloatRegister fc ) ++ { emit_sw2_long( op_veqvw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vornotw( FloatRegister fa , FloatRegister fb , FloatRegister fc ) ++ { emit_sw2_long( op_vornotw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vbicw( FloatRegister fa , FloatRegister fb , FloatRegister fc ) ++ { emit_sw2_long( op_vbicw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++ ++inline void Assembler::fadds( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fadds | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::faddd( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_faddd | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fsubs( FloatRegister fa, FloatRegister fb, 
FloatRegister fc ) ++ { emit_sw2_long( op_fsubs | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fsubd( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fsubd | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fmuls( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fmuls | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fmuld( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fmuld | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fdivs( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fdivs | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fdivd( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fdivd | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fsqrts( FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fsqrts | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fsqrtd( FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fsqrtd | is_fb(fb) | is_fc(fc) ); } ++ ++inline void Assembler::fcmpeq( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fcmpeq | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fcmple( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fcmple | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fcmplt( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fcmplt | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fcmpun( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fcmpun | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++ ++inline void Assembler::fcvtsd( FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fcvtsd | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fcvtds( FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fcvtds | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fcvtdl_g( FloatRegister fb, FloatRegister fc ) //lx_fcvtdl ++ { emit_sw2_long( op_fcvtdl_g | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fcvtdl_p( FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fcvtdl_p | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fcvtdl_z( FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fcvtdl_z | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fcvtdl_n( FloatRegister fb, FloatRegister fc ) //lx_fcvtdl ++ { emit_sw2_long( op_fcvtdl_n | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fcvtdl( FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fcvtdl | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fcvtwl( FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fcvtwl | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fcvtlw( FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fcvtlw | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fcvtls( FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fcvtls | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fcvtld( FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fcvtld | is_fb(fb) | is_fc(fc) ); } ++ ++inline void Assembler::fcpys( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fcpys | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fcpyse( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fcpyse | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } 
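++
++// Usage sketch (comment only; the register names below are placeholders, not
++// values taken from this patch): in this Alpha-derived ISA a plain floating-point
++// register move is conventionally expressed as a sign copy of a register onto
++// itself, the same idiom vfmov uses further down by emitting op_vcpys with
++// fb == fa. A caller would therefore presumably write:
++//   fcpys(F1, F1, F2);   // assumed scalar fmov: copy F1 into F2
++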
++inline void Assembler::fcpysn( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_fcpysn | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::ifmovs( Register ra, FloatRegister fc ) ++ { emit_sw2_long( op_ifmovs | is_ra(ra) | is_fc(fc) ); } ++inline void Assembler::ifmovd( Register ra, FloatRegister fc ) ++ { emit_sw2_long( op_ifmovd | is_ra(ra) | is_fc(fc) ); } ++ //cmov ++inline void Assembler::cmovdl( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdl | is_fb(fb) | is_rc(rc) ); } ++inline void Assembler::cmovdl_g( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdl_g | is_fb(fb) | is_rc(rc) ); } ++inline void Assembler::cmovdl_p( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdl_p | is_fb(fb) | is_rc(rc) ); } ++inline void Assembler::cmovdl_z( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdl_z | is_fb(fb) | is_rc(rc) ); } ++inline void Assembler::cmovdl_n( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdl_n | is_fb(fb) | is_rc(rc) ); } ++ ++inline void Assembler::cmovdlu( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdlu | is_fb(fb) | is_rc(rc) ); } ++inline void Assembler::cmovdlu_g( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdlu_g | is_fb(fb) | is_rc(rc) ); } ++inline void Assembler::cmovdlu_p( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdlu_p | is_fb(fb) | is_rc(rc) ); } ++inline void Assembler::cmovdlu_z( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdlu_z | is_fb(fb) | is_rc(rc) ); } ++inline void Assembler::cmovdlu_n( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdlu_n | is_fb(fb) | is_rc(rc) ); } ++ ++inline void Assembler::cmovdw( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdw | is_fb(fb) | is_rc(rc) ); } ++inline void Assembler::cmovdw_g( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdw_g | is_fb(fb) | is_rc(rc) ); } ++inline void Assembler::cmovdw_p( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdw_p | is_fb(fb) | is_rc(rc) ); } ++inline void Assembler::cmovdw_z( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdw_z | is_fb(fb) | is_rc(rc) ); } ++inline void Assembler::cmovdw_n( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdw_n | is_fb(fb) | is_rc(rc) ); } ++ ++inline void Assembler::cmovdwu( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdwu | is_fb(fb) | is_rc(rc) ); } ++inline void Assembler::cmovdwu_g( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdwu_g | is_fb(fb) | is_rc(rc) ); } ++inline void Assembler::cmovdwu_p( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdwu_p | is_fb(fb) | is_rc(rc) ); } ++inline void Assembler::cmovdwu_z( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdwu_z | is_fb(fb) | is_rc(rc) ); } ++inline void Assembler::cmovdwu_n( Register rc, FloatRegister fb ) ++{ sw4_only(); emit_sw2_long( op_cmovdwu_n | is_fb(fb) | is_rc(rc) ); } ++ ++inline void Assembler::cmovls( FloatRegister fc, Register rb ) ++{ sw4_only(); emit_sw2_long( op_cmovls | is_rb(rb) | is_fc(fc) ); } ++inline void Assembler::cmovld( FloatRegister fc, Register rb ) ++{ sw4_only(); emit_sw2_long( op_cmovld | is_rb(rb) | is_fc(fc) ); } ++inline void Assembler::cmovuls( FloatRegister fc, Register rb ) ++{ 
sw4_only(); emit_sw2_long( op_cmovuls | is_rb(rb) | is_fc(fc) ); } ++inline void Assembler::cmovuld( FloatRegister fc, Register rb ) ++{ sw4_only(); emit_sw2_long( op_cmovuld | is_rb(rb) | is_fc(fc) ); } ++inline void Assembler::cmovws( FloatRegister fc, Register rb ) ++{ sw4_only(); emit_sw2_long( op_cmovws | is_rb(rb) | is_fc(fc) ); } ++inline void Assembler::cmovwd( FloatRegister fc, Register rb ) ++{ sw4_only(); emit_sw2_long( op_cmovwd | is_rb(rb) | is_fc(fc) ); } ++inline void Assembler::cmovuws( FloatRegister fc, Register rb ) ++{ sw4_only(); emit_sw2_long( op_cmovuws | is_rb(rb) | is_fc(fc) ); } ++inline void Assembler::cmovuwd( FloatRegister fc, Register rb ) ++{ sw4_only(); emit_sw2_long( op_cmovuwd | is_rb(rb) | is_fc(fc) ); } ++ ++inline void Assembler::rfpcr( FloatRegister fa) ++ { emit_sw2_long( op_rfpcr | is_fa(fa) ); } ++inline void Assembler::wfpcr( FloatRegister fa) ++ { emit_sw2_long( op_wfpcr | is_fa(fa) ); } ++ ++inline void Assembler::setfpec0() { emit_sw2_long( op_setfpec0 ); } ++inline void Assembler::setfpec1() { emit_sw2_long( op_setfpec1 ); } ++inline void Assembler::setfpec2() { emit_sw2_long( op_setfpec2 ); } ++inline void Assembler::setfpec3() { emit_sw2_long( op_setfpec3 ); } ++ ++inline void Assembler::frecs( FloatRegister fa, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_frecs | is_fa(fa) | is_fc(fc) ); } ++inline void Assembler::frecd( FloatRegister fa, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_frecd | is_fa(fa) | is_fc(fc) ); } ++inline void Assembler::fris( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_fris | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fris_g( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_fris_g | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fris_p( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_fris_p | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fris_z( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_fris_z | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::fris_n( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_fris_n | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::frid( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_frid | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::frid_g( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_frid_g | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::frid_p( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_frid_p | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::frid_z( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_frid_z | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::frid_n( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_frid_n | is_fb(fb) | is_fc(fc) ); } ++ ++inline void Assembler::fmas( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_fmas | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::fmad( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_fmad | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::fmss( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_fmss | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::fmsd( FloatRegister fa,FloatRegister fb,FloatRegister f3, 
FloatRegister fc ) ++ { emit_sw2_long( op_fmsd | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::fnmas( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_fnmas | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::fnmad( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_fnmad | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::fnmss( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_fnmss | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::fnmsd( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_fnmsd | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++ ++inline void Assembler::fseleq( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_fseleq | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::fselne( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_fselne | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::fsellt( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_fsellt | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::fselle( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_fselle | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::fselgt( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_fselgt | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::fselge( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_fselge | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++ ++inline void Assembler::vaddw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vaddw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vaddw( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vaddw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vsubw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vsubw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsubw( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vsubw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++ ++inline void Assembler::vcmpgew( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vcmpgew | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vcmpgew( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vcmpgew_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vcmpeqw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vcmpeqw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vcmpeqw( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vcmpeqw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vcmplew( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vcmplew | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vcmplew( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vcmplew_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void 
Assembler::vcmpltw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vcmpltw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vcmpltw( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vcmpltw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vcmpulew( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vcmpulew | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vcmpulew( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vcmpulew_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vcmpultw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vcmpultw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vcmpultw( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vcmpultw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++ ++inline void Assembler::vsllw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vsllw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsllw( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vsllw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vsrlw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vsrlw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsrlw( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vsrlw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vsraw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vsraw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsraw( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vsraw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vrolw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vrolw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vrolw( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vrolw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::sllow( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_sllow | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::sllow( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_sllow_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::srlow( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_srlow | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::srlow( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_srlow_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++ ++inline void Assembler::vaddl( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vaddl | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vaddl( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vaddl_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vsubl( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vsubl | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsubl( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vsubl_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++ ++inline void Assembler::vsllb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsllb 
| is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsllb( FloatRegister fa, int lit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsllb_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vsrlb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsrlb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsrlb( FloatRegister fa, int lit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsrlb_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vsrab( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsrab | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsrab( FloatRegister fa, int lit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsrab_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vrolb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vrolb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vrolb( FloatRegister fa, int lit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vrolb_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vsllh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsllh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsllh( FloatRegister fa, int lit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsllh_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vsrlh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsrlh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsrlh( FloatRegister fa, int lit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsrlh_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vsrah( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsrah | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsrah( FloatRegister fa, int lit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsrah_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vrolh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vrolh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vrolh( FloatRegister fa, int lit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vrolh_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++ ++inline void Assembler::ctpopow( FloatRegister fa, FloatRegister fc ) ++ { emit_sw2_long( op_ctpopow | is_fa(fa) | is_fc(fc) ); } ++inline void Assembler::ctlzow( FloatRegister fa, FloatRegister fc ) ++ { emit_sw2_long( op_ctlzow | is_fa(fa) | is_fc(fc) ); } ++ ++inline void Assembler::vslll( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vslll | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vslll( FloatRegister fa, int lit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vslll_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vsrll( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsrll | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsrll( FloatRegister fa, int lit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsrll_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vsral( FloatRegister fa, FloatRegister fb, FloatRegister fc ) 
++ { sw4_only(); emit_sw2_long( op_vsral | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsral( FloatRegister fa, int lit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsral_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vroll( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vroll | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vroll( FloatRegister fa, int lit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vroll_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vmaxb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vmaxb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vminb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vminb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++ ++inline void Assembler::vucaddw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vucaddw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vucaddw( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vucaddw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vucsubw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vucsubw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vucsubw( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vucsubw_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vucaddh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vucaddh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vucaddh( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vucaddh_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vucsubh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vucsubh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vucsubh( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vucsubh_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vucaddb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vucaddb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vucaddb( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vucaddb_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vucsubb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vucsubb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vucsubb( FloatRegister fa, int lit, FloatRegister fc ) ++ { emit_sw2_long( op_vucsubb_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++ ++inline void Assembler::sraow( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_sraow | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::sraow( FloatRegister fa, int lit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long(op_sraow_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vsumw( FloatRegister fa, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsumw | is_fa(fa) | is_fc(fc) ); } ++inline void Assembler::vsuml( FloatRegister fa, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsuml | is_fa(fa) | is_fc(fc) ); } ++inline void Assembler::vcmpueqb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); 
emit_sw2_long( op_vcmpueqb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vcmpueqb( FloatRegister fa, int lit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vcmpueqb_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vcmpugtb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vcmpugtb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vcmpugtb( FloatRegister fa, int lit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vcmpugtb_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vmaxh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vmaxh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vminh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vminh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vmaxw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vmaxw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vminw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vminw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vmaxl( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vmaxl | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vminl( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vminl | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vumaxb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vumaxb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vuminb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vuminb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vumaxh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vumaxh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vuminh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vuminh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vumaxw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vumaxw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vuminw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vuminw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vumaxl( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vumaxl | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vuminl( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vuminl | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++ ++inline void Assembler::vsm3msw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsm3msw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsm4key( FloatRegister fa, int lit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsm4key_l | is_fa(fa) | is_lit(lit) | is_fc(fc) ); } ++inline void Assembler::vsm4r( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsm4r | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vbinvw( FloatRegister fb, FloatRegister fc ) ++ { 
sw4_only(); emit_sw2_long( op_vbinvw | is_fb(fb) | is_fc(fc) ); } ++ ++inline void Assembler::vadds( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vadds | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vaddd( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vaddd | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsubs( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vsubs | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsubd( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vsubd | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vmuls( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vmuls | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vmuld( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vmuld | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vdivs( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vdivs | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vdivd( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vdivd | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsqrts( FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vsqrts | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vsqrtd( FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vsqrtd | is_fb(fb) | is_fc(fc) ); } ++ ++inline void Assembler::vfcmpeq( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vfcmpeq | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfcmple( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vfcmple | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfcmplt( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vfcmplt | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfcmpun( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vfcmpun | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vcpys( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vcpys | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfmov( FloatRegister fa, FloatRegister fc ) ++ { emit_sw2_long( op_vcpys | is_fa(fa) | is_fb(fa) | is_fc(fc) ); } ++inline void Assembler::vcpyse( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vcpyse | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vcpysn( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { emit_sw2_long( op_vcpysn | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++ ++inline void Assembler::vsums( FloatRegister fa, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsums | is_fa(fa) | is_fc(fc) ); } ++inline void Assembler::vsumd( FloatRegister fa, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsumd | is_fa(fa) | is_fc(fc) ); } ++inline void Assembler::vfcvtsd( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfcvtsd | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfcvtds( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfcvtds | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfcvtls( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfcvtls | is_fb(fb) | is_fc(fc) ); } 
++inline void Assembler::vfcvtld( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfcvtld | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfcvtdl( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfcvtdl | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfcvtdl_g( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfcvtdl_g | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfcvtdl_p( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfcvtdl_p | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfcvtdl_z( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfcvtdl_z | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfcvtdl_n( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfcvtdl_n | is_fb(fb) | is_fc(fc) ); } ++ ++inline void Assembler::vfris( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfris | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfris_g( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfris_g | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfris_p( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfris_p | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfris_z( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfris_z | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfris_n( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfris_n | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfrid( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfrid | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfrid_g( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfrid_g | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfrid_p( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfrid_p | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfrid_z( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfrid_z | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfrid_n( FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfrid_n | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vfrecs( FloatRegister fa, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfrecs | is_fa(fa) | is_fc(fc) ); } ++inline void Assembler::vfrecd( FloatRegister fa, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfrecd | is_fa(fa) | is_fc(fc) ); } ++inline void Assembler::vmaxs( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vmaxs | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vmins( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vmins | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vmaxd( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vmaxd | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vmind( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vmind | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++ ++inline void Assembler::vmas( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vmas | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vmad( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { 
emit_sw2_long( op_vmad | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vmss( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vmss | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vmsd( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vmsd | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vnmas( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vnmas | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vnmad( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vnmad | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vnmss( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vnmss | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vnmsd( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vnmsd | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++ ++inline void Assembler::vfseleq( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vfseleq | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vfsellt( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vfsellt | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vfselle( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vfselle | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vseleqw( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vseleqw | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vseleqw( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) ++ { emit_sw2_long( op_vseleqw_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } ++inline void Assembler::vsellbcw( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vsellbcw | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vsellbcw( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) ++ { emit_sw2_long( op_vsellbcw_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } ++inline void Assembler::vselltw( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vselltw | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vselltw( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) ++ { emit_sw2_long( op_vselltw_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } ++inline void Assembler::vsellew( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vsellew | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vsellew( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) ++ { emit_sw2_long( op_vsellew_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } ++ ++inline void Assembler::vinsw( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) ++ { emit_sw2_long( op_vinsw_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } ++inline void Assembler::vinsf( FloatRegister fa,FloatRegister fb, int 
fmalit, FloatRegister fc ) ++ { emit_sw2_long( op_vinsf_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } ++inline void Assembler::vextw( FloatRegister fa, int fmalit, FloatRegister fc) ++ { emit_sw2_long( op_vextw_l | is_fa(fa) | is_fmalit(fmalit) | is_fc(fc) ); } ++inline void Assembler::vextf( FloatRegister fa, int fmalit, FloatRegister fc) ++ { emit_sw2_long( op_vextf_l | is_fa(fa) | is_fmalit(fmalit) | is_fc(fc) ); } ++inline void Assembler::vcpyw( FloatRegister fa, FloatRegister fc) ++ { emit_sw2_long( op_vcpyw | is_fa(fa) | is_fc(fc) ); } ++inline void Assembler::vcpyf( FloatRegister fa, FloatRegister fc) ++ { emit_sw2_long( op_vcpyf | is_fa(fa) | is_fc(fc) ); } ++inline void Assembler::vconw( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vconw | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vshfw( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vshfw | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vcons( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vcons | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++inline void Assembler::vcond( FloatRegister fa,FloatRegister fb,FloatRegister f3, FloatRegister fc ) ++ { emit_sw2_long( op_vcond | is_fa(fa) | is_fb(fb) | is_f3(f3) | is_fc(fc) ); } ++ ++inline void Assembler::vinsb( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vinsb_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } ++inline void Assembler::vinsh( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vinsh_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } ++inline void Assembler::vinsectlh( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vinsectlh | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vinsectlw( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vinsectlw | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vinsectll( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vinsectll | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vinsectlb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vinsectlb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vshfq( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vshfq_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } ++inline void Assembler::vshfqb( FloatRegister fa, FloatRegister fb, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vshfqb | is_fa(fa) | is_fb(fb) | is_fc(fc) ); } ++inline void Assembler::vcpyb( FloatRegister fa, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vcpyb | is_fa(fa) | is_fc(fc) ); } ++inline void Assembler::vcpyh( FloatRegister fa, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vcpyh | is_fa(fa) | is_fc(fc) ); } ++inline void Assembler::vsm3r( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vsm3r_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } ++inline void Assembler::vfcvtsh( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfcvtsh_l | 
is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } ++inline void Assembler::vfcvths( FloatRegister fa,FloatRegister fb, int fmalit, FloatRegister fc ) ++ { sw4_only(); emit_sw2_long( op_vfcvths_l | is_fa(fa) | is_fb(fb) | is_fmalit(fmalit) | is_fc(fc) ); } ++ ++inline void Assembler::vldw_u( FloatRegister fa, int atmdisp, Register rb ) ++ { emit_sw2_long( op_vldw_u | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::vstw_u( FloatRegister fa, int atmdisp, Register rb ) ++ { emit_sw2_long( op_vstw_u | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::vlds_u( FloatRegister fa, int atmdisp, Register rb ) ++ { emit_sw2_long( op_vsts_u | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::vsts_u( FloatRegister fa, int atmdisp, Register rb ) ++ { emit_sw2_long( op_vsts_u | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::vldd_u( FloatRegister fa, int atmdisp, Register rb ) ++ { emit_sw2_long( op_vldd_u | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::vstd_u( FloatRegister fa, int atmdisp, Register rb ) ++ { emit_sw2_long( op_vstd_u | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::vstw_ul( FloatRegister fa, int atmdisp, Register rb ) ++ { emit_sw2_long( op_vstw_ul | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::vstw_uh( FloatRegister fa, int atmdisp, Register rb ) ++ { emit_sw2_long( op_vstw_uh | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::vsts_ul( FloatRegister fa, int atmdisp, Register rb ) ++ { emit_sw2_long( op_vsts_ul | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::vsts_uh( FloatRegister fa, int atmdisp, Register rb ) ++ { emit_sw2_long( op_vsts_uh | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::vstd_ul( FloatRegister fa, int atmdisp, Register rb ) ++ { emit_sw2_long( op_vstd_ul | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::vstd_uh( FloatRegister fa, int atmdisp, Register rb ) ++ { emit_sw2_long( op_vstd_uh | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++ ++inline void Assembler::lbr( int palfn ) ++ { sw4_only(); emit_sw2_long( op_lbr | is_palfn(palfn) ); } ++ ++inline void Assembler::ldbu_a( Register ra, int atmdisp, Register rb ) ++ { sw4_only(); emit_sw2_long( op_ldbu_a | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::ldhu_a( Register ra, int atmdisp, Register rb ) ++ { sw4_only(); emit_sw2_long( op_ldhu_a | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::ldw_a( Register ra, int atmdisp, Register rb ) ++ { sw4_only(); emit_sw2_long( op_ldw_a | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::ldl_a( Register ra, int atmdisp, Register rb ) ++ { sw4_only(); emit_sw2_long( op_ldl_a | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::stb_a( Register ra, int atmdisp, Register rb ) ++ { sw4_only(); emit_sw2_long( op_stb_a | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::sth_a( Register ra, int atmdisp, Register rb ) ++ { sw4_only(); emit_sw2_long( op_sth_a | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::stw_a( Register ra, int atmdisp, Register rb ) ++ { sw4_only(); emit_sw2_long( op_stw_a | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::stl_a( Register ra, int atmdisp, Register rb ) ++ { sw4_only(); emit_sw2_long( 
op_stl_a | is_ra(ra) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::flds_a( FloatRegister fa, int atmdisp, Register rb ) ++ { sw4_only(); emit_sw2_long( op_flds_a | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::fldd_a( FloatRegister fa, int atmdisp, Register rb ) ++ { sw4_only(); emit_sw2_long( op_fldd_a | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::fsts_a( FloatRegister fa, int atmdisp, Register rb ) ++ { sw4_only(); emit_sw2_long( op_fsts_a | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::fstd_a( FloatRegister fa, int atmdisp, Register rb ) ++ { sw4_only(); emit_sw2_long( op_fstd_a | is_fa(fa) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++ ++inline void Assembler::dpfhr( int th, int atmdisp, Register rb ) ++ { sw4_only(); emit_sw2_long( op_dpfhr | is_th(th) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++inline void Assembler::dpfhw( int th, int atmdisp, Register rb ) ++ { sw4_only(); emit_sw2_long( op_dpfhw | is_th(th) | is_atmdisp(atmdisp) | is_rb(rb) ); } ++ ++inline void Assembler::ldbu( Register ra, int mdisp, Register rb ) ++ { emit_sw2_long( op_ldbu | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::ldhu( Register ra, int mdisp, Register rb ) ++ { emit_sw2_long( op_ldhu | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::ldw( Register ra, int mdisp, Register rb ) ++ { emit_sw2_long( op_ldw | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::ldl( Register ra, int mdisp, Register rb ) ++ { emit_sw2_long( op_ldl | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::ldl_u( Register ra, int mdisp, Register rb ) ++ { emit_sw2_long( op_ldl_u | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } ++ ++inline void Assembler::pri_ld( Register ra, int ev6hwdisp, Register rb ) ++ { emit_sw2_long( op_pri_ld | is_ra(ra) | is_ev6hwdisp(ev6hwdisp) | is_rb(rb) ); } ++ ++inline void Assembler::flds( FloatRegister fa, int mdisp, Register rb ) ++ { emit_sw2_long( op_flds | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::fldd( FloatRegister fa, int mdisp, Register rb ) ++ { emit_sw2_long( op_fldd | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::stb( Register ra, int mdisp, Register rb ) ++ { emit_sw2_long( op_stb | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::sth( Register ra, int mdisp, Register rb ) ++ { emit_sw2_long( op_sth | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::stw( Register ra, int mdisp, Register rb ) ++ { emit_sw2_long( op_stw | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::stl( Register ra, int mdisp, Register rb ) ++ { emit_sw2_long( op_stl | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::stl_u( Register ra, int mdisp, Register rb ) ++ { emit_sw2_long( op_stl_u | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } ++ ++inline void Assembler::pri_st( Register ra, int ev6hwdisp, Register rb ) ++ { emit_sw2_long( op_pri_st | is_ra(ra) | is_ev6hwdisp(ev6hwdisp) | is_rb(rb) ); } ++ ++inline void Assembler::fsts( FloatRegister fa, int mdisp, Register rb ) ++ { emit_sw2_long( op_fsts | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::fstd( FloatRegister fa, int mdisp, Register rb ) ++ { emit_sw2_long( op_fstd | is_fa(fa) | is_mdisp(mdisp) | is_rb(rb) ); } ++ ++inline void Assembler::beq( Register ra, int bdisp ) ++ { emit_sw2_long( op_beq | is_ra(ra) | is_bdisp(bdisp) ); } 
++inline void Assembler::bne( Register ra, int bdisp ) ++ { emit_sw2_long( op_bne | is_ra(ra) | is_bdisp(bdisp) ); } ++inline void Assembler::blt( Register ra, int bdisp ) ++ { emit_sw2_long( op_blt | is_ra(ra) | is_bdisp(bdisp) ); } ++inline void Assembler::ble( Register ra, int bdisp ) ++ { emit_sw2_long( op_ble | is_ra(ra) | is_bdisp(bdisp) ); } ++inline void Assembler::bgt( Register ra, int bdisp ) ++ { emit_sw2_long( op_bgt | is_ra(ra) | is_bdisp(bdisp) ); } ++inline void Assembler::bge( Register ra, int bdisp ) ++ { emit_sw2_long( op_bge | is_ra(ra) | is_bdisp(bdisp) ); } ++inline void Assembler::blbc( Register ra, int bdisp ) ++ { emit_sw2_long( op_blbc | is_ra(ra) | is_bdisp(bdisp) ); } ++inline void Assembler::blbs( Register ra, int bdisp ) ++ { emit_sw2_long( op_blbs | is_ra(ra) | is_bdisp(bdisp) ); } ++inline void Assembler::fbeq( FloatRegister fa, int bdisp ) ++ { emit_sw2_long( op_fbeq | is_fa(fa) | is_bdisp(bdisp) ); } ++inline void Assembler::fbne( FloatRegister fa, int bdisp ) ++ { emit_sw2_long( op_fbne | is_fa(fa) | is_bdisp(bdisp) ); } ++inline void Assembler::fblt( FloatRegister fa, int bdisp ) ++ { emit_sw2_long( op_fblt | is_fa(fa) | is_bdisp(bdisp) ); } ++inline void Assembler::fble( FloatRegister fa, int bdisp ) ++ { emit_sw2_long( op_fble | is_fa(fa) | is_bdisp(bdisp) ); } ++inline void Assembler::fbgt( FloatRegister fa, int bdisp ) ++ { emit_sw2_long( op_fbgt | is_fa(fa) | is_bdisp(bdisp) ); } ++inline void Assembler::fbge( FloatRegister fa, int bdisp ) ++ { emit_sw2_long( op_fbge | is_fa(fa) | is_bdisp(bdisp) ); } ++ ++inline void Assembler::ldi( Register ra, int mdisp, Register rb ) ++ { emit_sw2_long( op_ldi | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } ++inline void Assembler::ldih( Register ra, int mdisp, Register rb ) ++ { emit_sw2_long( op_ldih | is_ra(ra) | is_mdisp(mdisp) | is_rb(rb) ); } ++ ++// cache control instruction ++inline void Assembler::s_fillcs( int mdisp, Register rb ) ++ { ldw( R0, mdisp, rb); } ++inline void Assembler::s_fillde( int mdisp, Register rb ) ++ { ldl( R0, mdisp, rb); } ++inline void Assembler::fillde( int mdisp, Register rb ) ++ { flds( f31, mdisp, rb); } ++inline void Assembler::fillde_e( int mdisp, Register rb ) ++ { fldd( f31, mdisp, rb); } ++inline void Assembler::fillcs( int mdisp, Register rb ) ++ { ldwe( f31, mdisp, rb); } ++inline void Assembler::fillcs_e( int mdisp, Register rb ) ++ { ldde( f31, mdisp, rb); } ++inline void Assembler::e_fillcs( int mdisp, Register rb ) ++ { ldse( f31, mdisp, rb); } ++inline void Assembler::e_fillde( int mdisp, Register rb ) ++ { vlds( f31/*V31*/, mdisp, rb); } ++inline void Assembler::flushd( int mdisp, Register rb ) ++ { ldbu( R0, mdisp, rb); } ++inline void Assembler::evictdl( int mdisp, Register rb ) ++ { ldl_u( R0, mdisp, rb); } ++inline void Assembler::evictdg( int mdisp, Register rb ) ++ { ldhu( R0, mdisp, rb); } ++ ++#endif // CPU_SW64_VM_ASSEMBLER_SW64_INLINE_HPP +diff --git a/src/hotspot/cpu/sw64/bytes_sw64.hpp b/src/hotspot/cpu/sw64/bytes_sw64.hpp +new file mode 100644 +index 0000000000..024cc4af17 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/bytes_sw64.hpp +@@ -0,0 +1,131 @@ ++/* ++ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
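The Assembler one-line emitters above (the file closed by CPU_SW64_VM_ASSEMBLER_SW64_INLINE_HPP) all follow the same shape: build a 32-bit instruction word by OR-ing an opcode constant with small field encoders (is_ra, is_fa, is_lit, is_mdisp and so on) and hand the result to emit_sw2_long, with sw4_only() presumably restricting certain forms to the newer SW core. A minimal sketch of that field-composition idea, with invented field positions that are assumptions for illustration, not the real SW64 encoding:

    #include <cstdint>

    // Illustrative field packers; the shift amounts are made up for the
    // sketch and do not reflect the actual SW64 instruction layout.
    static inline uint32_t field_ra(int ra)  { return (uint32_t)(ra & 0x1f) << 21; }
    static inline uint32_t field_rb(int rb)  { return (uint32_t)(rb & 0x1f) << 16; }
    static inline uint32_t field_disp(int d) { return (uint32_t)(d & 0xffff); }

    // An emitter in this style simply ORs the opcode with its operand fields.
    static inline uint32_t encode_load(uint32_t opcode, int ra, int disp, int rb) {
      return opcode | field_ra(ra) | field_disp(disp) | field_rb(rb);
    }

The cache-control helpers near the end of that file reuse the ordinary load emitters: s_fillcs, fillde, flushd, evictdl and friends are loads targeting R0 or f31 whose loaded value is not meant to be consumed, so only the intended cache-side effect (fill, flush or evict hint) remains.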
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_BYTES_SW64_HPP ++#define CPU_SW64_VM_BYTES_SW64_HPP ++ ++#include "memory/allocation.hpp" ++ ++class Bytes: AllStatic { ++public: ++ // Efficient reading and writing of unaligned unsigned data in platform-specific byte ordering ++ // (no special code is needed since x86 CPUs can access unaligned data) ++ static inline u2 get_native_u2(address p) { ++ if ((intptr_t)p & 0x1) { ++ return ((u2)p[1] << 8) | (u2)p[0]; ++ } else { ++ return *(u2*)p; ++ } ++ } ++ ++ static inline u4 get_native_u4(address p) { ++ return *(u4*)p; ++ } ++ ++ static inline u8 get_native_u8(address p) { ++ return *(u8*)p; ++ } ++ ++ //use mips unaligned load instructions ++ static inline void put_native_u2(address p, u2 x) { ++ if((intptr_t)p & 0x1) { ++ p[0] = (u_char)(x); ++ p[1] = (u_char)(x>>8); ++ } else { ++ *(u2*)p = x; ++ } ++ } ++ ++ static inline void put_native_u4(address p, u4 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while mips is little-endian ++ switch ( intptr_t(p) & 3 ) { ++ case 0: *(u4*)p = x; ++ break; ++ ++ case 2: ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ break; ++ } ++ } ++ ++ static inline void put_native_u8(address p, u8 x) { ++ // refer to sparc implementation. ++ // Note that sparc is big-endian, while sw64 is little-endian ++ switch ( intptr_t(p) & 7 ) { ++ case 0: *(u8*)p = x; ++ break; ++ ++ case 4: ((u4*)p)[1] = x >> 32; ++ ((u4*)p)[0] = x; ++ break; ++ ++ case 2: ((u2*)p)[3] = x >> 48; ++ ((u2*)p)[2] = x >> 32; ++ ((u2*)p)[1] = x >> 16; ++ ((u2*)p)[0] = x; ++ break; ++ ++ default: ((u1*)p)[7] = x >> 56; ++ ((u1*)p)[6] = x >> 48; ++ ((u1*)p)[5] = x >> 40; ++ ((u1*)p)[4] = x >> 32; ++ ((u1*)p)[3] = x >> 24; ++ ((u1*)p)[2] = x >> 16; ++ ((u1*)p)[1] = x >> 8; ++ ((u1*)p)[0] = x; ++ } ++ } ++ ++ ++ // Efficient reading and writing of unaligned unsigned data in Java ++ // byte ordering (i.e. big-endian ordering). Byte-order reversal is ++ // needed since SW64 CPUs use little-endian format. 
++ static inline u2 get_Java_u2(address p) { return swap_u2(get_native_u2(p)); } ++ static inline u4 get_Java_u4(address p) { return swap_u4(get_native_u4(p)); } ++ static inline u8 get_Java_u8(address p) { return swap_u8(get_native_u8(p)); } ++ ++ static inline void put_Java_u2(address p, u2 x) { put_native_u2(p, swap_u2(x)); } ++ static inline void put_Java_u4(address p, u4 x) { put_native_u4(p, swap_u4(x)); } ++ static inline void put_Java_u8(address p, u8 x) { put_native_u8(p, swap_u8(x)); } ++ ++ ++ // Efficient swapping of byte ordering ++ static inline u2 swap_u2(u2 x); // compiler-dependent implementation ++ static inline u4 swap_u4(u4 x); // compiler-dependent implementation ++ static inline u8 swap_u8(u8 x); ++}; ++ ++ ++// The following header contains the implementations of swap_u2, swap_u4, and swap_u8[_base] ++#include OS_CPU_HEADER_INLINE(bytes) ++ ++#endif // CPU_SW64_VM_BYTES_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/c1_CodeStubs_sw64.cpp b/src/hotspot/cpu/sw64/c1_CodeStubs_sw64.cpp +new file mode 100644 +index 0000000000..00d479d1c7 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/c1_CodeStubs_sw64.cpp +@@ -0,0 +1,365 @@ ++/* ++ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
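The Bytes class above pairs native little-endian accessors (some of which take care to avoid misaligned memory operations) with Java-order, big-endian accessors that byte-swap on the way in and out; the swap_u2/u4/u8 bodies themselves come from the OS_CPU bytes inline header included at the end of that file. A small standalone illustration of the same round-trip, using a compiler builtin as a stand-in for the platform swap routine (an assumption for the sketch, not the port's code):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Stand-in for Bytes::swap_u4; the real one lives in the OS_CPU header.
    static inline uint32_t swap_u4(uint32_t x) { return __builtin_bswap32(x); }

    // Java class-file data is big-endian, so on a little-endian CPU a Java u4
    // is the byte swap of the native load (memcpy tolerates misalignment).
    static inline uint32_t get_Java_u4(const unsigned char* p) {
      uint32_t native;
      std::memcpy(&native, p, sizeof(native));
      return swap_u4(native);
    }

    int main() {
      const unsigned char magic[4] = { 0xCA, 0xFE, 0xBA, 0xBE };
      assert(get_Java_u4(magic) == 0xCAFEBABEu);  // on a little-endian host
      return 0;
    }

The put_native_u2/u4/u8 writers above serve the same goal from the store side: they switch on the low address bits and split a misaligned store into naturally aligned halves or bytes, so no misaligned store is ever issued.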
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "nativeInst_sw64.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_sw64.inline.hpp" ++ ++ ++#define __ ce->masm()-> ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void CounterOverflowStub::emit_code(LIR_Assembler* ce) { ++ __ BIND(_entry); ++ Metadata *m = _method->as_constant_ptr()->as_metadata(); ++ __ mov_metadata(rscratch1, m); ++ ce->store_parameter(rscratch1, 1); ++ ce->store_parameter(_bci, 0); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::counter_overflow_id))); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ b(_continuation); ++} ++ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index, LIR_Opr array) ++ : _throw_index_out_of_bounds_exception(false), _index(index), _array(array) { ++ assert(info != NULL, "must have info"); ++ _info = new CodeEmitInfo(info); ++} ++ ++RangeCheckStub::RangeCheckStub(CodeEmitInfo* info, LIR_Opr index) ++ : _throw_index_out_of_bounds_exception(true), _index(index), _array(NULL) { ++ assert(info != NULL, "must have info"); ++ _info = new CodeEmitInfo(info); ++} ++ ++void RangeCheckStub::emit_code(LIR_Assembler* ce) { ++ __ BIND(_entry); ++ if (_info->deoptimize_on_exception()) { ++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ __ far_call(RuntimeAddress(a)); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++ return; ++ } ++ ++ if (_index->is_cpu_register()) { ++ __ mov(rscratch1, _index->as_register()); ++ } else { ++ __ mov(rscratch1, _index->as_jint()); ++ } ++ Runtime1::StubID stub_id; ++ if (_throw_index_out_of_bounds_exception) { ++ stub_id = Runtime1::throw_index_exception_id; ++ } else { ++ assert(_array != NULL, "sanity"); ++ __ mov(rscratch2, _array->as_pointer_register()); ++ stub_id = Runtime1::throw_range_check_failed_id; ++ } ++ __ lea(lr, RuntimeAddress(Runtime1::entry_for(stub_id))); ++ __ blr(lr); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++PredicateFailedStub::PredicateFailedStub(CodeEmitInfo* info) { ++ _info = new CodeEmitInfo(info); ++} ++ ++void PredicateFailedStub::emit_code(LIR_Assembler* ce) { ++ __ BIND(_entry); ++ address a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ __ far_call(RuntimeAddress(a)); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++void DivByZeroStub::emit_code(LIR_Assembler* ce) { ++ if (_offset != -1) { ++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ } ++ __ BIND(_entry); ++ __ far_call(Address(Runtime1::entry_for(Runtime1::throw_div0_exception_id), relocInfo::runtime_call_type)); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++#ifdef ASSERT ++ __ should_not_reach_here(); ++#endif ++} ++ ++ ++ ++// Implementation of NewInstanceStub ++ ++NewInstanceStub::NewInstanceStub(LIR_Opr klass_reg, LIR_Opr result, ciInstanceKlass* klass, CodeEmitInfo* info, Runtime1::StubID stub_id) { ++ 
_result = result; ++ _klass = klass; ++ _klass_reg = klass_reg; ++ _info = new CodeEmitInfo(info); ++ assert(stub_id == Runtime1::new_instance_id || ++ stub_id == Runtime1::fast_new_instance_id || ++ stub_id == Runtime1::fast_new_instance_init_check_id, ++ "need new_instance id"); ++ _stub_id = stub_id; ++} ++ ++ ++ ++void NewInstanceStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ BIND(_entry); ++ __ mov(i3, _klass_reg->as_register()); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub_id))); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == i0, "result must in i0,"); ++ __ b(_continuation); ++} ++ ++ ++// Implementation of NewTypeArrayStub ++ ++// Implementation of NewTypeArrayStub ++ ++NewTypeArrayStub::NewTypeArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { ++ _klass_reg = klass_reg; ++ _length = length; ++ _result = result; ++ _info = new CodeEmitInfo(info); ++} ++ ++ ++void NewTypeArrayStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ BIND(_entry); ++ assert(_length->as_register() == i19, "length must in i19,"); ++ assert(_klass_reg->as_register() == i3, "klass_reg must in i3"); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_type_array_id))); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == i0, "result must in i0"); ++ __ b(_continuation); ++} ++ ++ ++// Implementation of NewObjectArrayStub ++ ++NewObjectArrayStub::NewObjectArrayStub(LIR_Opr klass_reg, LIR_Opr length, LIR_Opr result, CodeEmitInfo* info) { ++ _klass_reg = klass_reg; ++ _result = result; ++ _length = length; ++ _info = new CodeEmitInfo(info); ++} ++ ++ ++void NewObjectArrayStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ BIND(_entry); ++ assert(_length->as_register() == i19, "length must in i19,"); ++ assert(_klass_reg->as_register() == i3, "klass_reg must in i3"); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::new_object_array_id))); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ assert(_result->as_register() == i0, "result must in i0"); ++ __ b(_continuation); ++} ++// Implementation of MonitorAccessStubs ++ ++MonitorEnterStub::MonitorEnterStub(LIR_Opr obj_reg, LIR_Opr lock_reg, CodeEmitInfo* info) ++: MonitorAccessStub(obj_reg, lock_reg) ++{ ++ _info = new CodeEmitInfo(info); ++} ++ ++ ++void MonitorEnterStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ __ BIND(_entry); ++ ce->store_parameter(_obj_reg->as_register(), 1); ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ Runtime1::StubID enter_id; ++ if (ce->compilation()->has_fpu_code()) { ++ enter_id = Runtime1::monitorenter_id; ++ } else { ++ enter_id = Runtime1::monitorenter_nofpu_id; ++ } ++ __ far_call(RuntimeAddress(Runtime1::entry_for(enter_id))); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ __ b(_continuation); ++} ++ ++ ++void MonitorExitStub::emit_code(LIR_Assembler* ce) { ++ __ BIND(_entry); ++ if (_compute_lock) { ++ // lock_reg was destroyed by fast unlocking attempt => recompute it ++ ce->monitor_address(_monitor_ix, _lock_reg); ++ } ++ ce->store_parameter(_lock_reg->as_register(), 0); ++ // note: non-blocking leaf routine => no call info needed ++ Runtime1::StubID exit_id; ++ if (ce->compilation()->has_fpu_code()) { ++ 
exit_id = Runtime1::monitorexit_id; ++ } else { ++ exit_id = Runtime1::monitorexit_nofpu_id; ++ } ++ __ adr(lr, _continuation); ++ __ far_jump(RuntimeAddress(Runtime1::entry_for(exit_id))); ++} ++ ++ ++// Implementation of patching: ++// - Copy the code at given offset to an inlined buffer (first the bytes, then the number of bytes) ++// - Replace original code with a call to the stub ++// At Runtime: ++// - call to stub, jump to runtime ++// - in runtime: preserve all registers (rspecially objects, i.e., source and destination object) ++// - in runtime: after initializing class, restore original code, reexecute instruction ++ ++int PatchingStub::_patch_info_offset = -NativeGeneralJump::instruction_size; ++ ++void PatchingStub::align_patch_site(MacroAssembler* masm) { ++} ++ ++void PatchingStub::emit_code(LIR_Assembler* ce) { ++ assert(false, "Sw64 should not use C1 runtime patching"); ++} ++ ++ ++void DeoptimizeStub::emit_code(LIR_Assembler* ce) { ++ __ BIND(_entry); ++ ce->store_parameter(_trap_request, 0); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::deoptimize_id))); ++ ce->add_call_info_here(_info); ++ DEBUG_ONLY(__ should_not_reach_here()); ++} ++ ++ ++void ImplicitNullCheckStub::emit_code(LIR_Assembler* ce) { ++ address a; ++ if (_info->deoptimize_on_exception()) { ++ // Deoptimize, do not throw the exception, because it is probably wrong to do it here. ++ a = Runtime1::entry_for(Runtime1::predicate_failed_trap_id); ++ } else { ++ a = Runtime1::entry_for(Runtime1::throw_null_pointer_exception_id); ++ } ++ ++ ce->compilation()->implicit_exception_table()->append(_offset, __ offset()); ++ __ BIND(_entry); ++ __ far_call(RuntimeAddress(a)); ++ ce->add_call_info_here(_info); ++ ce->verify_oop_map(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++ ++void SimpleExceptionStub::emit_code(LIR_Assembler* ce) { ++ assert(__ rsp_offset() == 0, "frame size should be fixed"); ++ ++ __ BIND(_entry); ++ // pass the object in a scratch register because all other registers ++ // must be preserved ++ if (_obj->is_cpu_register()) { ++ __ mov(rscratch1, _obj->as_register()); ++ } ++ __ far_call(RuntimeAddress(Runtime1::entry_for(_stub)), NULL, rscratch2); ++ ce->add_call_info_here(_info); ++ debug_only(__ should_not_reach_here()); ++} ++ ++ ++void ArrayCopyStub::emit_code(LIR_Assembler* ce) { ++ //---------------slow case: call to native----------------- ++ __ BIND(_entry); ++ // Figure out where the args should go ++ // This should really convert the IntrinsicID to the Method* and signature ++ // but I don't know how to do that. 
++ // ++ VMRegPair args[5]; ++ BasicType signature[5] = { T_OBJECT, T_INT, T_OBJECT, T_INT, T_INT}; ++ SharedRuntime::java_calling_convention(signature, args, 5, true); ++ ++ // push parameters ++ // (src, src_pos, dest, destPos, length) ++ Register r[5]; ++ r[0] = src()->as_register(); ++ r[1] = src_pos()->as_register(); ++ r[2] = dst()->as_register(); ++ r[3] = dst_pos()->as_register(); ++ r[4] = length()->as_register(); ++ ++ // next registers will get stored on the stack ++ for (int i = 0; i < 5 ; i++ ) { ++ VMReg r_1 = args[i].first(); ++ if (r_1->is_stack()) { ++ int st_off = r_1->reg2stack() * wordSize; ++ __ str (r[i], Address(sp, st_off)); ++ } else { ++ assert(r[i] == args[i].first()->as_Register(), "Wrong register for arg "); ++ } ++ } ++ ++ ce->align_call(lir_static_call); ++ ++ ce->emit_static_call_stub(); ++ if (ce->compilation()->bailed_out()) { ++ return; // CodeCache is full ++ } ++ Address resolve(SharedRuntime::get_resolve_static_call_stub(), ++ relocInfo::static_call_type); ++ address call = __ trampoline_call(resolve); ++ if (call == NULL) { ++ ce->bailout("trampoline stub overflow"); ++ return; ++ } ++ ce->add_call_info_here(info()); ++ ++#ifndef PRODUCT ++ __ lea(rscratch2, ExternalAddress((address)&Runtime1::_arraycopy_slowcase_cnt)); ++ __ incrementw(Address(rscratch2)); ++#endif ++ ++ __ b(_continuation); ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/sw64/c1_Defs_sw64.hpp b/src/hotspot/cpu/sw64/c1_Defs_sw64.hpp +new file mode 100644 +index 0000000000..48c048938d +--- /dev/null ++++ b/src/hotspot/cpu/sw64/c1_Defs_sw64.hpp +@@ -0,0 +1,81 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_SW64_VM_C1_DEFS_SW64_HPP ++#define CPU_SW64_VM_C1_DEFS_SW64_HPP ++ ++// native word offsets from memory address (little endian) ++enum { ++ pd_lo_word_offset_in_bytes = 0, ++ pd_hi_word_offset_in_bytes = BytesPerWord ++}; ++ ++// explicit rounding operations are required to implement the strictFP mode ++enum { ++ pd_strict_fp_requires_explicit_rounding = false ++}; ++ ++// FIXME: There are no callee-saved ++ ++// registers ++enum { ++ pd_nof_cpu_regs_frame_map = RegisterImpl::number_of_registers, // number of registers used during code emission ++ pd_nof_fpu_regs_frame_map = FloatRegisterImpl::number_of_registers, // number of registers used during code emission ++ ++ pd_nof_caller_save_cpu_regs_frame_map = 19 - 2, // number of registers killed by calls ++ pd_nof_caller_save_fpu_regs_frame_map = 32, // number of registers killed by calls ++ ++ pd_first_callee_saved_reg = 19 - 2, ++ pd_last_callee_saved_reg = 26 - 2, ++ ++ pd_last_allocatable_cpu_reg = 16, ++ ++ pd_nof_cpu_regs_reg_alloc ++ = pd_last_allocatable_cpu_reg + 1, // number of registers that are visible to register allocator ++ pd_nof_fpu_regs_reg_alloc = 8, // number of registers that are visible to register allocator ++ ++ pd_nof_cpu_regs_linearscan = 32, // number of registers visible to linear scan ++ pd_nof_fpu_regs_linearscan = pd_nof_fpu_regs_frame_map, // number of registers visible to linear scan ++ pd_nof_xmm_regs_linearscan = 0, // like sparc we don't have any of these ++ pd_first_cpu_reg = 0, ++ pd_last_cpu_reg = 16, ++ pd_first_byte_reg = 0, ++ pd_last_byte_reg = 16, ++ pd_first_fpu_reg = pd_nof_cpu_regs_frame_map, ++ pd_last_fpu_reg = pd_first_fpu_reg + 31, ++ ++ pd_first_callee_saved_fpu_reg = 8 + pd_first_fpu_reg, ++ pd_last_callee_saved_fpu_reg = 15 + pd_first_fpu_reg, ++}; ++ ++ ++// Encoding of float value in debug info. This is true on x86 where ++// floats are extended to doubles when stored in the stack, false for ++// Sw64 where floats and doubles are stored in their native form. ++enum { ++ pd_float_saved_as_double = false ++}; ++ ++#endif // CPU_SW64_VM_C1_DEFS_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/c1_FpuStackSim_sw64.cpp b/src/hotspot/cpu/sw64/c1_FpuStackSim_sw64.cpp +new file mode 100644 +index 0000000000..1952402c01 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/c1_FpuStackSim_sw64.cpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2005, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++//-------------------------------------------------------- ++// FpuStackSim ++//-------------------------------------------------------- ++ ++// No FPU stack on SW64 +diff --git a/src/hotspot/cpu/sw64/c1_FpuStackSim_sw64.hpp b/src/hotspot/cpu/sw64/c1_FpuStackSim_sw64.hpp +new file mode 100644 +index 0000000000..931fdaa9a6 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/c1_FpuStackSim_sw64.hpp +@@ -0,0 +1,32 @@ ++/* ++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_C1_FPUSTACKSIM_HPP ++#define CPU_SW64_VM_C1_FPUSTACKSIM_HPP ++ ++// No FPU stack on SW64 ++class FpuStackSim; ++ ++#endif // CPU_SW64_VM_C1_FPUSTACKSIM_HPP +diff --git a/src/hotspot/cpu/sw64/c1_FrameMap_sw64.cpp b/src/hotspot/cpu/sw64/c1_FrameMap_sw64.cpp +new file mode 100644 +index 0000000000..193eea91c3 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/c1_FrameMap_sw64.cpp +@@ -0,0 +1,357 @@ ++/* ++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_LIR.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_sw64.inline.hpp" ++ ++LIR_Opr FrameMap::map_to_opr(BasicType type, VMRegPair* reg, bool) { ++ LIR_Opr opr = LIR_OprFact::illegalOpr; ++ VMReg r_1 = reg->first(); ++ VMReg r_2 = reg->second(); ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset ++ // The calling convention does not count the SharedRuntime::out_preserve_stack_slots() value ++ // so we must add it in here. ++ int st_off = (r_1->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++ opr = LIR_OprFact::address(new LIR_Address(sp_opr, st_off, type)); ++ } else if (r_1->is_Register()) { ++ Register reg = r_1->as_Register(); ++ if (r_2->is_Register() && (type == T_LONG || type == T_DOUBLE)) { ++ Register reg2 = r_2->as_Register(); ++ assert(reg2 == reg, "must be same register"); ++ opr = as_long_opr(reg); ++ } else if (type == T_OBJECT || type == T_ARRAY) { ++ opr = as_oop_opr(reg); ++ } else if (type == T_METADATA) { ++ opr = as_metadata_opr(reg); ++ } else if (type == T_ADDRESS) { ++ opr = as_address_opr(reg); ++ } else { ++ opr = as_opr(reg); ++ } ++ } else if (r_1->is_FloatRegister()) { ++ assert(type == T_DOUBLE || type == T_FLOAT, "wrong type"); ++ int num = r_1->as_FloatRegister()->encoding(); ++ if (type == T_FLOAT) { ++ opr = LIR_OprFact::single_fpu(num); ++ } else { ++ opr = LIR_OprFact::double_fpu(num); ++ } ++ } else { ++ ShouldNotReachHere(); ++ } ++ return opr; ++} ++ ++LIR_Opr FrameMap::r0_opr; ++LIR_Opr FrameMap::r1_opr; ++LIR_Opr FrameMap::r2_opr; ++LIR_Opr FrameMap::r3_opr; ++LIR_Opr FrameMap::r4_opr; ++LIR_Opr FrameMap::r5_opr; ++LIR_Opr FrameMap::r6_opr; ++LIR_Opr FrameMap::r7_opr; ++LIR_Opr FrameMap::r8_opr; ++LIR_Opr FrameMap::r9_opr; ++LIR_Opr FrameMap::r10_opr; ++LIR_Opr FrameMap::r11_opr; ++LIR_Opr FrameMap::r12_opr; ++LIR_Opr FrameMap::r13_opr; ++LIR_Opr FrameMap::r14_opr; ++LIR_Opr FrameMap::r15_opr; ++LIR_Opr FrameMap::r16_opr; ++LIR_Opr FrameMap::r17_opr; ++LIR_Opr FrameMap::r18_opr; ++LIR_Opr FrameMap::r19_opr; ++LIR_Opr FrameMap::r20_opr; ++LIR_Opr FrameMap::r21_opr; ++LIR_Opr FrameMap::r22_opr; ++LIR_Opr FrameMap::r23_opr; ++LIR_Opr FrameMap::r24_opr; ++LIR_Opr FrameMap::r25_opr; ++LIR_Opr FrameMap::r26_opr; ++LIR_Opr FrameMap::r27_opr; ++LIR_Opr FrameMap::r28_opr; ++LIR_Opr FrameMap::r29_opr; ++LIR_Opr FrameMap::r30_opr; ++ ++LIR_Opr FrameMap::rfp_opr; ++LIR_Opr FrameMap::sp_opr; ++ ++LIR_Opr FrameMap::receiver_opr; ++ ++LIR_Opr FrameMap::r0_oop_opr; ++LIR_Opr FrameMap::r1_oop_opr; ++LIR_Opr FrameMap::r2_oop_opr; ++LIR_Opr FrameMap::r3_oop_opr; ++LIR_Opr FrameMap::r4_oop_opr; ++LIR_Opr FrameMap::r5_oop_opr; ++LIR_Opr FrameMap::r6_oop_opr; ++LIR_Opr FrameMap::r7_oop_opr; ++LIR_Opr FrameMap::r8_oop_opr; ++LIR_Opr FrameMap::r9_oop_opr; ++LIR_Opr FrameMap::r10_oop_opr; ++LIR_Opr FrameMap::r11_oop_opr; ++LIR_Opr FrameMap::r12_oop_opr; ++LIR_Opr FrameMap::r13_oop_opr; ++LIR_Opr FrameMap::r14_oop_opr; ++LIR_Opr FrameMap::r15_oop_opr; ++LIR_Opr FrameMap::r16_oop_opr; ++LIR_Opr FrameMap::r17_oop_opr; ++LIR_Opr FrameMap::r18_oop_opr; ++LIR_Opr FrameMap::r19_oop_opr; ++LIR_Opr FrameMap::r20_oop_opr; ++LIR_Opr FrameMap::r21_oop_opr; ++LIR_Opr FrameMap::r22_oop_opr; ++LIR_Opr FrameMap::r23_oop_opr; ++LIR_Opr FrameMap::r24_oop_opr; ++LIR_Opr FrameMap::r25_oop_opr; ++LIR_Opr FrameMap::r26_oop_opr; ++LIR_Opr FrameMap::r27_oop_opr; ++LIR_Opr FrameMap::r28_oop_opr; ++LIR_Opr FrameMap::r29_oop_opr; ++LIR_Opr 
FrameMap::r30_oop_opr; ++ ++LIR_Opr FrameMap::rscratch1_opr; ++LIR_Opr FrameMap::rscratch2_opr; ++LIR_Opr FrameMap::rscratch1_long_opr; ++LIR_Opr FrameMap::rscratch2_long_opr; ++ ++LIR_Opr FrameMap::r0_metadata_opr; ++LIR_Opr FrameMap::r1_metadata_opr; ++LIR_Opr FrameMap::r2_metadata_opr; ++LIR_Opr FrameMap::r3_metadata_opr; ++LIR_Opr FrameMap::r4_metadata_opr; ++LIR_Opr FrameMap::r5_metadata_opr; ++ ++LIR_Opr FrameMap::long0_opr; ++LIR_Opr FrameMap::long1_opr; ++LIR_Opr FrameMap::fpu0_float_opr; ++LIR_Opr FrameMap::fpu0_double_opr; ++ ++LIR_Opr FrameMap::_caller_save_cpu_regs[] = { 0, }; ++LIR_Opr FrameMap::_caller_save_fpu_regs[] = { 0, }; ++ ++//-------------------------------------------------------- ++// FrameMap ++//-------------------------------------------------------- ++ ++void FrameMap::initialize() { ++ assert(!_init_done, "once"); ++ ++ int i=0; ++ map_register(i, i0); r0_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i1); r1_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i2); r2_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i3); r3_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i4); r4_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i5); r5_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i6); r6_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i7); r7_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i10); r10_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i11); r11_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i12); r12_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i13); r13_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i14); r14_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i15); r15_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i16); r16_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i17); r17_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i18); r18_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i19); r19_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i20); r20_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i21); r21_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i22); r22_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i23); r23_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i24); r24_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i25); r25_opr = LIR_OprFact::single_cpu(i); i++; ++ map_register(i, i26); r26_opr = LIR_OprFact::single_cpu(i); i++; ++ ++ map_register(i, i27); r27_opr = LIR_OprFact::single_cpu(i); i++; // rheapbase ++ map_register(i, i28); r28_opr = LIR_OprFact::single_cpu(i); i++; // rthread ++ map_register(i, i29); r29_opr = LIR_OprFact::single_cpu(i); i++; // rfp ++ map_register(i, i30); r30_opr = LIR_OprFact::single_cpu(i); i++; // lr ++// map_register(i, r31_sp); sp_opr = LIR_OprFact::single_cpu(i); i++; // sp ++ map_register(i, i8); r8_opr = LIR_OprFact::single_cpu(i); i++; // rscratch1 ++ map_register(i, i9); r9_opr = LIR_OprFact::single_cpu(i); i++; // rscratch2 ++ ++ rscratch1_opr = r8_opr; ++ rscratch2_opr = r9_opr; ++ rscratch1_long_opr = LIR_OprFact::double_cpu(r8_opr->cpu_regnr(), r8_opr->cpu_regnr()); ++ rscratch2_long_opr = LIR_OprFact::double_cpu(r9_opr->cpu_regnr(), r9_opr->cpu_regnr()); ++ ++ long0_opr = LIR_OprFact::double_cpu(0, 0); ++ long1_opr = LIR_OprFact::double_cpu(1, 1); ++ ++ fpu0_float_opr = LIR_OprFact::single_fpu(0); ++ fpu0_double_opr = LIR_OprFact::double_fpu(0); ++ ++ 
_caller_save_cpu_regs[0] = r0_opr; ++ _caller_save_cpu_regs[1] = r1_opr; ++ _caller_save_cpu_regs[2] = r2_opr; ++ _caller_save_cpu_regs[3] = r3_opr; ++ _caller_save_cpu_regs[4] = r4_opr; ++ _caller_save_cpu_regs[5] = r5_opr; ++ _caller_save_cpu_regs[6] = r6_opr; ++ _caller_save_cpu_regs[7] = r7_opr; ++ // rscratch1, rscratch 2 not included ++ _caller_save_cpu_regs[8] = r10_opr; ++ _caller_save_cpu_regs[9] = r11_opr; ++ _caller_save_cpu_regs[10] = r12_opr; ++ _caller_save_cpu_regs[11] = r13_opr; ++ _caller_save_cpu_regs[12] = r14_opr; ++ _caller_save_cpu_regs[13] = r15_opr; ++ _caller_save_cpu_regs[14] = r16_opr; ++ _caller_save_cpu_regs[15] = r17_opr; ++ _caller_save_cpu_regs[16] = r18_opr; ++ ++ for (int i = 0; i < 8; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++ ++ _init_done = true; ++ ++ r0_oop_opr = as_oop_opr(i0); ++ r1_oop_opr = as_oop_opr(i1); ++ r2_oop_opr = as_oop_opr(i2); ++ r3_oop_opr = as_oop_opr(i3); ++ r4_oop_opr = as_oop_opr(i4); ++ r5_oop_opr = as_oop_opr(i5); ++ r6_oop_opr = as_oop_opr(i6); ++ r7_oop_opr = as_oop_opr(i7); ++ r8_oop_opr = as_oop_opr(i8); ++ r9_oop_opr = as_oop_opr(i9); ++ r10_oop_opr = as_oop_opr(i10); ++ r11_oop_opr = as_oop_opr(i11); ++ r12_oop_opr = as_oop_opr(i12); ++ r13_oop_opr = as_oop_opr(i13); ++ r14_oop_opr = as_oop_opr(i14); ++ r15_oop_opr = as_oop_opr(i15); ++ r16_oop_opr = as_oop_opr(i16); ++ r17_oop_opr = as_oop_opr(i17); ++ r18_oop_opr = as_oop_opr(i18); ++ r19_oop_opr = as_oop_opr(i19); ++ r20_oop_opr = as_oop_opr(i20); ++ r21_oop_opr = as_oop_opr(i21); ++ r22_oop_opr = as_oop_opr(i22); ++ r23_oop_opr = as_oop_opr(i23); ++ r24_oop_opr = as_oop_opr(i24); ++ r25_oop_opr = as_oop_opr(i25); ++ r26_oop_opr = as_oop_opr(i26); ++ r27_oop_opr = as_oop_opr(i27); ++ r28_oop_opr = as_oop_opr(i28); ++ r29_oop_opr = as_oop_opr(i29); ++ r30_oop_opr = as_oop_opr(i30); ++ ++ r0_metadata_opr = as_metadata_opr(i0); ++ r1_metadata_opr = as_metadata_opr(i1); ++ r2_metadata_opr = as_metadata_opr(i2); ++ r3_metadata_opr = as_metadata_opr(i3); ++ r4_metadata_opr = as_metadata_opr(i4); ++ r5_metadata_opr = as_metadata_opr(i5); ++ ++// sp_opr = as_pointer_opr(r31_sp); ++ rfp_opr = as_pointer_opr(rfp); ++ ++ VMRegPair regs; ++ BasicType sig_bt = T_OBJECT; ++ SharedRuntime::java_calling_convention(&sig_bt, ®s, 1, true); ++ receiver_opr = as_oop_opr(regs.first()->as_Register()); ++ ++ for (int i = 0; i < nof_caller_save_fpu_regs; i++) { ++ _caller_save_fpu_regs[i] = LIR_OprFact::single_fpu(i); ++ } ++} ++ ++ ++Address FrameMap::make_new_address(ByteSize sp_offset) const { ++ // for rbp, based address use this: ++ // return Address(rbp, in_bytes(sp_offset) - (framesize() - 2) * 4); ++ return Address(sp, in_bytes(sp_offset)); ++} ++ ++ ++// ----------------mapping----------------------- ++// all mapping is based on rfp addressing, except for simple leaf methods where we access ++// the locals sp based (and no frame is built) ++ ++ ++// Frame for simple leaf methods (quick entries) ++// ++// +----------+ ++// | ret addr | <- TOS ++// +----------+ ++// | args | ++// | ...... | ++ ++// Frame for standard methods ++// ++// | .........| <- TOS ++// | locals | ++// +----------+ ++// | old fp, | <- RFP ++// +----------+ ++// | ret addr | ++// +----------+ ++// | args | ++// | .........| ++ ++ ++// For OopMaps, map a local variable or spill index to an VMRegImpl name. ++// This is the offset from sp() in the frame of the slot for the index, ++// skewed by VMRegImpl::stack0 to indicate a stack location (vs.a register.) 
++// ++// framesize + ++// stack0 stack0 0 <- VMReg ++// | | | ++// ...........|..............|.............| ++// 0 1 2 3 x x 4 5 6 ... | <- local indices ++// ^ ^ sp() ( x x indicate link ++// | | and return addr) ++// arguments non-argument locals ++ ++ ++VMReg FrameMap::fpu_regname (int n) { ++ // Return the OptoReg name for the fpu stack slot "n" ++ // A spilled fpu stack slot comprises to two single-word OptoReg's. ++ return as_FloatRegister(n)->as_VMReg(); ++} ++ ++LIR_Opr FrameMap::stack_pointer() { ++ return FrameMap::sp_opr; ++} ++ ++ ++// JSR 292 ++LIR_Opr FrameMap::method_handle_invoke_SP_save_opr() { ++ return LIR_OprFact::illegalOpr; // Not needed on sw64 ++} ++ ++ ++bool FrameMap::validate_frame() { ++ return true; ++} +diff --git a/src/hotspot/cpu/sw64/c1_FrameMap_sw64.hpp b/src/hotspot/cpu/sw64/c1_FrameMap_sw64.hpp +new file mode 100644 +index 0000000000..c5a7c7887a +--- /dev/null ++++ b/src/hotspot/cpu/sw64/c1_FrameMap_sw64.hpp +@@ -0,0 +1,148 @@ ++/* ++ * Copyright (c) 1999, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_C1_FRAMEMAP_SW64_HPP ++#define CPU_SW64_VM_C1_FRAMEMAP_SW64_HPP ++ ++// On Sw64 the frame looks as follows: ++// ++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++// | size_arguments-nof_reg_args | 2 words | size_locals-size_arguments+numreg_args | _size_monitors | spilling . 
++// +-----------------------------+---------+----------------------------------------+----------------+----------- ++ ++ public: ++ static const int pd_c_runtime_reserved_arg_size; ++ ++ enum { ++ first_available_sp_in_frame = 0, ++ frame_pad_in_bytes = 16, ++ nof_reg_args = 8 ++ }; ++ ++ public: ++ static LIR_Opr receiver_opr; ++ ++ static LIR_Opr r0_opr; ++ static LIR_Opr r1_opr; ++ static LIR_Opr r2_opr; ++ static LIR_Opr r3_opr; ++ static LIR_Opr r4_opr; ++ static LIR_Opr r5_opr; ++ static LIR_Opr r6_opr; ++ static LIR_Opr r7_opr; ++ static LIR_Opr r8_opr; ++ static LIR_Opr r9_opr; ++ static LIR_Opr r10_opr; ++ static LIR_Opr r11_opr; ++ static LIR_Opr r12_opr; ++ static LIR_Opr r13_opr; ++ static LIR_Opr r14_opr; ++ static LIR_Opr r15_opr; ++ static LIR_Opr r16_opr; ++ static LIR_Opr r17_opr; ++ static LIR_Opr r18_opr; ++ static LIR_Opr r19_opr; ++ static LIR_Opr r20_opr; ++ static LIR_Opr r21_opr; ++ static LIR_Opr r22_opr; ++ static LIR_Opr r23_opr; ++ static LIR_Opr r24_opr; ++ static LIR_Opr r25_opr; ++ static LIR_Opr r26_opr; ++ static LIR_Opr r27_opr; ++ static LIR_Opr r28_opr; ++ static LIR_Opr r29_opr; ++ static LIR_Opr r30_opr; ++ static LIR_Opr rfp_opr; ++ static LIR_Opr sp_opr; ++ ++ static LIR_Opr r0_oop_opr; ++ static LIR_Opr r1_oop_opr; ++ static LIR_Opr r2_oop_opr; ++ static LIR_Opr r3_oop_opr; ++ static LIR_Opr r4_oop_opr; ++ static LIR_Opr r5_oop_opr; ++ static LIR_Opr r6_oop_opr; ++ static LIR_Opr r7_oop_opr; ++ static LIR_Opr r8_oop_opr; ++ static LIR_Opr r9_oop_opr; ++ static LIR_Opr r10_oop_opr; ++ static LIR_Opr r11_oop_opr; ++ static LIR_Opr r12_oop_opr; ++ static LIR_Opr r13_oop_opr; ++ static LIR_Opr r14_oop_opr; ++ static LIR_Opr r15_oop_opr; ++ static LIR_Opr r16_oop_opr; ++ static LIR_Opr r17_oop_opr; ++ static LIR_Opr r18_oop_opr; ++ static LIR_Opr r19_oop_opr; ++ static LIR_Opr r20_oop_opr; ++ static LIR_Opr r21_oop_opr; ++ static LIR_Opr r22_oop_opr; ++ static LIR_Opr r23_oop_opr; ++ static LIR_Opr r24_oop_opr; ++ static LIR_Opr r25_oop_opr; ++ static LIR_Opr r26_oop_opr; ++ static LIR_Opr r27_oop_opr; ++ static LIR_Opr r28_oop_opr; ++ static LIR_Opr r29_oop_opr; ++ static LIR_Opr r30_oop_opr; ++ ++ static LIR_Opr rscratch1_opr; ++ static LIR_Opr rscratch2_opr; ++ static LIR_Opr rscratch1_long_opr; ++ static LIR_Opr rscratch2_long_opr; ++ ++ static LIR_Opr r0_metadata_opr; ++ static LIR_Opr r1_metadata_opr; ++ static LIR_Opr r2_metadata_opr; ++ static LIR_Opr r3_metadata_opr; ++ static LIR_Opr r4_metadata_opr; ++ static LIR_Opr r5_metadata_opr; ++ ++ static LIR_Opr long0_opr; ++ static LIR_Opr long1_opr; ++ static LIR_Opr fpu0_float_opr; ++ static LIR_Opr fpu0_double_opr; ++ ++ static LIR_Opr as_long_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ static LIR_Opr as_pointer_opr(Register r) { ++ return LIR_OprFact::double_cpu(cpu_reg2rnr(r), cpu_reg2rnr(r)); ++ } ++ ++ // VMReg name for spilled physical FPU stack slot n ++ static VMReg fpu_regname (int n); ++ ++ static bool is_caller_save_register (LIR_Opr opr) { return true; } ++ static bool is_caller_save_register (Register r) { return true; } ++ ++ static int nof_caller_save_cpu_regs() { return pd_nof_caller_save_cpu_regs_frame_map; } ++ static int last_cpu_reg() { return pd_last_cpu_reg; } ++ static int last_byte_reg() { return pd_last_byte_reg; } ++ ++#endif // CPU_SW64_VM_C1_FRAMEMAP_SW64_HPP ++ +diff --git a/src/hotspot/cpu/sw64/c1_LIRAssembler_sw64.cpp b/src/hotspot/cpu/sw64/c1_LIRAssembler_sw64.cpp +new file mode 100644 +index 0000000000..a2babd70fb 
+--- /dev/null
++++ b/src/hotspot/cpu/sw64/c1_LIRAssembler_sw64.cpp
+@@ -0,0 +1,3135 @@
++/*
++ * Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.inline.hpp"
++#include "asm/assembler.hpp"
++#include "c1/c1_CodeStubs.hpp"
++#include "c1/c1_Compilation.hpp"
++#include "c1/c1_LIRAssembler.hpp"
++#include "c1/c1_MacroAssembler.hpp"
++#include "c1/c1_Runtime1.hpp"
++#include "c1/c1_ValueStack.hpp"
++#include "ci/ciArrayKlass.hpp"
++#include "ci/ciInstance.hpp"
++#include "code/compiledIC.hpp"
++#include "gc/shared/barrierSet.hpp"
++#include "gc/shared/cardTableBarrierSet.hpp"
++#include "gc/shared/collectedHeap.hpp"
++#include "nativeInst_sw64.hpp"
++#include "oops/objArrayKlass.hpp"
++#include "runtime/frame.inline.hpp"
++#include "runtime/sharedRuntime.hpp"
++#include "vmreg_sw64.inline.hpp"
++
++
++
++#ifndef PRODUCT
++#define COMMENT(x) do { __ block_comment(x); } while (0)
++#else
++#define COMMENT(x)
++#endif
++#define BIND(label) bind(label); COMMENT(#label ":")
++NEEDS_CLEANUP // remove this definitions ?
++const Register IC_Klass = rscratch2; // where the IC klass is cached ++const Register SYNC_header = i0; // synchronization header ++const Register SHIFT_count = i0; // where count for shift operations must be ++ ++#define __ _masm-> ++ ++ ++static void select_different_registers(Register preserve, ++ Register extra, ++ Register &tmp1, ++ Register &tmp2) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, extra); ++ tmp2 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2); ++} ++ ++ ++ ++static void select_different_registers(Register preserve, ++ Register extra, ++ Register &tmp1, ++ Register &tmp2, ++ Register &tmp3) { ++ if (tmp1 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp1 = extra; ++ } else if (tmp2 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp2 = extra; ++ } else if (tmp3 == preserve) { ++ assert_different_registers(tmp1, tmp2, tmp3, extra); ++ tmp3 = extra; ++ } ++ assert_different_registers(preserve, tmp1, tmp2, tmp3); ++} ++ ++ ++bool LIR_Assembler::is_small_constant(LIR_Opr opr) { Unimplemented(); return false; } ++ ++ ++LIR_Opr LIR_Assembler::receiverOpr() { ++ return FrameMap::receiver_opr; ++} ++ ++LIR_Opr LIR_Assembler::osrBufferPointer() { ++ return FrameMap::as_pointer_opr(receiverOpr()->as_register()); ++} ++ ++//--------------fpu register translations----------------------- ++ ++ ++address LIR_Assembler::float_constant(float f) { ++ address const_addr = __ float_constant(f); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++ ++address LIR_Assembler::double_constant(double d) { ++ address const_addr = __ double_constant(d); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++address LIR_Assembler::int_constant(jlong n) { ++ address const_addr = __ long_constant(n); ++ if (const_addr == NULL) { ++ bailout("const section overflow"); ++ return __ code()->consts()->start(); ++ } else { ++ return const_addr; ++ } ++} ++ ++void LIR_Assembler::set_24bit_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::reset_FPU() { Unimplemented(); } ++ ++void LIR_Assembler::fpop() { Unimplemented(); } ++ ++void LIR_Assembler::fxch(int i) { Unimplemented(); } ++ ++void LIR_Assembler::fld(int i) { Unimplemented(); } ++ ++void LIR_Assembler::ffree(int i) { Unimplemented(); } ++ ++void LIR_Assembler::breakpoint() { Unimplemented(); } ++ ++void LIR_Assembler::push(LIR_Opr opr) { Unimplemented(); } ++ ++void LIR_Assembler::pop(LIR_Opr opr) { Unimplemented(); } ++ ++bool LIR_Assembler::is_literal_address(LIR_Address* addr) { Unimplemented(); return false; } ++//------------------------------------------- ++ ++static Register as_reg(LIR_Opr op) { ++ return op->is_double_cpu() ? 
op->as_register_lo() : op->as_register(); ++} ++ ++static jlong as_long(LIR_Opr data) { ++ jlong result; ++ switch (data->type()) { ++ case T_INT: ++ result = (data->as_jint()); ++ break; ++ case T_LONG: ++ result = (data->as_jlong()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ result = 0; // unreachable ++ } ++ return result; ++} ++ ++Address LIR_Assembler::as_Address(LIR_Address* addr, Register tmp) { ++ Register base = addr->base()->as_pointer_register(); ++ LIR_Opr opr = addr->index(); ++ if (opr->is_cpu_register()) { ++ Register index; ++ if (opr->is_single_cpu()) ++ index = opr->as_register(); ++ else ++ index = opr->as_register_lo(); ++ assert(addr->disp() == 0, "must be"); ++ switch(opr->type()) { ++ case T_INT: ++ return Address(base, index, Address::sxtw(addr->scale())); ++ case T_LONG: ++ return Address(base, index, Address::lsl(addr->scale())); ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ intptr_t addr_offset = intptr_t(addr->disp()); ++ if (Address::offset_ok_for_immed(addr_offset, addr->scale())) ++ return Address(base, addr_offset, Address::lsl(addr->scale())); ++ else { ++ __ mov(tmp, addr_offset); ++ return Address(base, tmp, Address::lsl(addr->scale())); ++ } ++ } ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address_hi(LIR_Address* addr) { ++ ShouldNotReachHere(); ++ return Address(); ++} ++ ++Address LIR_Assembler::as_Address(LIR_Address* addr) { ++ return as_Address(addr, rscratch1); ++} ++ ++Address LIR_Assembler::as_Address_lo(LIR_Address* addr) { ++ return as_Address(addr, rscratch1); // Ouch ++ // FIXME: This needs to be much more clever. See x86. ++} ++ ++ ++void LIR_Assembler::osr_entry() { ++ offsets()->set_value(CodeOffsets::OSR_Entry, code_offset()); ++ BlockBegin* osr_entry = compilation()->hir()->osr_entry(); ++ ValueStack* entry_state = osr_entry->state(); ++ int number_of_locks = entry_state->locks_size(); ++ ++ // we jump here if osr happens with the interpreter ++ // state set up to continue at the beginning of the ++ // loop that triggered osr - in particular, we have ++ // the following registers setup: ++ // ++ // r2: osr buffer ++ // ++ ++ // build frame ++ ciMethod* m = compilation()->method(); ++ __ build_frame(initial_frame_size_in_bytes(), bang_size_in_bytes()); ++ ++ // OSR buffer is ++ // ++ // locals[nlocals-1..0] ++ // monitors[0..number_of_locks] ++ // ++ // locals is a direct copy of the interpreter frame so in the osr buffer ++ // so first slot in the local array is the last local from the interpreter ++ // and last slot is local[0] (receiver) from the interpreter ++ // ++ // Similarly with locks. The first lock slot in the osr buffer is the nth lock ++ // from the interpreter frame, the nth lock slot in the osr buffer is 0th lock ++ // in the interpreter frame (the method lock if a sync method) ++ ++ // Initialize monitors in the compiled activation. ++ // r2: pointer to osr buffer ++ // ++ // All other registers are dead at this point and the locals will be ++ // copied into place by code emitted in the IR. ++ ++ Register OSR_buf = osrBufferPointer()->as_pointer_register(); ++ { assert(frame::interpreter_frame_monitor_size() == BasicObjectLock::size(), "adjust code below"); ++ int monitor_offset = BytesPerWord * method()->max_locals() + ++ (2 * BytesPerWord) * (number_of_locks - 1); ++ // SharedRuntime::OSR_migration_begin() packs BasicObjectLocks in ++ // the OSR buffer using 2 word entries: first the lock and then ++ // the oop. 
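++    // Worked example, assuming 8-byte words: with max_locals() == 3 and
++    // number_of_locks == 2, monitor_offset = 8*3 + 16*(2-1) = 40, so the loop
++    // below copies the first monitor's lock/oop pair from buffer offsets
++    // 40/48 and the second from 24/32.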
++ for (int i = 0; i < number_of_locks; i++) { ++ int slot_offset = monitor_offset - ((i * 2) * BytesPerWord); ++#ifdef ASSERT ++ // verify the interpreter's monitor has a non-null object ++ { ++ Label L; ++ __ ldr(rscratch1, Address(OSR_buf, slot_offset + 1*BytesPerWord)); ++ __ cbnz(rscratch1, L); ++ __ stop("locked object is NULL"); ++ __ BIND(L); ++ } ++#endif ++ __ ldr(i19, Address(OSR_buf, slot_offset + 0)); ++ __ str(i19, frame_map()->address_for_monitor_lock(i)); ++ __ ldr(i19, Address(OSR_buf, slot_offset + 1*BytesPerWord)); ++ __ str(i19, frame_map()->address_for_monitor_object(i)); ++ } ++ } ++} ++ ++ ++// inline cache check; done before the frame is built. ++int LIR_Assembler::check_icache() { ++ Register receiver = FrameMap::receiver_opr->as_register(); ++ Register ic_klass = IC_Klass; ++ int start_offset = __ offset(); ++ __ inline_cache_check(receiver, ic_klass); ++ ++ // if icache check fails, then jump to runtime routine ++ // Note: RECEIVER must still contain the receiver! ++ Label dont; ++ __ br(Assembler::EQ, dont); ++ __ far_jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ ++ // We align the verified entry point unless the method body ++ // (including its inline cache check) will fit in a single 64-byte ++ // icache line. ++ if (! method()->is_accessor() || __ offset() - start_offset > 4 * 4) { ++ // force alignment after the cache check. ++ __ align(CodeEntryAlignment); ++ } ++ ++ __ BIND(dont); ++ return start_offset; ++} ++ ++ ++void LIR_Assembler::jobject2reg(jobject o, Register reg) { ++ if (o == NULL) { ++ __ mov(reg, zr); ++ } else { ++ __ movoop(reg, o, /*immediate*/true); ++ } ++} ++ ++void LIR_Assembler::deoptimize_trap(CodeEmitInfo *info) { ++ address target = NULL; ++ relocInfo::relocType reloc_type = relocInfo::none; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = Runtime1::entry_for(Runtime1::access_field_patching_id); ++ reloc_type = relocInfo::section_word_type; ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ reloc_type = relocInfo::metadata_type; ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ reloc_type = relocInfo::oop_type; ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ reloc_type = relocInfo::oop_type; ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ far_call(RuntimeAddress(target)); ++ add_call_info_here(info); ++} ++ ++void LIR_Assembler::jobject2reg_with_patching(Register reg, CodeEmitInfo *info) { ++ deoptimize_trap(info); ++} ++ ++ ++// This specifies the rsp decrement needed to build the frame ++int LIR_Assembler::initial_frame_size_in_bytes() const { ++ // if rounding, must let FrameMap know! 
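++  // Sanity check of the computation below, assuming a 64-bit build where
++  // VMRegImpl::slots_per_word == 2 and stack_slot_size == 4: a framesize of
++  // 8 slots gives a decrement of (8 - 4) * 4 = 16 bytes, i.e. everything
++  // except the two words already accounted for by the link and return address.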
++ ++ // The frame_map records size in slots (32bit word) ++ ++ // subtract two words to account for return address and link ++ return (frame_map()->framesize() - (2*VMRegImpl::slots_per_word)) * VMRegImpl::stack_slot_size; ++} ++ ++ ++int LIR_Assembler::emit_exception_handler() { ++ // if the last instruction is a call (typically to do a throw which ++ // is coming at the end after block reordering) the return address ++ // must still point into the code area in order to avoid assertion ++ // failures when searching for the corresponding bci => add a nop ++ // (was bug 5/14/1999 - gri) ++ __ nop(); ++ ++ // generate code for exception handler ++ address handler_base = __ start_a_stub(exception_handler_size()); ++ if (handler_base == NULL) { ++ // not enough space left for the handler ++ bailout("exception handler overflow"); ++ return -1; ++ } ++ ++ int offset = code_offset(); ++ ++ // the exception oop and pc are in i0, and i3 ++ // no other registers need to be preserved, so invalidate them ++ __ invalidate_registers(false, true, true, false, true, true); ++ ++ // check that there is really an exception ++ __ verify_not_null_oop(i0); ++ ++ // search an exception handler (i0: exception oop, i3: throwing pc) ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::handle_exception_from_callee_id))); __ should_not_reach_here(); ++ guarantee(code_offset() - offset <= exception_handler_size(), "overflow"); ++ __ end_a_stub(); ++ ++ return offset; ++} ++ ++ ++// Emit the code to remove the frame from the stack in the exception ++// unwind path. ++int LIR_Assembler::emit_unwind_handler() { ++#ifndef PRODUCT ++ if (CommentedAssembly) { ++ _masm->block_comment("Unwind handler"); ++ } ++#endif ++ ++ int offset = code_offset(); ++ ++ // Fetch the exception from TLS and clear out exception related thread state ++ __ ldr(i0, Address(rthread, JavaThread::exception_oop_offset())); ++ __ str(zr, Address(rthread, JavaThread::exception_oop_offset())); ++ __ str(zr, Address(rthread, JavaThread::exception_pc_offset())); ++ ++ __ BIND(_unwind_handler_entry); ++ __ verify_not_null_oop(i0); ++ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { ++ __ mov(i19, i0); // Preserve the exception ++ } ++ ++ // Preform needed unlocking ++ MonitorExitStub* stub = NULL; ++ if (method()->is_synchronized()) { ++ monitor_address(0, FrameMap::r0_opr); ++ stub = new MonitorExitStub(FrameMap::r0_opr, true, 0); ++ __ unlock_object(i5, i4, i0, *stub->entry()); ++ __ BIND(*stub->continuation()); ++ } ++ ++ if (compilation()->env()->dtrace_method_probes()) { ++ __ call_Unimplemented(); ++#if 0 ++ __ movptr(Address(rsp, 0), rax); ++ __ mov_metadata(Address(rsp, sizeof(void*)), method()->constant_encoding()); ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit))); ++#endif ++ } ++ ++ if (method()->is_synchronized() || compilation()->env()->dtrace_method_probes()) { ++ __ mov(i0, i19); // Restore the exception ++ } ++ ++ // remove the activation and dispatch to the unwind handler ++ __ block_comment("remove_frame and dispatch to the unwind handler"); ++ __ remove_frame(initial_frame_size_in_bytes()); ++ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::unwind_exception_id))); ++ ++ // Emit the slow path assembly ++ if (stub != NULL) { ++ stub->emit_code(this); ++ } ++ ++ return offset; ++} ++ ++ ++int LIR_Assembler::emit_deopt_handler() { ++ // if the last instruction is a call (typically to do a throw which ++ // is coming at the end after block reordering) 
the return address ++ // must still point into the code area in order to avoid assertion ++ // failures when searching for the corresponding bci => add a nop ++ // (was bug 5/14/1999 - gri) ++ __ nop(); ++ ++ // generate code for exception handler ++ address handler_base = __ start_a_stub(deopt_handler_size()); ++ if (handler_base == NULL) { ++ // not enough space left for the handler ++ bailout("deopt handler overflow"); ++ return -1; ++ } ++ ++ int offset = code_offset(); ++ ++ __ adr(lr, pc()); ++ __ far_jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); ++ guarantee(code_offset() - offset <= deopt_handler_size(), "overflow"); ++ __ end_a_stub(); ++ ++ return offset; ++} ++ ++void LIR_Assembler::add_debug_info_for_branch(address adr, CodeEmitInfo* info) { ++ _masm->code_section()->relocate(adr, relocInfo::poll_type); ++ int pc_offset = code_offset(); ++ flush_debug_info(pc_offset); ++ info->record_debug_info(compilation()->debug_info_recorder(), pc_offset); ++ if (info->exception_handlers() != NULL) { ++ compilation()->add_exception_handlers_for_pco(pc_offset, info->exception_handlers()); ++ } ++} ++ ++void LIR_Assembler::return_op(LIR_Opr result) { ++ assert(result->is_illegal() || !result->is_single_cpu() || result->as_register() == i0, "word returns are in i0,"); ++ ++ // Pop the stack before the safepoint code ++ __ remove_frame(initial_frame_size_in_bytes()); ++ ++ if (StackReservedPages > 0 && compilation()->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ address polling_page(os::get_polling_page()); ++ __ read_polling_page(rscratch1, polling_page, relocInfo::poll_return_type); ++ __ ret(lr); ++} ++ ++int LIR_Assembler::safepoint_poll(LIR_Opr tmp, CodeEmitInfo* info) { ++ address polling_page(os::get_polling_page()); ++ guarantee(info != NULL, "Shouldn't be NULL"); ++ assert(os::is_poll_address(polling_page), "should be"); ++ __ get_polling_page(rscratch1, polling_page, relocInfo::poll_type); ++ add_debug_info_for_branch(info); // This isn't just debug info: ++ // it's the oop map ++ __ read_polling_page(rscratch1, relocInfo::poll_type); ++ return __ offset(); ++} ++ ++ ++void LIR_Assembler::move_regs(Register from_reg, Register to_reg) { ++// if (from_reg == r31_sp) ++// from_reg = sp; ++// if (to_reg == r31_sp) ++// to_reg = sp; ++// __ mov(to_reg, from_reg); ++} ++ ++void LIR_Assembler::swap_reg(Register a, Register b) { Unimplemented(); } ++ ++ ++void LIR_Assembler::const2reg(LIR_Opr src, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { ++ assert(src->is_constant(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ LIR_Const* c = src->as_constant_ptr(); ++ ++ switch (c->type()) { ++ case T_INT: { ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ movw(dest->as_register(), c->as_jint()); ++ break; ++ } ++ ++ case T_ADDRESS: { ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ mov(dest->as_register(), c->as_jint()); ++ break; ++ } ++ ++ case T_LONG: { ++ assert(patch_code == lir_patch_none, "no patching handled here"); ++ __ mov(dest->as_register_lo(), (intptr_t)c->as_jlong()); ++ break; ++ } ++ ++ case T_OBJECT: { ++ if (patch_code == lir_patch_none) { ++ jobject2reg(c->as_jobject(), dest->as_register()); ++ } else { ++ jobject2reg_with_patching(dest->as_register(), info); ++ } ++ break; ++ } ++ ++ case T_METADATA: { ++ if (patch_code != lir_patch_none) { ++ klass2reg_with_patching(dest->as_register(), info); ++ } else { ++ __ 
mov_metadata(dest->as_register(), c->as_metadata()); ++ } ++ break; ++ } ++ ++ case T_FLOAT: { ++ if (__ operand_valid_for_float_immediate(c->as_jfloat())) { ++ __ fmovs(dest->as_float_reg(), (c->as_jfloat())); ++ } else { ++ __ adr(rscratch1, InternalAddress(float_constant(c->as_jfloat()))); ++ __ ldrs(dest->as_float_reg(), Address(rscratch1)); ++ } ++ break; ++ } ++ ++ case T_DOUBLE: { ++ if (__ operand_valid_for_float_immediate(c->as_jdouble())) { ++ __ fmovd(dest->as_double_reg(), (c->as_jdouble())); ++ } else { ++ __ adr(rscratch1, InternalAddress(double_constant(c->as_jdouble()))); ++ __ ldrd(dest->as_double_reg(), Address(rscratch1)); ++ } ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::const2stack(LIR_Opr src, LIR_Opr dest) { ++ LIR_Const* c = src->as_constant_ptr(); ++ switch (c->type()) { ++ case T_OBJECT: ++ { ++ if (! c->as_jobject()) ++ __ str(zr, frame_map()->address_for_slot(dest->single_stack_ix())); ++ else { ++ const2reg(src, FrameMap::rscratch1_opr, lir_patch_none, NULL); ++ reg2stack(FrameMap::rscratch1_opr, dest, c->type(), false); ++ } ++ } ++ break; ++ case T_ADDRESS: ++ { ++ const2reg(src, FrameMap::rscratch1_opr, lir_patch_none, NULL); ++ reg2stack(FrameMap::rscratch1_opr, dest, c->type(), false); ++ } ++ case T_INT: ++ case T_FLOAT: ++ { ++ Register reg = zr; ++ if (c->as_jint_bits() == 0) ++ __ strw(zr, frame_map()->address_for_slot(dest->single_stack_ix())); ++ else { ++ __ movw(rscratch1, c->as_jint_bits()); ++ __ strw(rscratch1, frame_map()->address_for_slot(dest->single_stack_ix())); ++ } ++ } ++ break; ++ case T_LONG: ++ case T_DOUBLE: ++ { ++ Register reg = zr; ++ if (c->as_jlong_bits() == 0) ++ __ str(zr, frame_map()->address_for_slot(dest->double_stack_ix(), ++ lo_word_offset_in_bytes)); ++ else { ++ __ mov(rscratch1, (intptr_t)c->as_jlong_bits()); ++ __ str(rscratch1, frame_map()->address_for_slot(dest->double_stack_ix(), ++ lo_word_offset_in_bytes)); ++ } ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::const2mem(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info, bool wide) { ++ assert(src->is_constant(), "should not call otherwise"); ++ LIR_Const* c = src->as_constant_ptr(); ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ ++ void (Assembler::* insn)(Register Rt, const Address &adr); ++ ++ switch (type) { ++ case T_ADDRESS: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::str; ++ break; ++ case T_LONG: ++ assert(c->as_jlong() == 0, "should be"); ++ insn = &Assembler::str; ++ break; ++ case T_INT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::strw; ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ assert(c->as_jobject() == 0, "should be"); ++ if (UseCompressedOops && !wide) { ++ insn = &Assembler::strw; ++ } else { ++ insn = &Assembler::str; ++ } ++ break; ++ case T_CHAR: ++ case T_SHORT: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::strh; ++ break; ++ case T_BOOLEAN: ++ case T_BYTE: ++ assert(c->as_jint() == 0, "should be"); ++ insn = &Assembler::strb; ++ break; ++ default: ++ ShouldNotReachHere(); ++ insn = &Assembler::str; // unreachable ++ } ++ ++ if (info) add_debug_info_for_null_check_here(info); ++ (_masm->*insn)(zr, as_Address(to_addr, rscratch1)); ++} ++ ++void LIR_Assembler::reg2reg(LIR_Opr src, LIR_Opr dest) { ++ assert(src->is_register(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ ++ // move between cpu-registers ++ if (dest->is_single_cpu()) { ++ 
if (src->type() == T_LONG) { ++ // Can do LONG -> OBJECT ++ move_regs(src->as_register_lo(), dest->as_register()); ++ return; ++ } ++ assert(src->is_single_cpu(), "must match"); ++ if (src->type() == T_OBJECT) { ++ __ verify_oop(src->as_register()); ++ } ++ move_regs(src->as_register(), dest->as_register()); ++ ++ } else if (dest->is_double_cpu()) { ++ if (src->type() == T_OBJECT || src->type() == T_ARRAY) { ++ // Surprising to me but we can see move of a long to t_object ++ __ verify_oop(src->as_register()); ++ move_regs(src->as_register(), dest->as_register_lo()); ++ return; ++ } ++ assert(src->is_double_cpu(), "must match"); ++ Register f_lo = src->as_register_lo(); ++ Register f_hi = src->as_register_hi(); ++ Register t_lo = dest->as_register_lo(); ++ Register t_hi = dest->as_register_hi(); ++ assert(f_hi == f_lo, "must be same"); ++ assert(t_hi == t_lo, "must be same"); ++ move_regs(f_lo, t_lo); ++ ++ } else if (dest->is_single_fpu()) { ++ __ fmovs(dest->as_float_reg(), src->as_float_reg()); ++ ++ } else if (dest->is_double_fpu()) { ++ __ fmovd(dest->as_double_reg(), src->as_double_reg()); ++ ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::reg2stack(LIR_Opr src, LIR_Opr dest, BasicType type, bool pop_fpu_stack) { ++ if (src->is_single_cpu()) { ++ if (type == T_ARRAY || type == T_OBJECT) { ++ __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix())); ++ __ verify_oop(src->as_register()); ++ } else if (type == T_METADATA || type == T_DOUBLE || type == T_ADDRESS) { ++ __ str(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix())); ++ } else { ++ __ strw(src->as_register(), frame_map()->address_for_slot(dest->single_stack_ix())); ++ } ++ ++ } else if (src->is_double_cpu()) { ++ Address dest_addr_LO = frame_map()->address_for_slot(dest->double_stack_ix(), lo_word_offset_in_bytes); ++ __ str(src->as_register_lo(), dest_addr_LO); ++ ++ } else if (src->is_single_fpu()) { ++ Address dest_addr = frame_map()->address_for_slot(dest->single_stack_ix()); ++ __ strs(src->as_float_reg(), dest_addr); ++ ++ } else if (src->is_double_fpu()) { ++ Address dest_addr = frame_map()->address_for_slot(dest->double_stack_ix()); ++ __ strd(src->as_double_reg(), dest_addr); ++ ++ } else { ++ ShouldNotReachHere(); ++ } ++ ++} ++ ++ ++void LIR_Assembler::reg2mem(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool pop_fpu_stack, bool wide, bool /* unaligned */) { ++ LIR_Address* to_addr = dest->as_address_ptr(); ++ PatchingStub* patch = NULL; ++ Register compressed_src = rscratch1; ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (type == T_ARRAY || type == T_OBJECT) { ++ __ verify_oop(src->as_register()); ++ ++ if (UseCompressedOops && !wide) { ++ __ encode_heap_oop(compressed_src, src->as_register()); ++ } else { ++ compressed_src = src->as_register(); ++ } ++ } ++ ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: { ++ __ strs(src->as_float_reg(), as_Address(to_addr)); ++ break; ++ } ++ ++ case T_DOUBLE: { ++ __ strd(src->as_double_reg(), as_Address(to_addr)); ++ break; ++ } ++ ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ strw(compressed_src, as_Address(to_addr, rscratch2)); ++ } else { ++ __ str(compressed_src, as_Address(to_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. 
This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. ++ ShouldNotReachHere(); ++ __ str(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_ADDRESS: ++ __ str(src->as_register(), as_Address(to_addr)); ++ break; ++ case T_INT: ++ __ strw(src->as_register(), as_Address(to_addr)); ++ break; ++ ++ case T_LONG: { ++ __ str(src->as_register_lo(), as_Address_lo(to_addr)); ++ break; ++ } ++ ++ case T_BYTE: // fall through ++ case T_BOOLEAN: { ++ __ strb(src->as_register(), as_Address(to_addr)); ++ break; ++ } ++ ++ case T_CHAR: // fall through ++ case T_SHORT: ++ __ strh(src->as_register(), as_Address(to_addr)); ++ break; ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ if (info != NULL) { ++ add_debug_info_for_null_check(null_check_here, info); ++ } ++} ++ ++ ++void LIR_Assembler::stack2reg(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ assert(src->is_stack(), "should not call otherwise"); ++ assert(dest->is_register(), "should not call otherwise"); ++ ++ if (dest->is_single_cpu()) { ++ if (type == T_ARRAY || type == T_OBJECT) { ++ __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_METADATA || type == T_ADDRESS) { ++ __ ldr(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); ++ } else { ++ __ ldrw(dest->as_register(), frame_map()->address_for_slot(src->single_stack_ix())); ++ } ++ ++ } else if (dest->is_double_cpu()) { ++ Address src_addr_LO = frame_map()->address_for_slot(src->double_stack_ix(), lo_word_offset_in_bytes); ++ __ ldr(dest->as_register_lo(), src_addr_LO); ++ ++ } else if (dest->is_single_fpu()) { ++ Address src_addr = frame_map()->address_for_slot(src->single_stack_ix()); ++ __ ldrs(dest->as_float_reg(), src_addr); ++ ++ } else if (dest->is_double_fpu()) { ++ Address src_addr = frame_map()->address_for_slot(src->double_stack_ix()); ++ __ ldrd(dest->as_double_reg(), src_addr); ++ ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void LIR_Assembler::klass2reg_with_patching(Register reg, CodeEmitInfo* info) { ++ address target = NULL; ++ relocInfo::relocType reloc_type = relocInfo::none; ++ ++ switch (patching_id(info)) { ++ case PatchingStub::access_field_id: ++ target = Runtime1::entry_for(Runtime1::access_field_patching_id); ++ reloc_type = relocInfo::section_word_type; ++ break; ++ case PatchingStub::load_klass_id: ++ target = Runtime1::entry_for(Runtime1::load_klass_patching_id); ++ reloc_type = relocInfo::metadata_type; ++ break; ++ case PatchingStub::load_mirror_id: ++ target = Runtime1::entry_for(Runtime1::load_mirror_patching_id); ++ reloc_type = relocInfo::oop_type; ++ break; ++ case PatchingStub::load_appendix_id: ++ target = Runtime1::entry_for(Runtime1::load_appendix_patching_id); ++ reloc_type = relocInfo::oop_type; ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ __ far_call(RuntimeAddress(target)); ++ add_call_info_here(info); ++} ++ ++void LIR_Assembler::stack2stack(LIR_Opr src, LIR_Opr dest, BasicType type) { ++ ++ LIR_Opr temp; ++ if (type == T_LONG || type == T_DOUBLE) ++ temp = FrameMap::rscratch1_long_opr; ++ else ++ temp = FrameMap::rscratch1_opr; ++ ++ stack2reg(src, temp, src->type()); ++ reg2stack(temp, dest, dest->type(), false); ++} ++ ++ ++void LIR_Assembler::mem2reg(LIR_Opr src, LIR_Opr dest, BasicType type, LIR_PatchCode patch_code, CodeEmitInfo* info, bool wide, bool /* unaligned */) { ++ LIR_Address* addr = src->as_address_ptr(); ++ 
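++  // Note: 'from_addr' below is the same LIR_Address as 'addr'; the two names
++  // appear to be redundant and are used interchangeably in this function.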
LIR_Address* from_addr = src->as_address_ptr(); ++ ++ if (addr->base()->type() == T_OBJECT) { ++ __ verify_oop(addr->base()->as_pointer_register()); ++ } ++ ++ if (patch_code != lir_patch_none) { ++ deoptimize_trap(info); ++ return; ++ } ++ ++ if (info != NULL) { ++ add_debug_info_for_null_check_here(info); ++ } ++ int null_check_here = code_offset(); ++ switch (type) { ++ case T_FLOAT: { ++ __ ldrs(dest->as_float_reg(), as_Address(from_addr)); ++ break; ++ } ++ ++ case T_DOUBLE: { ++ __ ldrd(dest->as_double_reg(), as_Address(from_addr)); ++ break; ++ } ++ ++ case T_ARRAY: // fall through ++ case T_OBJECT: // fall through ++ if (UseCompressedOops && !wide) { ++ __ ldrw(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ldr(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_METADATA: ++ // We get here to store a method pointer to the stack to pass to ++ // a dtrace runtime call. This can't work on 64 bit with ++ // compressed klass ptrs: T_METADATA can be a compressed klass ++ // ptr or a 64 bit method pointer. ++ ShouldNotReachHere(); ++ __ ldr(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_ADDRESS: ++ // FIXME: OMG this is a horrible kludge. Any offset from an ++ // address that matches klass_offset_in_bytes() will be loaded ++ // as a word, not a long. ++ if (UseCompressedClassPointers && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ __ ldrw(dest->as_register(), as_Address(from_addr)); ++ } else { ++ __ ldr(dest->as_register(), as_Address(from_addr)); ++ } ++ break; ++ case T_INT: ++ __ ldrw(dest->as_register(), as_Address(from_addr)); ++ break; ++ ++ case T_LONG: { ++ __ ldr(dest->as_register_lo(), as_Address_lo(from_addr)); ++ break; ++ } ++ ++ case T_BYTE: ++ __ ldrsb(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_BOOLEAN: { ++ __ ldrb(dest->as_register(), as_Address(from_addr)); ++ break; ++ } ++ ++ case T_CHAR: ++ __ ldrh(dest->as_register(), as_Address(from_addr)); ++ break; ++ case T_SHORT: ++ __ ldrsh(dest->as_register(), as_Address(from_addr)); ++ break; ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ if (type == T_ARRAY || type == T_OBJECT) { ++ if (UseCompressedOops && !wide) { ++ __ decode_heap_oop(dest->as_register()); ++ } ++ __ verify_oop(dest->as_register()); ++ } else if (type == T_ADDRESS && addr->disp() == oopDesc::klass_offset_in_bytes()) { ++ if (UseCompressedClassPointers) { ++ __ decode_klass_not_null(dest->as_register()); ++ } ++ } ++} ++ ++ ++int LIR_Assembler::array_element_size(BasicType type) const { ++ int elem_size = type2aelembytes(type); ++ return exact_log2(elem_size); ++} ++ ++void LIR_Assembler::arithmetic_idiv(LIR_Op3* op, bool is_irem) { ++ Register Rdividend = op->in_opr1()->as_register(); ++ Register Rdivisor = op->in_opr2()->as_register(); ++ Register Rscratch = op->in_opr3()->as_register(); ++ Register Rresult = op->result_opr()->as_register(); ++ int divisor = -1; ++ ++ /* ++ TODO: For some reason, using the Rscratch that gets passed in is ++ not possible because the register allocator does not see the tmp reg ++ as used, and assignes it the same register as Rdividend. We use rscratch1 ++ instead. 
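++  Note also that 'divisor' is initialised to -1 and never reassigned, so the
++  is_power_of_2(divisor) fast path below can never be taken as written (its
++  body is empty anyway); every idiv/irem falls through to corrected_idivl.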
++ ++ assert(Rdividend != Rscratch, ""); ++ assert(Rdivisor != Rscratch, ""); ++ */ ++ ++ if (Rdivisor == noreg && is_power_of_2(divisor)) { ++ // convert division by a power of two into some shifts and logical operations ++ } ++ ++ __ corrected_idivl(Rresult, Rdividend, Rdivisor, is_irem, rscratch1); ++} ++ ++void LIR_Assembler::emit_op3(LIR_Op3* op) { ++ switch (op->code()) { ++ case lir_idiv: ++ arithmetic_idiv(op, false); ++ break; ++ case lir_irem: ++ arithmetic_idiv(op, true); ++ break; ++ case lir_fmad: ++ __ fmaddd(op->result_opr()->as_double_reg(), ++ op->in_opr1()->as_double_reg(), ++ op->in_opr2()->as_double_reg(), ++ op->in_opr3()->as_double_reg()); ++ break; ++ case lir_fmaf: ++ __ fmadds(op->result_opr()->as_float_reg(), ++ op->in_opr1()->as_float_reg(), ++ op->in_opr2()->as_float_reg(), ++ op->in_opr3()->as_float_reg()); ++ break; ++ default: ShouldNotReachHere(); break; ++ } ++} ++ ++void LIR_Assembler::emit_opBranch(LIR_OpBranch* op) { ++#ifdef ASSERT ++ assert(op->block() == NULL || op->block()->label() == op->label(), "wrong label"); ++ if (op->block() != NULL) _branch_target_blocks.append(op->block()); ++ if (op->ublock() != NULL) _branch_target_blocks.append(op->ublock()); ++#endif ++ ++ if (op->cond() == lir_cond_always) { ++ if (op->info() != NULL) add_debug_info_for_branch(op->info()); ++ __ b(*(op->label())); ++ } else { ++ Assembler::Condition acond; ++ if (op->code() == lir_cond_float_branch) { ++ bool is_unordered = (op->ublock() == op->block()); ++ // Assembler::EQ does not permit unordered branches, so we add ++ // another branch here. Likewise, Assembler::NE does not permit ++ // ordered branches. ++ if ((is_unordered && op->cond() == lir_cond_equal) ++ || (!is_unordered && op->cond() == lir_cond_notEqual)) ++ __ br(Assembler::VS, *(op->ublock()->label())); ++ switch(op->cond()) { ++ case lir_cond_equal: acond = Assembler::EQ; break; ++ case lir_cond_notEqual: acond = Assembler::NE; break; ++ case lir_cond_less: acond = (is_unordered ? Assembler::LT : Assembler::LO); break; ++ case lir_cond_lessEqual: acond = (is_unordered ? Assembler::LE : Assembler::LS); break; ++ case lir_cond_greaterEqual: acond = (is_unordered ? Assembler::HS : Assembler::GE); break; ++ case lir_cond_greater: acond = (is_unordered ? 
Assembler::HI : Assembler::GT); break; ++ default: ShouldNotReachHere(); ++ acond = Assembler::EQ; // unreachable ++ } ++ } else { ++ switch (op->cond()) { ++ case lir_cond_equal: acond = Assembler::EQ; break; ++ case lir_cond_notEqual: acond = Assembler::NE; break; ++ case lir_cond_less: acond = Assembler::LT; break; ++ case lir_cond_lessEqual: acond = Assembler::LE; break; ++ case lir_cond_greaterEqual: acond = Assembler::GE; break; ++ case lir_cond_greater: acond = Assembler::GT; break; ++ case lir_cond_belowEqual: acond = Assembler::LS; break; ++ case lir_cond_aboveEqual: acond = Assembler::HS; break; ++ default: ShouldNotReachHere(); ++ acond = Assembler::EQ; // unreachable ++ } ++ } ++ __ br(acond,*(op->label())); ++ } ++} ++ ++ ++ ++void LIR_Assembler::emit_opConvert(LIR_OpConvert* op) { ++ LIR_Opr src = op->in_opr(); ++ LIR_Opr dest = op->result_opr(); ++ ++ switch (op->bytecode()) { ++ case Bytecodes::_i2f: ++ { ++ __ scvtfws(dest->as_float_reg(), src->as_register()); ++ break; ++ } ++ case Bytecodes::_i2d: ++ { ++ __ scvtfwd(dest->as_double_reg(), src->as_register()); ++ break; ++ } ++ case Bytecodes::_l2d: ++ { ++ __ scvtfd(dest->as_double_reg(), src->as_register_lo()); ++ break; ++ } ++ case Bytecodes::_l2f: ++ { ++ __ scvtfs(dest->as_float_reg(), src->as_register_lo()); ++ break; ++ } ++ case Bytecodes::_f2d: ++ { ++ __ fcvts(dest->as_double_reg(), src->as_float_reg()); ++ break; ++ } ++ case Bytecodes::_d2f: ++ { ++ __ fcvtd(dest->as_float_reg(), src->as_double_reg()); ++ break; ++ } ++ case Bytecodes::_i2c: ++ { ++ __ ubfx(dest->as_register(), src->as_register(), 0, 16); ++ break; ++ } ++ case Bytecodes::_i2l: ++ { ++ __ sxtw(dest->as_register_lo(), src->as_register()); ++ break; ++ } ++ case Bytecodes::_i2s: ++ { ++ __ sxth(dest->as_register(), src->as_register()); ++ break; ++ } ++ case Bytecodes::_i2b: ++ { ++ __ sxtb(dest->as_register(), src->as_register()); ++ break; ++ } ++ case Bytecodes::_l2i: ++ { ++ _masm->block_comment("FIXME: This could be a no-op"); ++ __ uxtw(dest->as_register(), src->as_register_lo()); ++ break; ++ } ++ case Bytecodes::_d2l: ++ { ++ __ fcvtzd(dest->as_register_lo(), src->as_double_reg()); ++ break; ++ } ++ case Bytecodes::_f2i: ++ { ++ __ fcvtzsw(dest->as_register(), src->as_float_reg()); ++ break; ++ } ++ case Bytecodes::_f2l: ++ { ++ __ fcvtzs(dest->as_register_lo(), src->as_float_reg()); ++ break; ++ } ++ case Bytecodes::_d2i: ++ { ++ __ fcvtzdw(dest->as_register(), src->as_double_reg()); ++ break; ++ } ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::emit_alloc_obj(LIR_OpAllocObj* op) { ++ if (op->init_check()) { ++ __ ldrb(rscratch1, Address(op->klass()->as_register(), ++ InstanceKlass::init_state_offset())); ++ __ cmpw(rscratch1, InstanceKlass::fully_initialized); ++ add_debug_info_for_null_check_here(op->stub()->info()); ++ __ br(Assembler::NE, *op->stub()->entry()); ++ } ++ __ allocate_object(op->obj()->as_register(), ++ op->tmp1()->as_register(), ++ op->tmp2()->as_register(), ++ op->header_size(), ++ op->object_size(), ++ op->klass()->as_register(), ++ *op->stub()->entry()); ++ __ BIND(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::emit_alloc_array(LIR_OpAllocArray* op) { ++ Register len = op->len()->as_register(); ++ __ uxtw(len, len); ++ ++ if (UseSlowPath || ++ (!UseFastNewObjectArray && (op->type() == T_OBJECT || op->type() == T_ARRAY)) || ++ (!UseFastNewTypeArray && (op->type() != T_OBJECT && op->type() != T_ARRAY))) { ++ __ b(*op->stub()->entry()); ++ } else { ++ Register tmp1 = 
op->tmp1()->as_register(); ++ Register tmp2 = op->tmp2()->as_register(); ++ Register tmp3 = op->tmp3()->as_register(); ++ if (len == tmp1) { ++ tmp1 = tmp3; ++ } else if (len == tmp2) { ++ tmp2 = tmp3; ++ } else if (len == tmp3) { ++ // everything is ok ++ } else { ++ __ mov(tmp3, len); ++ } ++ __ allocate_array(op->obj()->as_register(), ++ len, ++ tmp1, ++ tmp2, ++ arrayOopDesc::header_size(op->type()), ++ array_element_size(op->type()), ++ op->klass()->as_register(), ++ *op->stub()->entry()); ++ } ++ __ BIND(*op->stub()->continuation()); ++} ++ ++void LIR_Assembler::type_profile_helper(Register mdo, ++ ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done) { ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ // See if the receiver is receiver[n]. ++ __ lea(rscratch2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ __ ldr(rscratch1, Address(rscratch2)); ++ __ cmp(recv, rscratch1); ++ __ br(Assembler::NE, next_test); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i))); ++ __ addptr(data_addr, DataLayout::counter_increment); ++ __ b(*update_done); ++ __ BIND(next_test); ++ } ++ ++ // Didn't find receiver; find next empty slot and fill it in ++ for (uint i = 0; i < ReceiverTypeData::row_limit(); i++) { ++ Label next_test; ++ __ lea(rscratch2, ++ Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_offset(i)))); ++ Address recv_addr(rscratch2); ++ __ ldr(rscratch1, recv_addr); ++ __ cbnz(rscratch1, next_test); ++ __ str(recv, recv_addr); ++ __ mov(rscratch1, DataLayout::counter_increment); ++ __ lea(rscratch2, Address(mdo, md->byte_offset_of_slot(data, ReceiverTypeData::receiver_count_offset(i)))); ++ __ str(rscratch1, Address(rscratch2)); ++ __ b(*update_done); ++ __ BIND(next_test); ++ } ++} ++ ++void LIR_Assembler::emit_typecheck_helper(LIR_OpTypeCheck *op, Label* success, Label* failure, Label* obj_is_null) { ++ // we always need a stub for the failure case. ++ CodeStub* stub = op->stub(); ++ Register obj = op->object()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ ciKlass* k = op->klass(); ++ Register Rtmp1 = noreg; ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ const bool should_profile = op->should_profile(); ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ Label profile_cast_success, profile_cast_failure; ++ Label *success_target = should_profile ? &profile_cast_success : success; ++ Label *failure_target = should_profile ? 
&profile_cast_failure : failure; ++ ++ if (obj == k_RInfo) { ++ k_RInfo = dst; ++ } else if (obj == klass_RInfo) { ++ klass_RInfo = dst; ++ } ++ if (k->is_loaded() && !UseCompressedClassPointers) { ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo); ++ } else { ++ Rtmp1 = op->tmp3()->as_register(); ++ select_different_registers(obj, dst, k_RInfo, klass_RInfo, Rtmp1); ++ } ++ ++ assert_different_registers(obj, k_RInfo, klass_RInfo); ++ ++ if (should_profile) { ++ Label not_null; ++ __ cbnz(obj, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr ++ = __ form_address(rscratch2, mdo, ++ md->byte_offset_of_slot(data, DataLayout::flags_offset()), ++ 0); ++ __ ldrb(rscratch1, data_addr); ++ __ orr(rscratch1, rscratch1, BitData::null_seen_byte_constant()); ++ __ strb(rscratch1, data_addr); ++ __ b(*obj_is_null); ++ __ BIND(not_null); ++ } else { ++ __ cbz(obj, *obj_is_null); ++ } ++ ++ if (!k->is_loaded()) { ++ klass2reg_with_patching(k_RInfo, op->info_for_patch()); ++ } else { ++ __ mov_metadata(k_RInfo, k->constant_encoding()); ++ } ++ __ verify_oop(obj); ++ ++ if (op->fast_check()) { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(rscratch1, obj); ++ __ cmp( rscratch1, k_RInfo); ++ ++ __ br(Assembler::NE, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // get object class ++ // not a safepoint as obj null check happens earlier ++ __ load_klass(klass_RInfo, obj); ++ if (k->is_loaded()) { ++ // See if we get an immediate positive hit ++ __ ldr(rscratch1, Address(klass_RInfo, long(k->super_check_offset()))); ++ __ cmp(k_RInfo, rscratch1); ++ if ((juint)in_bytes(Klass::secondary_super_cache_offset()) != k->super_check_offset()) { ++ __ br(Assembler::NE, *failure_target); ++ // successful cast, fall through to profile or jump ++ } else { ++ // See if we get an immediate positive hit ++ __ br(Assembler::EQ, *success_target); ++ // check for self ++ __ cmp(klass_RInfo, k_RInfo); ++ __ br(Assembler::EQ, *success_target); ++ ++ __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize))); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); ++ __ ldr(klass_RInfo, Address(__ post(sp, 2 * wordSize))); ++ // result is a boolean ++ __ cbzw(klass_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } else { ++ // perform the fast part of the checking logic ++ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize))); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); ++ __ ldp(k_RInfo, klass_RInfo, Address(__ post(sp, 2 * wordSize))); ++ // result is a boolean ++ __ cbz(k_RInfo, *failure_target); ++ // successful cast, fall through to profile or jump ++ } ++ } ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ BIND(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, obj); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, success); ++ __ b(*success); ++ ++ __ BIND(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr ++ = __ form_address(rscratch2, mdo, ++ md->byte_offset_of_slot(data, 
CounterData::count_offset()), ++ 0); ++ __ ldr(rscratch1, counter_addr); ++ __ sub(rscratch1, rscratch1, DataLayout::counter_increment); ++ __ str(rscratch1, counter_addr); ++ __ b(*failure); ++ } ++ __ b(*success); ++} ++ ++ ++void LIR_Assembler::emit_opTypeCheck(LIR_OpTypeCheck* op) { ++ const bool should_profile = op->should_profile(); ++ ++ LIR_Code code = op->code(); ++ if (code == lir_store_check) { ++ Register value = op->object()->as_register(); ++ Register array = op->array()->as_register(); ++ Register k_RInfo = op->tmp1()->as_register(); ++ Register klass_RInfo = op->tmp2()->as_register(); ++ Register Rtmp1 = op->tmp3()->as_register(); ++ ++ CodeStub* stub = op->stub(); ++ ++ // check if it needs to be profiled ++ ciMethodData* md; ++ ciProfileData* data; ++ ++ if (should_profile) { ++ ciMethod* method = op->profiled_method(); ++ assert(method != NULL, "Should have method"); ++ int bci = op->profiled_bci(); ++ md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ data = md->bci_to_data(bci); ++ assert(data != NULL, "need data for type check"); ++ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData for type check"); ++ } ++ Label profile_cast_success, profile_cast_failure, done; ++ Label *success_target = should_profile ? &profile_cast_success : &done; ++ Label *failure_target = should_profile ? &profile_cast_failure : stub->entry(); ++ ++ if (should_profile) { ++ Label not_null; ++ __ cbnz(value, not_null); ++ // Object is null; update MDO and exit ++ Register mdo = klass_RInfo; ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address data_addr ++ = __ form_address(rscratch2, mdo, ++ md->byte_offset_of_slot(data, DataLayout::flags_offset()), ++ 0); ++ __ ldrb(rscratch1, data_addr); ++ __ orr(rscratch1, rscratch1, BitData::null_seen_byte_constant()); ++ __ strb(rscratch1, data_addr); ++ __ b(done); ++ __ BIND(not_null); ++ } else { ++ __ cbz(value, done); ++ } ++ ++ add_debug_info_for_null_check_here(op->info_for_exception()); ++ __ load_klass(k_RInfo, array); ++ __ load_klass(klass_RInfo, value); ++ ++ // get instance klass (it's already uncompressed) ++ __ ldr(k_RInfo, Address(k_RInfo, ObjArrayKlass::element_klass_offset())); ++ // perform the fast part of the checking logic ++ __ check_klass_subtype_fast_path(klass_RInfo, k_RInfo, Rtmp1, success_target, failure_target, NULL); ++ // call out-of-line instance of __ check_klass_subtype_slow_path(...): ++ __ stp(klass_RInfo, k_RInfo, Address(__ pre(sp, -2 * wordSize))); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); ++ __ ldp(k_RInfo, klass_RInfo, Address(__ post(sp, 2 * wordSize))); ++ // result is a boolean ++ __ cbzw(k_RInfo, *failure_target); ++ // fall through to the success case ++ ++ if (should_profile) { ++ Register mdo = klass_RInfo, recv = k_RInfo; ++ __ BIND(profile_cast_success); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ __ load_klass(recv, value); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &done); ++ __ b(done); ++ ++ __ BIND(profile_cast_failure); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ __ lea(rscratch2, counter_addr); ++ __ ldr(rscratch1, Address(rscratch2)); ++ __ sub(rscratch1, rscratch1, DataLayout::counter_increment); ++ __ str(rscratch1, Address(rscratch2)); ++ __ b(*stub->entry()); ++ } ++ ++ __ BIND(done); ++ } else if (code == lir_checkcast) { ++ Register obj = op->object()->as_register(); ++ Register dst = 
op->result_opr()->as_register(); ++ Label success; ++ emit_typecheck_helper(op, &success, op->stub()->entry(), &success); ++ __ BIND(success); ++ if (dst != obj) { ++ __ mov(dst, obj); ++ } ++ } else if (code == lir_instanceof) { ++ Register obj = op->object()->as_register(); ++ Register dst = op->result_opr()->as_register(); ++ Label success, failure, done; ++ emit_typecheck_helper(op, &success, &failure, &failure); ++ __ BIND(failure); ++ __ mov(dst, zr); ++ __ b(done); ++ __ BIND(success); ++ __ mov(dst, 1); ++ __ BIND(done); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::casw(Register addr, Register newval, Register cmpval) { ++ __ cmpxchg(addr, cmpval, newval, Assembler::word, /* acquire*/ true, /* release*/ true, /* weak*/ false, rscratch1); ++ __ cset(rscratch1, Assembler::NE); ++ __ membar(__ AnyAny); ++} ++ ++void LIR_Assembler::casl(Register addr, Register newval, Register cmpval) { ++ __ cmpxchg(addr, cmpval, newval, Assembler::xword, /* acquire*/ true, /* release*/ true, /* weak*/ false, rscratch1); ++ __ cset(rscratch1, Assembler::NE); ++ __ membar(__ AnyAny); ++} ++ ++ ++void LIR_Assembler::emit_compare_and_swap(LIR_OpCompareAndSwap* op) { ++ assert(VM_Version::supports_cx8(), "wrong machine"); ++ Register addr; ++ if (op->addr()->is_register()) { ++ addr = as_reg(op->addr()); ++ } else { ++ assert(op->addr()->is_address(), "what else?"); ++ LIR_Address* addr_ptr = op->addr()->as_address_ptr(); ++ assert(addr_ptr->disp() == 0, "need 0 disp"); ++ assert(addr_ptr->index() == LIR_OprDesc::illegalOpr(), "need 0 index"); ++ addr = as_reg(addr_ptr->base()); ++ } ++ Register newval = as_reg(op->new_value()); ++ Register cmpval = as_reg(op->cmp_value()); ++ Label succeed, fail, around; ++ ++ if (op->code() == lir_cas_obj) { ++ if (UseCompressedOops) { ++ Register t1 = op->tmp1()->as_register(); ++ assert(op->tmp1()->is_valid(), "must be"); ++ __ encode_heap_oop(t1, cmpval); ++ cmpval = t1; ++ __ encode_heap_oop(rscratch2, newval); ++ newval = rscratch2; ++ casw(addr, newval, cmpval); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++ } else if (op->code() == lir_cas_int) { ++ casw(addr, newval, cmpval); ++ } else { ++ casl(addr, newval, cmpval); ++ } ++} ++ ++ ++void LIR_Assembler::cmove(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Opr result, BasicType type) { ++ ++ Assembler::Condition acond, ncond; ++ switch (condition) { ++ case lir_cond_equal: acond = Assembler::EQ; ncond = Assembler::NE; break; ++ case lir_cond_notEqual: acond = Assembler::NE; ncond = Assembler::EQ; break; ++ case lir_cond_less: acond = Assembler::LT; ncond = Assembler::GE; break; ++ case lir_cond_lessEqual: acond = Assembler::LE; ncond = Assembler::GT; break; ++ case lir_cond_greaterEqual: acond = Assembler::GE; ncond = Assembler::LT; break; ++ case lir_cond_greater: acond = Assembler::GT; ncond = Assembler::LE; break; ++ case lir_cond_belowEqual: ++ case lir_cond_aboveEqual: ++ default: ShouldNotReachHere(); ++ acond = Assembler::EQ; ncond = Assembler::NE; // unreachable ++ } ++ ++ assert(result->is_single_cpu() || result->is_double_cpu(), ++ "expect single register for result"); ++ if (opr1->is_constant() && opr2->is_constant() ++ && opr1->type() == T_INT && opr2->type() == T_INT) { ++ jint val1 = opr1->as_jint(); ++ jint val2 = opr2->as_jint(); ++ if (val1 == 0 && val2 == 1) { ++ __ cset(result->as_register(), ncond); ++ return; ++ } else if (val1 == 1 && val2 == 0) { ++ __ cset(result->as_register(), acond); ++ return; ++ } ++ } ++ ++ if (opr1->is_constant() && 
opr2->is_constant() ++ && opr1->type() == T_LONG && opr2->type() == T_LONG) { ++ jlong val1 = opr1->as_jlong(); ++ jlong val2 = opr2->as_jlong(); ++ if (val1 == 0 && val2 == 1) { ++ __ cset(result->as_register_lo(), ncond); ++ return; ++ } else if (val1 == 1 && val2 == 0) { ++ __ cset(result->as_register_lo(), acond); ++ return; ++ } ++ } ++ ++ if (opr1->is_stack()) { ++ stack2reg(opr1, FrameMap::rscratch1_opr, result->type()); ++ opr1 = FrameMap::rscratch1_opr; ++ } else if (opr1->is_constant()) { ++ LIR_Opr tmp ++ = opr1->type() == T_LONG ? FrameMap::rscratch1_long_opr : FrameMap::rscratch1_opr; ++ const2reg(opr1, tmp, lir_patch_none, NULL); ++ opr1 = tmp; ++ } ++ ++ if (opr2->is_stack()) { ++ stack2reg(opr2, FrameMap::rscratch2_opr, result->type()); ++ opr2 = FrameMap::rscratch2_opr; ++ } else if (opr2->is_constant()) { ++ LIR_Opr tmp ++ = opr2->type() == T_LONG ? FrameMap::rscratch2_long_opr : FrameMap::rscratch2_opr; ++ const2reg(opr2, tmp, lir_patch_none, NULL); ++ opr2 = tmp; ++ } ++ ++ if (result->type() == T_LONG) ++ __ csel(result->as_register_lo(), opr1->as_register_lo(), opr2->as_register_lo(), acond); ++ else ++ __ csel(result->as_register(), opr1->as_register(), opr2->as_register(), acond); ++} ++ ++void LIR_Assembler::arith_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dest, CodeEmitInfo* info, bool pop_fpu_stack) { ++ assert(info == NULL, "should never be used, idiv/irem and ldiv/lrem not handled by this method"); ++ ++ if (left->is_single_cpu()) { ++ Register lreg = left->as_register(); ++ Register dreg = as_reg(dest); ++ ++ if (right->is_single_cpu()) { ++ // cpu register - cpu register ++ ++ assert(left->type() == T_INT && right->type() == T_INT && dest->type() == T_INT, ++ "should be"); ++ Register rreg = right->as_register(); ++ switch (code) { ++ case lir_add: __ addw (dest->as_register(), lreg, rreg); break; ++ case lir_sub: __ subw (dest->as_register(), lreg, rreg); break; ++ case lir_mul: __ mulw (dest->as_register(), lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ } else if (right->is_double_cpu()) { ++ Register rreg = right->as_register_lo(); ++ // single_cpu + double_cpu: can happen with obj+long ++ assert(code == lir_add || code == lir_sub, "mismatched arithmetic op"); ++ switch (code) { ++ case lir_add: __ add(dreg, lreg, rreg); break; ++ case lir_sub: __ sub(dreg, lreg, rreg); break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (right->is_constant()) { ++ // cpu register - constant ++ jlong c; ++ ++ // FIXME. This is fugly: we really need to factor all this logic. 
++      switch(right->type()) {
++      case T_LONG:
++        c = right->as_constant_ptr()->as_jlong();
++        break;
++      case T_INT:
++      case T_ADDRESS:
++        c = right->as_constant_ptr()->as_jint();
++        break;
++      default:
++        ShouldNotReachHere();
++        c = 0; // unreachable
++        break;
++      }
++
++      assert(code == lir_add || code == lir_sub, "mismatched arithmetic op");
++      if (c == 0 && dreg == lreg) {
++        COMMENT("effective nop elided");
++        return;
++      }
++      switch(left->type()) {
++      case T_INT:
++        switch (code) {
++        case lir_add: __ addw(dreg, c, lreg); break;
++        case lir_sub: __ subw(dreg, c, lreg); break;
++        default: ShouldNotReachHere();
++        }
++        break;
++      case T_OBJECT:
++      case T_ADDRESS:
++        switch (code) {
++        case lir_add: __ add(dreg, lreg, c); break;
++        case lir_sub: __ sub(dreg, lreg, c); break;
++        default: ShouldNotReachHere();
++        }
++        break;
++      default:
++        ShouldNotReachHere();
++      }
++    } else {
++      ShouldNotReachHere();
++    }
++
++  } else if (left->is_double_cpu()) {
++    Register lreg_lo = left->as_register_lo();
++
++    if (right->is_double_cpu()) {
++      // cpu register - cpu register
++      Register rreg_lo = right->as_register_lo();
++      switch (code) {
++      case lir_add: __ add (dest->as_register_lo(), lreg_lo, rreg_lo); break;
++      case lir_sub: __ sub (dest->as_register_lo(), lreg_lo, rreg_lo); break;
++      case lir_mul: __ mul (dest->as_register_lo(), lreg_lo, rreg_lo); break;
++      case lir_div: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, false, rscratch1); break;
++      case lir_rem: __ corrected_idivq(dest->as_register_lo(), lreg_lo, rreg_lo, true, rscratch1); break;
++      default:
++        ShouldNotReachHere();
++      }
++
++    } else if (right->is_constant()) {
++      jlong c = right->as_constant_ptr()->as_jlong_bits();
++      Register dreg = as_reg(dest);
++      assert(code == lir_add || code == lir_sub, "mismatched arithmetic op");
++      if (c == 0 && dreg == lreg_lo) {
++        COMMENT("effective nop elided");
++        return;
++      }
++      switch (code) {
++      case lir_add: __ add(dreg, lreg_lo, c); break;
++      case lir_sub: __ sub(dreg, lreg_lo, c); break;
++      default:
++        ShouldNotReachHere();
++      }
++    } else {
++      ShouldNotReachHere();
++    }
++  } else if (left->is_single_fpu()) {
++    assert(right->is_single_fpu(), "right hand side of float arithmetics needs to be float register");
++    switch (code) {
++    case lir_add: __ fadds (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
++    case lir_sub: __ fsubs (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
++    case lir_mul_strictfp: // fall through
++    case lir_mul: __ fmuls (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
++    case lir_div_strictfp: // fall through
++    case lir_div: __ fdivs (dest->as_float_reg(), left->as_float_reg(), right->as_float_reg()); break;
++    default:
++      ShouldNotReachHere();
++    }
++  } else if (left->is_double_fpu()) {
++    if (right->is_double_fpu()) {
++      // fpu register - fpu register
++      switch (code) {
++      case lir_add: __ faddd (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
++      case lir_sub: __ fsubd (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
++      case lir_mul_strictfp: // fall through
++      case lir_mul: __ fmuld (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
++      case lir_div_strictfp: // fall through
++      case lir_div: __ fdivd (dest->as_double_reg(), left->as_double_reg(), right->as_double_reg()); break;
++      default:
++        ShouldNotReachHere();
++      }
++    } else {
++      if (right->is_constant()) {
++        ShouldNotReachHere();
++      }
++
ShouldNotReachHere(); ++ } ++ } else if (left->is_single_stack() || left->is_address()) { ++ assert(left == dest, "left and dest must be equal"); ++ ShouldNotReachHere(); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack) { Unimplemented(); } ++ ++ ++void LIR_Assembler::intrinsic_op(LIR_Code code, LIR_Opr value, LIR_Opr unused, LIR_Opr dest, LIR_Op* op) { ++ switch(code) { ++ case lir_abs : __ fabsd(dest->as_double_reg(), value->as_double_reg()); break; ++ case lir_sqrt: __ fsqrtd(dest->as_double_reg(), value->as_double_reg()); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::logic_op(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst) { ++ ++ assert(left->is_single_cpu() || left->is_double_cpu(), "expect single or double register"); ++ Register Rleft = left->is_single_cpu() ? left->as_register() : ++ left->as_register_lo(); ++ if (dst->is_single_cpu()) { ++ Register Rdst = dst->as_register(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: __ andw (Rdst, Rleft, right->as_jint()); break; ++ case lir_logic_or: __ orrw (Rdst, Rleft, right->as_jint()); break; ++ case lir_logic_xor: __ eorw (Rdst, Rleft, right->as_jint()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? right->as_register() : ++ right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ andw (Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ orrw (Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ eorw (Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } else { ++ Register Rdst = dst->as_register_lo(); ++ if (right->is_constant()) { ++ switch (code) { ++ case lir_logic_and: __ andr (Rdst, Rleft, right->as_jlong()); break; ++ case lir_logic_or: __ orr (Rdst, Rleft, right->as_jlong()); break; ++ case lir_logic_xor: __ eor (Rdst, Rleft, right->as_jlong()); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } else { ++ Register Rright = right->is_single_cpu() ? 
right->as_register() : ++ right->as_register_lo(); ++ switch (code) { ++ case lir_logic_and: __ andr (Rdst, Rleft, Rright); break; ++ case lir_logic_or: __ orr (Rdst, Rleft, Rright); break; ++ case lir_logic_xor: __ eor (Rdst, Rleft, Rright); break; ++ default: ShouldNotReachHere(); break; ++ } ++ } ++ } ++} ++ ++ ++ ++void LIR_Assembler::arithmetic_idiv(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr temp, LIR_Opr result, CodeEmitInfo* info) { Unimplemented(); } ++ ++ ++void LIR_Assembler::comp_op(LIR_Condition condition, LIR_Opr opr1, LIR_Opr opr2, LIR_Op2* op) { ++ if (opr1->is_constant() && opr2->is_single_cpu()) { ++ // tableswitch ++ Register reg = as_reg(opr2); ++ struct tableswitch &table = switches[opr1->as_constant_ptr()->as_jint()]; ++ __ tableswitch(reg, table._first_key, table._last_key, table._branches, table._after); ++ } else if (opr1->is_single_cpu() || opr1->is_double_cpu()) { ++ Register reg1 = as_reg(opr1); ++ if (opr2->is_single_cpu()) { ++ // cpu register - cpu register ++ Register reg2 = opr2->as_register(); ++ if (opr1->type() == T_OBJECT || opr1->type() == T_ARRAY) { ++ __ cmpoop(reg1, reg2); ++ } else { ++ assert(opr2->type() != T_OBJECT && opr2->type() != T_ARRAY, "cmp int, oop?"); ++ __ cmpw(reg1, reg2); ++ } ++ return; ++ } ++ if (opr2->is_double_cpu()) { ++ // cpu register - cpu register ++ Register reg2 = opr2->as_register_lo(); ++ __ cmp(reg1, reg2); ++ return; ++ } ++ ++ if (opr2->is_constant()) { ++ bool is_32bit = false; // width of register operand ++ jlong imm; ++ ++ switch(opr2->type()) { ++ case T_INT: ++ imm = opr2->as_constant_ptr()->as_jint(); ++ is_32bit = true; ++ break; ++ case T_LONG: ++ imm = opr2->as_constant_ptr()->as_jlong(); ++ break; ++ case T_ADDRESS: ++ imm = opr2->as_constant_ptr()->as_jint(); ++ break; ++ case T_METADATA: ++ imm = (intptr_t)(opr2->as_constant_ptr()->as_metadata()); ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ jobject2reg(opr2->as_constant_ptr()->as_jobject(), rscratch1); ++ __ cmpoop(reg1, rscratch1); ++ return; ++ default: ++ ShouldNotReachHere(); ++ imm = 0; // unreachable ++ break; ++ } ++ ++ if (Assembler::operand_valid_for_add_sub_immediate(imm)) { ++ if (is_32bit) ++ __ cmpw(reg1, imm); ++ else ++ __ cmp(reg1, imm); ++ return; ++ } else { ++ __ mov(rscratch1, imm); ++ if (is_32bit) ++ __ cmpw(reg1, rscratch1); ++ else ++ __ cmp(reg1, rscratch1); ++ return; ++ } ++ } else ++ ShouldNotReachHere(); ++ } else if (opr1->is_single_fpu()) { ++ FloatRegister reg1 = opr1->as_float_reg(); ++ assert(opr2->is_single_fpu(), "expect single float register"); ++ FloatRegister reg2 = opr2->as_float_reg(); ++ __ fcmps(reg1, reg2); ++ } else if (opr1->is_double_fpu()) { ++ FloatRegister reg1 = opr1->as_double_reg(); ++ assert(opr2->is_double_fpu(), "expect double float register"); ++ FloatRegister reg2 = opr2->as_double_reg(); ++ __ fcmpd(reg1, reg2); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void LIR_Assembler::comp_fl2i(LIR_Code code, LIR_Opr left, LIR_Opr right, LIR_Opr dst, LIR_Op2* op){ ++ if (code == lir_cmp_fd2i || code == lir_ucmp_fd2i) { ++ bool is_unordered_less = (code == lir_ucmp_fd2i); ++ if (left->is_single_fpu()) { ++ __ float_cmp(true, is_unordered_less ? -1 : 1, left->as_float_reg(), right->as_float_reg(), dst->as_register()); ++ } else if (left->is_double_fpu()) { ++ __ float_cmp(false, is_unordered_less ? 
-1 : 1, left->as_double_reg(), right->as_double_reg(), dst->as_register()); ++ } else { ++ ShouldNotReachHere(); ++ } ++ } else if (code == lir_cmp_l2i) { ++ Label done; ++ __ cmp(left->as_register_lo(), right->as_register_lo()); ++ __ mov(dst->as_register(), (u_int64_t)-1L); ++ __ br(Assembler::LT, done); ++ __ csinc(dst->as_register(), zr, zr, Assembler::EQ); ++ __ BIND(done); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++void LIR_Assembler::align_call(LIR_Code code) { } ++ ++ ++void LIR_Assembler::call(LIR_OpJavaCall* op, relocInfo::relocType rtype) { ++ address call = __ trampoline_call(Address(op->addr(), rtype)); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++ ++void LIR_Assembler::ic_call(LIR_OpJavaCall* op) { ++ address call = __ ic_call(op->addr()); ++ if (call == NULL) { ++ bailout("trampoline stub overflow"); ++ return; ++ } ++ add_call_info(code_offset(), op->info()); ++} ++ ++ ++/* Currently, vtable-dispatch is only enabled for sparc platforms */ ++void LIR_Assembler::vtable_call(LIR_OpJavaCall* op) { ++ ShouldNotReachHere(); ++} ++ ++ ++void LIR_Assembler::emit_static_call_stub() { ++ address call_pc = __ pc(); ++ address stub = __ start_a_stub(call_stub_size()); ++ if (stub == NULL) { ++ bailout("static call stub overflow"); ++ return; ++ } ++ ++ int start = __ offset(); ++ ++ __ relocate(static_stub_Relocation::spec(call_pc)); ++ __ emit_static_call_stub(); ++ ++ assert(__ offset() - start + CompiledStaticCall::to_trampoline_stub_size() ++ <= call_stub_size(), "stub too big"); ++ __ end_a_stub(); ++} ++ ++ ++void LIR_Assembler::throw_op(LIR_Opr exceptionPC, LIR_Opr exceptionOop, CodeEmitInfo* info) { ++ assert(exceptionOop->as_register() == i0, "must match"); ++ assert(exceptionPC->as_register() == i3, "must match"); ++ ++ // exception object is not added to oop map by LinearScan ++ // (LinearScan assumes that no oops are in fixed registers) ++ info->add_register_oop(exceptionOop); ++ Runtime1::StubID unwind_id; ++ ++ // get current pc information ++ // pc is only needed if the method has an exception handler, the unwind code does not need it. ++ int pc_for_athrow_offset = __ offset(); ++ InternalAddress pc_for_athrow(__ pc()); ++ __ adr(exceptionPC->as_register(), pc_for_athrow); ++ add_call_info(pc_for_athrow_offset, info); // for exception handler ++ ++ __ verify_not_null_oop(i0); ++ // search an exception handler (i0: exception oop, r3: throwing pc) ++ if (compilation()->has_fpu_code()) { ++ unwind_id = Runtime1::handle_exception_id; ++ } else { ++ unwind_id = Runtime1::handle_exception_nofpu_id; ++ } ++ __ far_call(RuntimeAddress(Runtime1::entry_for(unwind_id))); ++ ++ // FIXME: enough room for two byte trap ???? ++ __ nop(); ++} ++ ++ ++void LIR_Assembler::unwind_op(LIR_Opr exceptionOop) { ++ assert(exceptionOop->as_register() == i0, "must match"); ++ ++ __ b(_unwind_handler_entry); ++} ++ ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, LIR_Opr count, LIR_Opr dest, LIR_Opr tmp) { ++ Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); ++ Register dreg = dest->is_single_cpu() ? 
dest->as_register() : dest->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ lslvw (dreg, lreg, count->as_register()); break; ++ case lir_shr: __ asrvw (dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ lsrvw (dreg, lreg, count->as_register()); break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ lslv (dreg, lreg, count->as_register()); break; ++ case lir_shr: __ asrv (dreg, lreg, count->as_register()); break; ++ case lir_ushr: __ lsrv (dreg, lreg, count->as_register()); break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++ ++void LIR_Assembler::shift_op(LIR_Code code, LIR_Opr left, jint count, LIR_Opr dest) { ++ Register dreg = dest->is_single_cpu() ? dest->as_register() : dest->as_register_lo(); ++ Register lreg = left->is_single_cpu() ? left->as_register() : left->as_register_lo(); ++ ++ switch (left->type()) { ++ case T_INT: { ++ switch (code) { ++ case lir_shl: __ lslw (dreg, lreg, count); break; ++ case lir_shr: __ asrw (dreg, lreg, count); break; ++ case lir_ushr: __ lsrw (dreg, lreg, count); break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ break; ++ case T_LONG: ++ case T_ADDRESS: ++ case T_OBJECT: ++ switch (code) { ++ case lir_shl: __ lsl (dreg, lreg, count); break; ++ case lir_shr: __ asr (dreg, lreg, count); break; ++ case lir_ushr: __ lsr (dreg, lreg, count); break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++} ++ ++ ++void LIR_Assembler::store_parameter(Register r, int offset_from_rsp_in_words) { ++ assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); ++ int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; ++ assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ str (r, Address(sp, offset_from_rsp_in_bytes)); ++} ++ ++ ++void LIR_Assembler::store_parameter(jint c, int offset_from_rsp_in_words) { ++ assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); ++ int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; ++ assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ mov (rscratch1, c); ++ __ str (rscratch1, Address(sp, offset_from_rsp_in_bytes)); ++} ++ ++ ++void LIR_Assembler::store_parameter(jobject o, int offset_from_rsp_in_words) { ++ ShouldNotReachHere(); ++ assert(offset_from_rsp_in_words >= 0, "invalid offset from rsp"); ++ int offset_from_rsp_in_bytes = offset_from_rsp_in_words * BytesPerWord; ++ assert(offset_from_rsp_in_bytes < frame_map()->reserved_argument_area_size(), "invalid offset"); ++ __ lea(rscratch1, __ constant_oop_address(o)); ++ __ str(rscratch1, Address(sp, offset_from_rsp_in_bytes)); ++} ++ ++ ++// This code replaces a call to arraycopy; no exception may ++// be thrown in this code, they must be thrown in the System.arraycopy ++// activation frame; we could save some checks if this would not be the case ++void LIR_Assembler::emit_arraycopy(LIR_OpArrayCopy* op) { ++ ciArrayKlass* default_type = op->expected_type(); ++ Register src = op->src()->as_register(); ++ Register dst = op->dst()->as_register(); ++ Register src_pos = op->src_pos()->as_register(); ++ Register dst_pos = op->dst_pos()->as_register(); ++ Register length = op->length()->as_register(); ++ 
Register tmp = op->tmp()->as_register(); ++ ++ CodeStub* stub = op->stub(); ++ int flags = op->flags(); ++ BasicType basic_type = default_type != NULL ? default_type->element_type()->basic_type() : T_ILLEGAL; ++ if (basic_type == T_ARRAY) basic_type = T_OBJECT; ++ ++ // if we don't know anything, just go through the generic arraycopy ++ if (default_type == NULL // || basic_type == T_OBJECT ++ ) { ++ Label done; ++ assert(src == i1 && src_pos == i2, "mismatch in calling convention"); ++ ++ // Save the arguments in case the generic arraycopy fails and we ++ // have to fall back to the JNI stub ++ __ stp(dst, dst_pos, Address(sp, 0*BytesPerWord)); ++ __ stp(length, src_pos, Address(sp, 2*BytesPerWord)); ++ __ str(src, Address(sp, 4*BytesPerWord)); ++ ++ address copyfunc_addr = StubRoutines::generic_arraycopy(); ++ assert(copyfunc_addr != NULL, "generic arraycopy stub required"); ++ ++ // The arguments are in java calling convention so we shift them ++ // to C convention ++ assert_different_registers(c_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4); ++ __ mov(c_rarg0, j_rarg0); ++ assert_different_registers(c_rarg1, j_rarg2, j_rarg3, j_rarg4); ++ __ mov(c_rarg1, j_rarg1); ++ assert_different_registers(c_rarg2, j_rarg3, j_rarg4); ++ __ mov(c_rarg2, j_rarg2); ++ assert_different_registers(c_rarg3, j_rarg4); ++ __ mov(c_rarg3, j_rarg3); ++ __ mov(c_rarg4, j_rarg4); ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ incrementw(ExternalAddress((address)&Runtime1::_generic_arraycopystub_cnt)); ++ } ++#endif ++ __ far_call(RuntimeAddress(copyfunc_addr)); ++ ++ __ cbz(i0, *stub->continuation()); ++ ++ // Reload values from the stack so they are where the stub ++ // expects them. ++ __ ldp(dst, dst_pos, Address(sp, 0*BytesPerWord)); ++ __ ldp(length, src_pos, Address(sp, 2*BytesPerWord)); ++ __ ldr(src, Address(sp, 4*BytesPerWord)); ++ ++ // i0 is -1^K where K == partial copied count ++ __ eonw(rscratch1, i0, zr); ++ // adjust length down and src/end pos up by partial copied count ++ __ subw(length, length, rscratch1); ++ __ addw(src_pos, src_pos, rscratch1); ++ __ addw(dst_pos, dst_pos, rscratch1); ++ __ b(*stub->entry()); ++ ++ __ BIND(*stub->continuation()); ++ return; ++ } ++ ++ assert(default_type != NULL && default_type->is_array_klass() && default_type->is_loaded(), "must be true at this point"); ++ ++ int elem_size = type2aelembytes(basic_type); ++ int shift_amount; ++ int scale = exact_log2(elem_size); ++ ++ Address src_length_addr = Address(src, arrayOopDesc::length_offset_in_bytes()); ++ Address dst_length_addr = Address(dst, arrayOopDesc::length_offset_in_bytes()); ++ Address src_klass_addr = Address(src, oopDesc::klass_offset_in_bytes()); ++ Address dst_klass_addr = Address(dst, oopDesc::klass_offset_in_bytes()); ++ ++ // test for NULL ++ if (flags & LIR_OpArrayCopy::src_null_check) { ++ __ cbz(src, *stub->entry()); ++ } ++ if (flags & LIR_OpArrayCopy::dst_null_check) { ++ __ cbz(dst, *stub->entry()); ++ } ++ ++ // If the compiler was not able to prove that exact type of the source or the destination ++ // of the arraycopy is an array type, check at runtime if the source or the destination is ++ // an instance type. 
++ if (flags & LIR_OpArrayCopy::type_check) { ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ __ ldrw(rscratch1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ cmpw(rscratch1, Klass::_lh_neutral_value); ++ __ br(Assembler::GE, *stub->entry()); ++ } ++ ++ if (!(flags & LIR_OpArrayCopy::LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ __ ldrw(rscratch1, Address(tmp, in_bytes(Klass::layout_helper_offset()))); ++ __ cmpw(rscratch1, Klass::_lh_neutral_value); ++ __ br(Assembler::GE, *stub->entry()); ++ } ++ } ++ ++ // check if negative ++ if (flags & LIR_OpArrayCopy::src_pos_positive_check) { ++ __ cmpw(src_pos, 0); ++ __ br(Assembler::LT, *stub->entry()); ++ } ++ if (flags & LIR_OpArrayCopy::dst_pos_positive_check) { ++ __ cmpw(dst_pos, 0); ++ __ br(Assembler::LT, *stub->entry()); ++ } ++ ++ if (flags & LIR_OpArrayCopy::length_positive_check) { ++ __ cmpw(length, 0); ++ __ br(Assembler::LT, *stub->entry()); ++ } ++ ++ if (flags & LIR_OpArrayCopy::src_range_check) { ++ __ addw(tmp, src_pos, length); ++ __ ldrw(rscratch1, src_length_addr); ++ __ cmpw(tmp, rscratch1); ++ __ br(Assembler::HI, *stub->entry()); ++ } ++ if (flags & LIR_OpArrayCopy::dst_range_check) { ++ __ addw(tmp, dst_pos, length); ++ __ ldrw(rscratch1, dst_length_addr); ++ __ cmpw(tmp, rscratch1); ++ __ br(Assembler::HI, *stub->entry()); ++ } ++ ++ if (flags & LIR_OpArrayCopy::type_check) { ++ // We don't know the array types are compatible ++ if (basic_type != T_OBJECT) { ++ // Simple test for basic type arrays ++ if (UseCompressedClassPointers) { ++ __ ldrw(tmp, src_klass_addr); ++ __ ldrw(rscratch1, dst_klass_addr); ++ __ cmpw(tmp, rscratch1); ++ } else { ++ __ ldr(tmp, src_klass_addr); ++ __ ldr(rscratch1, dst_klass_addr); ++ __ cmp(tmp, rscratch1); ++ } ++ __ br(Assembler::NE, *stub->entry()); ++ } else { ++ // For object arrays, if src is a sub class of dst then we can ++ // safely do the copy. ++ Label cont, slow; ++ ++#define PUSH(i1, i2) \ ++ stp(i1, i2, __ pre(sp, -2 * wordSize)); ++ ++#define POP(i1, i2) \ ++ ldp(i1, i2, __ post(sp, 2 * wordSize)); ++ ++ __ PUSH(src, dst); ++ ++ __ load_klass(src, src); ++ __ load_klass(dst, dst); ++ ++ __ check_klass_subtype_fast_path(src, dst, tmp, &cont, &slow, NULL); ++ ++ __ PUSH(src, dst); ++ __ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::slow_subtype_check_id))); ++ __ POP(src, dst); ++ ++ __ cbnz(src, cont); ++ ++ __ BIND(slow); ++ __ POP(src, dst); ++ ++ address copyfunc_addr = StubRoutines::checkcast_arraycopy(); ++ if (copyfunc_addr != NULL) { // use stub if available ++ // src is not a sub class of dst so we have to do a ++ // per-element check. ++ ++ int mask = LIR_OpArrayCopy::src_objarray|LIR_OpArrayCopy::dst_objarray; ++ if ((flags & mask) != mask) { ++ // Check that at least both of them object arrays. 
++ assert(flags & mask, "one of the two should be known to be an object array"); ++ ++ if (!(flags & LIR_OpArrayCopy::src_objarray)) { ++ __ load_klass(tmp, src); ++ } else if (!(flags & LIR_OpArrayCopy::dst_objarray)) { ++ __ load_klass(tmp, dst); ++ } ++ int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ Address klass_lh_addr(tmp, lh_offset); ++ jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++ __ ldrw(rscratch1, klass_lh_addr); ++ __ mov(rscratch2, objArray_lh); ++ __ eorw(rscratch1, rscratch1, rscratch2); ++ __ cbnzw(rscratch1, *stub->entry()); ++ } ++ ++ // Spill because stubs can use any register they like and it's ++ // easier to restore just those that we care about. ++ __ stp(dst, dst_pos, Address(sp, 0*BytesPerWord)); ++ __ stp(length, src_pos, Address(sp, 2*BytesPerWord)); ++ __ str(src, Address(sp, 4*BytesPerWord)); ++ ++ __ lea(c_rarg0, Address(src, src_pos, Address::uxtw(scale))); ++ __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(c_rarg0, dst, dst_pos, length); ++ __ lea(c_rarg1, Address(dst, dst_pos, Address::uxtw(scale))); ++ __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(c_rarg1, dst, length); ++ __ uxtw(c_rarg2, length); ++ assert_different_registers(c_rarg2, dst); ++ ++ __ load_klass(c_rarg4, dst); ++ __ ldr(c_rarg4, Address(c_rarg4, ObjArrayKlass::element_klass_offset())); ++ __ ldrw(c_rarg3, Address(c_rarg4, Klass::super_check_offset_offset())); ++ __ far_call(RuntimeAddress(copyfunc_addr)); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ Label failed; ++ __ cbnz(i0, failed); ++ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_cnt)); ++ __ BIND(failed); ++ } ++#endif ++ ++ __ cbz(i0, *stub->continuation()); ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ incrementw(ExternalAddress((address)&Runtime1::_arraycopy_checkcast_attempt_cnt)); ++ } ++#endif ++ assert_different_registers(dst, dst_pos, length, src_pos, src, i0, rscratch1); ++ ++ // Restore previously spilled arguments ++ __ ldp(dst, dst_pos, Address(sp, 0*BytesPerWord)); ++ __ ldp(length, src_pos, Address(sp, 2*BytesPerWord)); ++ __ ldr(src, Address(sp, 4*BytesPerWord)); ++ ++ // return value is -1^K where K is partial copied count ++ __ eonw(rscratch1, i0, zr); ++ // adjust length down and src/end pos up by partial copied count ++ __ subw(length, length, rscratch1); ++ __ addw(src_pos, src_pos, rscratch1); ++ __ addw(dst_pos, dst_pos, rscratch1); ++ } ++ ++ __ b(*stub->entry()); ++ ++ __ BIND(cont); ++ __ POP(src, dst); ++ } ++ } ++ ++#ifdef ASSERT ++ if (basic_type != T_OBJECT || !(flags & LIR_OpArrayCopy::type_check)) { ++ // Sanity check the known type with the incoming class. For the ++ // primitive case the types must match exactly with src.klass and ++ // dst.klass each exactly matching the default type. For the ++ // object array case, if no type check is needed then either the ++ // dst type is exactly the expected type and the src type is a ++ // subtype which we can't check or src is the same array as dst ++ // but not necessarily exactly of type default_type. 
++ Label known_ok, halt; ++ __ mov_metadata(tmp, default_type->constant_encoding()); ++ if (UseCompressedClassPointers) { ++ __ encode_klass_not_null(tmp); ++ } ++ ++ if (basic_type != T_OBJECT) { ++ ++ if (UseCompressedClassPointers) { ++ __ ldrw(rscratch1, dst_klass_addr); ++ __ cmpw(tmp, rscratch1); ++ } else { ++ __ ldr(rscratch1, dst_klass_addr); ++ __ cmp(tmp, rscratch1); ++ } ++ __ br(Assembler::NE, halt); ++ if (UseCompressedClassPointers) { ++ __ ldrw(rscratch1, src_klass_addr); ++ __ cmpw(tmp, rscratch1); ++ } else { ++ __ ldr(rscratch1, src_klass_addr); ++ __ cmp(tmp, rscratch1); ++ } ++ __ br(Assembler::EQ, known_ok); ++ } else { ++ if (UseCompressedClassPointers) { ++ __ ldrw(rscratch1, dst_klass_addr); ++ __ cmpw(tmp, rscratch1); ++ } else { ++ __ ldr(rscratch1, dst_klass_addr); ++ __ cmp(tmp, rscratch1); ++ } ++ __ br(Assembler::EQ, known_ok); ++ __ cmp(src, dst); ++ __ br(Assembler::EQ, known_ok); ++ } ++ __ BIND(halt); ++ __ stop("incorrect type information in arraycopy"); ++ __ BIND(known_ok); ++ } ++#endif ++ ++#ifndef PRODUCT ++ if (PrintC1Statistics) { ++ __ incrementw(ExternalAddress(Runtime1::arraycopy_count_address(basic_type))); ++ } ++#endif ++ ++ __ lea(c_rarg0, Address(src, src_pos, Address::uxtw(scale))); ++ __ add(c_rarg0, c_rarg0, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(c_rarg0, dst, dst_pos, length); ++ __ lea(c_rarg1, Address(dst, dst_pos, Address::uxtw(scale))); ++ __ add(c_rarg1, c_rarg1, arrayOopDesc::base_offset_in_bytes(basic_type)); ++ assert_different_registers(c_rarg1, dst, length); ++ __ uxtw(c_rarg2, length); ++ assert_different_registers(c_rarg2, dst); ++ ++ bool disjoint = (flags & LIR_OpArrayCopy::overlapping) == 0; ++ bool aligned = (flags & LIR_OpArrayCopy::unaligned) == 0; ++ const char *name; ++ address entry = StubRoutines::select_arraycopy_function(basic_type, aligned, disjoint, name, false); ++ ++ CodeBlob *cb = CodeCache::find_blob(entry); ++ if (cb) { ++ __ far_call(RuntimeAddress(entry)); ++ } else { ++ __ call_VM_leaf(entry, 3); ++ } ++ ++ __ BIND(*stub->continuation()); ++} ++ ++ ++ ++ ++void LIR_Assembler::emit_lock(LIR_OpLock* op) { ++ Register obj = op->obj_opr()->as_register(); // may not be an oop ++ Register hdr = op->hdr_opr()->as_register(); ++ Register lock = op->lock_opr()->as_register(); ++ if (!UseFastLocking) { ++ __ b(*op->stub()->entry()); ++ } else if (op->code() == lir_lock) { ++ Register scratch = noreg; ++ if (UseBiasedLocking) { ++ scratch = op->scratch_opr()->as_register(); ++ } ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); ++ // add debug info for NullPointerException only if one is possible ++ int null_check_offset = __ lock_object(hdr, obj, lock, scratch, *op->stub()->entry()); ++ if (op->info() != NULL) { ++ add_debug_info_for_null_check(null_check_offset, op->info()); ++ } ++ // done ++ } else if (op->code() == lir_unlock) { ++ assert(BasicLock::displaced_header_offset_in_bytes() == 0, "lock_reg must point to the displaced header"); ++ __ unlock_object(hdr, obj, lock, *op->stub()->entry()); ++ } else { ++ Unimplemented(); ++ } ++ __ BIND(*op->stub()->continuation()); ++} ++ ++ ++void LIR_Assembler::emit_profile_call(LIR_OpProfileCall* op) { ++ ciMethod* method = op->profiled_method(); ++ int bci = op->profiled_bci(); ++ ciMethod* callee = op->profiled_callee(); ++ ++ // Update counter for all call types ++ ciMethodData* md = method->method_data_or_null(); ++ assert(md != NULL, "Sanity"); ++ ciProfileData* 
data = md->bci_to_data(bci); ++ assert(data != NULL && data->is_CounterData(), "need CounterData for calls"); ++ assert(op->mdo()->is_single_cpu(), "mdo must be allocated"); ++ Register mdo = op->mdo()->as_register(); ++ __ mov_metadata(mdo, md->constant_encoding()); ++ Address counter_addr(mdo, md->byte_offset_of_slot(data, CounterData::count_offset())); ++ // Perform additional virtual call profiling for invokevirtual and ++ // invokeinterface bytecodes ++ if (op->should_profile_receiver_type()) { ++ assert(op->recv()->is_single_cpu(), "recv must be allocated"); ++ Register recv = op->recv()->as_register(); ++ assert_different_registers(mdo, recv); ++ assert(data->is_VirtualCallData(), "need VirtualCallData for virtual calls"); ++ ciKlass* known_klass = op->known_holder(); ++ if (C1OptimizeVirtualCallProfiling && known_klass != NULL) { ++ // We know the type that will be seen at this call site; we can ++ // statically update the MethodData* rather than needing to do ++ // dynamic tests on the receiver type ++ ++ // NOTE: we should probably put a lock around this search to ++ // avoid collisions by concurrent compilations ++ ciVirtualCallData* vc_data = (ciVirtualCallData*) data; ++ uint i; ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (known_klass->equals(receiver)) { ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ addptr(data_addr, DataLayout::counter_increment); ++ return; ++ } ++ } ++ ++ // Receiver type not found in profile data; select an empty slot ++ ++ // Note that this is less efficient than it should be because it ++ // always does a write to the receiver part of the ++ // VirtualCallData rather than just the first time ++ for (i = 0; i < VirtualCallData::row_limit(); i++) { ++ ciKlass* receiver = vc_data->receiver(i); ++ if (receiver == NULL) { ++ Address recv_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_offset(i))); ++ __ mov_metadata(rscratch1, known_klass->constant_encoding()); ++ __ lea(rscratch2, recv_addr); ++ __ str(rscratch1, Address(rscratch2)); ++ Address data_addr(mdo, md->byte_offset_of_slot(data, VirtualCallData::receiver_count_offset(i))); ++ __ addptr(data_addr, DataLayout::counter_increment); ++ return; ++ } ++ } ++ } else { ++ __ load_klass(recv, recv); ++ Label update_done; ++ type_profile_helper(mdo, md, data, recv, &update_done); ++ // Receiver did not match any saved receiver and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. 
++ __ addptr(counter_addr, DataLayout::counter_increment); ++ ++ __ BIND(update_done); ++ } ++ } else { ++ // Static call ++ __ addptr(counter_addr, DataLayout::counter_increment); ++ } ++} ++ ++ ++void LIR_Assembler::emit_delay(LIR_OpDelay*) { ++ Unimplemented(); ++} ++ ++ ++void LIR_Assembler::monitor_address(int monitor_no, LIR_Opr dst) { ++ __ lea(dst->as_register(), frame_map()->address_for_monitor_lock(monitor_no)); ++} ++ ++void LIR_Assembler::emit_updatecrc32(LIR_OpUpdateCRC32* op) { ++ assert(op->crc()->is_single_cpu(), "crc must be register"); ++ assert(op->val()->is_single_cpu(), "byte value must be register"); ++ assert(op->result_opr()->is_single_cpu(), "result must be register"); ++ Register crc = op->crc()->as_register(); ++ Register val = op->val()->as_register(); ++ Register res = op->result_opr()->as_register(); ++ ++ assert_different_registers(val, crc, res); ++ unsigned long offset; ++ __ adrp(res, ExternalAddress(StubRoutines::crc_table_addr()), offset); ++ if (offset) __ add(res, res, offset); ++ ++ __ mvnw(crc, crc); // ~crc ++ __ update_byte_crc32(crc, val, res); ++ __ mvnw(res, crc); // ~crc ++} ++ ++void LIR_Assembler::emit_profile_type(LIR_OpProfileType* op) { ++ COMMENT("emit_profile_type {"); ++ Register obj = op->obj()->as_register(); ++ Register tmp = op->tmp()->as_pointer_register(); ++ Address mdo_addr = as_Address(op->mdp()->as_address_ptr()); ++ ciKlass* exact_klass = op->exact_klass(); ++ intptr_t current_klass = op->current_klass(); ++ bool not_null = op->not_null(); ++ bool no_conflict = op->no_conflict(); ++ ++ Label update, next, none; ++ ++ bool do_null = !not_null; ++ bool exact_klass_set = exact_klass != NULL && ciTypeEntries::valid_ciklass(current_klass) == exact_klass; ++ bool do_update = !TypeEntries::is_type_unknown(current_klass) && !exact_klass_set; ++ ++ assert(do_null || do_update, "why are we here?"); ++ assert(!TypeEntries::was_null_seen(current_klass) || do_update, "why are we here?"); ++ assert(mdo_addr.base() != rscratch1, "wrong register"); ++ ++ __ verify_oop(obj); ++ ++ if (tmp != obj) { ++ __ mov(tmp, obj); ++ } ++ if (do_null) { ++ __ cbnz(tmp, update); ++ if (!TypeEntries::was_null_seen(current_klass)) { ++ __ ldr(rscratch2, mdo_addr); ++ __ orr(rscratch2, rscratch2, TypeEntries::null_seen); ++ __ str(rscratch2, mdo_addr); ++ } ++ if (do_update) { ++#ifndef ASSERT ++ __ b(next); ++ } ++#else ++ __ b(next); ++ } ++ } else { ++ __ cbnz(tmp, update); ++ __ stop("unexpected null obj"); ++#endif ++ } ++ ++ __ BIND(update); ++ ++ if (do_update) { ++#ifdef ASSERT ++ if (exact_klass != NULL) { ++ Label ok; ++ __ load_klass(tmp, tmp); ++ __ mov_metadata(rscratch1, exact_klass->constant_encoding()); ++ __ eor(rscratch1, tmp, rscratch1); ++ __ cbz(rscratch1, ok); ++ __ stop("exact klass and actual klass differ"); ++ __ BIND(ok); ++ } ++#endif ++ if (!no_conflict) { ++ if (exact_klass == NULL || TypeEntries::is_type_none(current_klass)) { ++ if (exact_klass != NULL) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ } else { ++ __ load_klass(tmp, tmp); ++ } ++ ++ __ ldr(rscratch2, mdo_addr); ++ __ eor(tmp, tmp, rscratch2); ++ __ andr(rscratch1, tmp, TypeEntries::type_klass_mask); ++ // klass seen before, nothing to do. The unknown bit may have been ++ // set already but no need to check. ++ __ cbz(rscratch1, next); ++ ++ __ tbnz(tmp, exact_log2(TypeEntries::type_unknown), next); // already unknown. Nothing to do anymore. 
++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ cbz(rscratch2, none); ++ __ cmp(rscratch2, TypeEntries::null_seen); ++ __ br(Assembler::EQ, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ __ dmb(Assembler::ISHLD); ++ __ ldr(rscratch2, mdo_addr); ++ __ eor(tmp, tmp, rscratch2); ++ __ andr(rscratch1, tmp, TypeEntries::type_klass_mask); ++ __ cbz(rscratch1, next); ++ } ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "conflict only"); ++ ++ __ ldr(tmp, mdo_addr); ++ __ tbnz(tmp, exact_log2(TypeEntries::type_unknown), next); // already unknown. Nothing to do anymore. ++ } ++ ++ // different than before. Cannot keep accurate profile. ++ __ ldr(rscratch2, mdo_addr); ++ __ orr(rscratch2, rscratch2, TypeEntries::type_unknown); ++ __ str(rscratch2, mdo_addr); ++ ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ b(next); ++ ++ __ BIND(none); ++ // first time here. Set profile type. ++ __ str(tmp, mdo_addr); ++ } ++ } else { ++ // There's a single possible klass at this profile point ++ assert(exact_klass != NULL, "should be"); ++ if (TypeEntries::is_type_none(current_klass)) { ++ __ mov_metadata(tmp, exact_klass->constant_encoding()); ++ __ ldr(rscratch2, mdo_addr); ++ __ eor(tmp, tmp, rscratch2); ++ __ andr(rscratch1, tmp, TypeEntries::type_klass_mask); ++ __ cbz(rscratch1, next); ++#ifdef ASSERT ++ { ++ Label ok; ++ __ ldr(rscratch1, mdo_addr); ++ __ cbz(rscratch1, ok); ++ __ cmp(rscratch1, TypeEntries::null_seen); ++ __ br(Assembler::EQ, ok); ++ // may have been set by another thread ++ __ dmb(Assembler::ISHLD); ++ __ mov_metadata(rscratch1, exact_klass->constant_encoding()); ++ __ ldr(rscratch2, mdo_addr); ++ __ eor(rscratch2, rscratch1, rscratch2); ++ __ andr(rscratch2, rscratch2, TypeEntries::type_mask); ++ __ cbz(rscratch2, ok); ++ ++ __ stop("unexpected profiling mismatch"); ++ __ BIND(ok); ++ } ++#endif ++ // first time here. Set profile type. ++ __ ldr(tmp, mdo_addr); ++ } else { ++ assert(ciTypeEntries::valid_ciklass(current_klass) != NULL && ++ ciTypeEntries::valid_ciklass(current_klass) != exact_klass, "inconsistent"); ++ ++ __ ldr(tmp, mdo_addr); ++ __ tbnz(tmp, exact_log2(TypeEntries::type_unknown), next); // already unknown. Nothing to do anymore. ++ ++ __ orr(tmp, tmp, TypeEntries::type_unknown); ++ __ str(tmp, mdo_addr); ++ // FIXME: Write barrier needed here? 
++ } ++ } ++ ++ __ BIND(next); ++ } ++ COMMENT("} emit_profile_type"); ++} ++ ++ ++void LIR_Assembler::align_backward_branch_target() { ++} ++ ++ ++void LIR_Assembler::negate(LIR_Opr left, LIR_Opr dest, LIR_Opr tmp) { ++ // tmp must be unused ++ assert(tmp->is_illegal(), "wasting a register if tmp is allocated"); ++ ++ if (left->is_single_cpu()) { ++ assert(dest->is_single_cpu(), "expect single result reg"); ++ __ negw(dest->as_register(), left->as_register()); ++ } else if (left->is_double_cpu()) { ++ assert(dest->is_double_cpu(), "expect double result reg"); ++ __ neg(dest->as_register_lo(), left->as_register_lo()); ++ } else if (left->is_single_fpu()) { ++ assert(dest->is_single_fpu(), "expect single float result reg"); ++ __ fnegs(dest->as_float_reg(), left->as_float_reg()); ++ } else { ++ assert(left->is_double_fpu(), "expect double float operand reg"); ++ assert(dest->is_double_fpu(), "expect double float result reg"); ++ __ fnegd(dest->as_double_reg(), left->as_double_reg()); ++ } ++} ++ ++ ++void LIR_Assembler::leal(LIR_Opr addr, LIR_Opr dest, LIR_PatchCode patch_code, CodeEmitInfo* info) { ++ assert(patch_code == lir_patch_none, "Patch code not supported"); ++ __ lea(dest->as_register_lo(), as_Address(addr->as_address_ptr())); ++} ++ ++ ++void LIR_Assembler::rt_call(LIR_Opr result, address dest, const LIR_OprList* args, LIR_Opr tmp, CodeEmitInfo* info) { ++ assert(!tmp->is_valid(), "don't need temporary"); ++ ++ CodeBlob *cb = CodeCache::find_blob(dest); ++ if (cb) { ++ __ far_call(RuntimeAddress(dest)); ++ } else { ++ __ mov(rscratch1, RuntimeAddress(dest)); ++ __ blr(rscratch1); ++ } ++ ++ if (info != NULL) { ++ add_call_info_here(info); ++ } ++ __ maybe_isb(); ++} ++ ++void LIR_Assembler::volatile_move_op(LIR_Opr src, LIR_Opr dest, BasicType type, CodeEmitInfo* info) { ++ if (dest->is_address() || src->is_address()) { ++ move_op(src, dest, type, lir_patch_none, info, ++ /*pop_fpu_stack*/false, /*unaligned*/false, /*wide*/false); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++#ifdef ASSERT ++// emit run-time assertion ++void LIR_Assembler::emit_assert(LIR_OpAssert* op) { ++ assert(op->code() == lir_assert, "must be"); ++ ++ if (op->in_opr1()->is_valid()) { ++ assert(op->in_opr2()->is_valid(), "both operands must be valid"); ++ comp_op(op->condition(), op->in_opr1(), op->in_opr2(), op); ++ } else { ++ assert(op->in_opr2()->is_illegal(), "both operands must be illegal"); ++ assert(op->condition() == lir_cond_always, "no other conditions allowed"); ++ } ++ ++ Label ok; ++ if (op->condition() != lir_cond_always) { ++ Assembler::Condition acond = Assembler::AL; ++ switch (op->condition()) { ++ case lir_cond_equal: acond = Assembler::EQ; break; ++ case lir_cond_notEqual: acond = Assembler::NE; break; ++ case lir_cond_less: acond = Assembler::LT; break; ++ case lir_cond_lessEqual: acond = Assembler::LE; break; ++ case lir_cond_greaterEqual: acond = Assembler::GE; break; ++ case lir_cond_greater: acond = Assembler::GT; break; ++ case lir_cond_belowEqual: acond = Assembler::LS; break; ++ case lir_cond_aboveEqual: acond = Assembler::HS; break; ++ default: ShouldNotReachHere(); ++ } ++ __ br(acond, ok); ++ } ++ if (op->halt()) { ++ const char* str = __ code_string(op->msg()); ++ __ stop(str); ++ } else { ++ breakpoint(); ++ } ++ __ BIND(ok); ++} ++#endif ++ ++#ifndef PRODUCT ++#define COMMENT(x) do { __ block_comment(x); } while (0) ++#else ++#define COMMENT(x) ++#endif ++ ++void LIR_Assembler::membar() { ++ COMMENT("membar"); ++ __ membar(MacroAssembler::AnyAny); ++} ++ ++void 
LIR_Assembler::membar_acquire() { ++ __ membar(Assembler::LoadLoad|Assembler::LoadStore); ++} ++ ++void LIR_Assembler::membar_release() { ++ __ membar(Assembler::LoadStore|Assembler::StoreStore); ++} ++ ++void LIR_Assembler::membar_loadload() { ++ __ membar(Assembler::LoadLoad); ++} ++ ++void LIR_Assembler::membar_storestore() { ++ __ membar(MacroAssembler::StoreStore); ++} ++ ++void LIR_Assembler::membar_loadstore() { __ membar(MacroAssembler::LoadStore); } ++ ++void LIR_Assembler::membar_storeload() { __ membar(MacroAssembler::StoreLoad); } ++ ++void LIR_Assembler::on_spin_wait() { ++ Unimplemented(); ++} ++ ++void LIR_Assembler::get_thread(LIR_Opr result_reg) { ++ __ mov(result_reg->as_register(), rthread); ++} ++ ++ ++void LIR_Assembler::peephole(LIR_List *lir) { ++#if 0 ++ if (tableswitch_count >= max_tableswitches) ++ return; ++ ++ /* ++ This finite-state automaton recognizes sequences of compare-and- ++ branch instructions. We will turn them into a tableswitch. You ++ could argue that C1 really shouldn't be doing this sort of ++ optimization, but without it the code is really horrible. ++ */ ++ ++ enum { start_s, cmp1_s, beq_s, cmp_s } state; ++ int first_key, last_key = -2147483648; ++ int next_key = 0; ++ int start_insn = -1; ++ int last_insn = -1; ++ Register reg = noreg; ++ LIR_Opr reg_opr; ++ state = start_s; ++ ++ LIR_OpList* inst = lir->instructions_list(); ++ for (int i = 0; i < inst->length(); i++) { ++ LIR_Op* op = inst->at(i); ++ switch (state) { ++ case start_s: ++ first_key = -1; ++ start_insn = i; ++ switch (op->code()) { ++ case lir_cmp: ++ LIR_Opr opr1 = op->as_Op2()->in_opr1(); ++ LIR_Opr opr2 = op->as_Op2()->in_opr2(); ++ if (opr1->is_cpu_register() && opr1->is_single_cpu() ++ && opr2->is_constant() ++ && opr2->type() == T_INT) { ++ reg_opr = opr1; ++ reg = opr1->as_register(); ++ first_key = opr2->as_constant_ptr()->as_jint(); ++ next_key = first_key + 1; ++ state = cmp_s; ++ goto next_state; ++ } ++ break; ++ } ++ break; ++ case cmp_s: ++ switch (op->code()) { ++ case lir_branch: ++ if (op->as_OpBranch()->cond() == lir_cond_equal) { ++ state = beq_s; ++ last_insn = i; ++ goto next_state; ++ } ++ } ++ state = start_s; ++ break; ++ case beq_s: ++ switch (op->code()) { ++ case lir_cmp: { ++ LIR_Opr opr1 = op->as_Op2()->in_opr1(); ++ LIR_Opr opr2 = op->as_Op2()->in_opr2(); ++ if (opr1->is_cpu_register() && opr1->is_single_cpu() ++ && opr1->as_register() == reg ++ && opr2->is_constant() ++ && opr2->type() == T_INT ++ && opr2->as_constant_ptr()->as_jint() == next_key) { ++ last_key = next_key; ++ next_key++; ++ state = cmp_s; ++ goto next_state; ++ } ++ } ++ } ++ last_key = next_key; ++ state = start_s; ++ break; ++ default: ++ assert(false, "impossible state"); ++ } ++ if (state == start_s) { ++ if (first_key < last_key - 5L && reg != noreg) { ++ { ++ // printf("found run register %d starting at insn %d low value %d high value %d\n", ++ // reg->encoding(), ++ // start_insn, first_key, last_key); ++ // for (int i = 0; i < inst->length(); i++) { ++ // inst->at(i)->print(); ++ // tty->print("\n"); ++ // } ++ // tty->print("\n"); ++ } ++ ++ struct tableswitch *sw = &switches[tableswitch_count]; ++ sw->_insn_index = start_insn, sw->_first_key = first_key, ++ sw->_last_key = last_key, sw->_reg = reg; ++ inst->insert_before(last_insn + 1, new LIR_OpLabel(&sw->_after)); ++ { ++ // Insert the new table of branches ++ int offset = last_insn; ++ for (int n = first_key; n < last_key; n++) { ++ inst->insert_before ++ (last_insn + 1, ++ new LIR_OpBranch(lir_cond_always, T_ILLEGAL, 
++ inst->at(offset)->as_OpBranch()->label())); ++ offset -= 2, i++; ++ } ++ } ++ // Delete all the old compare-and-branch instructions ++ for (int n = first_key; n < last_key; n++) { ++ inst->remove_at(start_insn); ++ inst->remove_at(start_insn); ++ } ++ // Insert the tableswitch instruction ++ inst->insert_before(start_insn, ++ new LIR_Op2(lir_cmp, lir_cond_always, ++ LIR_OprFact::intConst(tableswitch_count), ++ reg_opr)); ++ inst->insert_before(start_insn + 1, new LIR_OpLabel(&sw->_branches)); ++ tableswitch_count++; ++ } ++ reg = noreg; ++ last_key = -2147483648; ++ } ++ next_state: ++ ; ++ } ++#endif ++} ++ ++void LIR_Assembler::atomic_op(LIR_Code code, LIR_Opr src, LIR_Opr data, LIR_Opr dest, LIR_Opr tmp_op) { ++ Address addr = as_Address(src->as_address_ptr()); ++ BasicType type = src->type(); ++ bool is_oop = type == T_OBJECT || type == T_ARRAY; ++ ++ void (MacroAssembler::* add)(Register prev, RegisterOrConstant incr, Register addr); ++ void (MacroAssembler::* xchg)(Register prev, Register newv, Register addr); ++ ++ switch(type) { ++ case T_INT: ++ xchg = &MacroAssembler::atomic_xchgalw; ++//// add = &MacroAssembler::atomic_addalw; ++ break; ++ case T_LONG: ++ xchg = &MacroAssembler::atomic_xchgal; ++ add = &MacroAssembler::atomic_addal; ++ break; ++ case T_OBJECT: ++ case T_ARRAY: ++ if (UseCompressedOops) { ++ xchg = &MacroAssembler::atomic_xchgalw; ++//// add = &MacroAssembler::atomic_addalw; ++ } else { ++ xchg = &MacroAssembler::atomic_xchgal; ++ add = &MacroAssembler::atomic_addal; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ xchg = &MacroAssembler::atomic_xchgal; ++ add = &MacroAssembler::atomic_addal; // unreachable ++ } ++ ++ switch (code) { ++ case lir_xadd: ++ { ++ RegisterOrConstant inc; ++ Register tmp = as_reg(tmp_op); ++ Register dst = as_reg(dest); ++ if (data->is_constant()) { ++ inc = RegisterOrConstant(as_long(data)); ++ assert_different_registers(dst, addr.base(), tmp, ++ rscratch1, rscratch2); ++ } else { ++ inc = RegisterOrConstant(as_reg(data)); ++ assert_different_registers(inc.as_register(), dst, addr.base(), tmp, ++ rscratch1, rscratch2); ++ } ++ __ lea(tmp, addr); ++ (_masm->*add)(dst, inc, tmp); ++ break; ++ } ++ case lir_xchg: ++ { ++ Register tmp = tmp_op->as_register(); ++ Register obj = as_reg(data); ++ Register dst = as_reg(dest); ++ if (is_oop && UseCompressedOops) { ++ __ encode_heap_oop(rscratch2, obj); ++ obj = rscratch2; ++ } ++ assert_different_registers(obj, addr.base(), tmp, rscratch1, dst); ++ __ lea(tmp, addr); ++ (_masm->*xchg)(dst, obj, tmp); ++ if (is_oop && UseCompressedOops) { ++ __ decode_heap_oop(dst); ++ } ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ __ membar(__ AnyAny); ++} ++ ++#undef __ +diff --git a/src/hotspot/cpu/sw64/c1_LIRAssembler_sw64.hpp b/src/hotspot/cpu/sw64/c1_LIRAssembler_sw64.hpp +new file mode 100644 +index 0000000000..620037f516 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/c1_LIRAssembler_sw64.hpp +@@ -0,0 +1,88 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_C1_LIRASSEMBLER_SW64_HPP ++#define CPU_SW64_VM_C1_LIRASSEMBLER_SW64_HPP ++ ++// ArrayCopyStub needs access to bailout ++friend class ArrayCopyStub; ++ ++ private: ++ ++ int array_element_size(BasicType type) const; ++ ++ void arith_fpu_implementation(LIR_Code code, int left_index, int right_index, int dest_index, bool pop_fpu_stack); ++ ++ // helper functions which checks for overflow and sets bailout if it ++ // occurs. Always returns a valid embeddable pointer but in the ++ // bailout case the pointer won't be to unique storage. ++ address float_constant(float f); ++ address double_constant(double d); ++ ++ address int_constant(jlong n); ++ ++ bool is_literal_address(LIR_Address* addr); ++ ++ // When we need to use something other than rscratch1 use this ++ // method. ++ Address as_Address(LIR_Address* addr, Register tmp); ++ ++ // Record the type of the receiver in ReceiverTypeData ++ void type_profile_helper(Register mdo, ++ ciMethodData *md, ciProfileData *data, ++ Register recv, Label* update_done); ++ void add_debug_info_for_branch(address adr, CodeEmitInfo* info); ++ ++ void casw(Register addr, Register newval, Register cmpval); ++ void casl(Register addr, Register newval, Register cmpval); ++ ++ void poll_for_safepoint(relocInfo::relocType rtype, CodeEmitInfo* info = NULL); ++ ++ static const int max_tableswitches = 20; ++ struct tableswitch switches[max_tableswitches]; ++ int tableswitch_count; ++ ++ void init() { tableswitch_count = 0; } ++ ++ void deoptimize_trap(CodeEmitInfo *info); ++ ++ enum { ++ // call stub: CompiledStaticCall::to_interp_stub_size() + ++ // CompiledStaticCall::to_trampoline_stub_size() ++ _call_stub_size = 13, //// * NativeInstruction::instruction_size, //DJX need fix ++ _call_aot_stub_size = 0, ++ _exception_handler_size = DEBUG_ONLY(1*K) NOT_DEBUG(175), ++ _deopt_handler_size = 7 //// * NativeInstruction::instruction_size //DJX need fix ++ }; ++ ++ void arithmetic_idiv(LIR_Op3* op, bool is_irem); ++ ++public: ++ ++ void store_parameter(Register r, int offset_from_esp_in_words); ++ void store_parameter(jint c, int offset_from_esp_in_words); ++ void store_parameter(jobject c, int offset_from_esp_in_words); ++ ++#endif // CPU_SW64_VM_C1_LIRASSEMBLER_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/c1_LIRGenerator_sw64.cpp b/src/hotspot/cpu/sw64/c1_LIRGenerator_sw64.cpp +new file mode 100644 +index 0000000000..ee2fbac957 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/c1_LIRGenerator_sw64.cpp +@@ -0,0 +1,1386 @@ ++/* ++ * Copyright (c) 2005, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "c1/c1_Compilation.hpp" ++#include "c1/c1_FrameMap.hpp" ++#include "c1/c1_Instruction.hpp" ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_LIRGenerator.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "c1/c1_ValueStack.hpp" ++#include "ci/ciArray.hpp" ++#include "ci/ciObjArrayKlass.hpp" ++#include "ci/ciTypeArrayKlass.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "utilities/macros.hpp" ++#include "vmreg_sw64.inline.hpp" ++ ++#ifdef ASSERT ++#define __ gen()->lir(__FILE__, __LINE__)-> ++#else ++#define __ gen()->lir()-> ++#endif ++ ++// Item will be loaded into a byte register; Intel only ++void LIRItem::load_byte_item() { ++ load_item(); ++} ++ ++ ++void LIRItem::load_nonconstant() { ++ LIR_Opr r = value()->operand(); ++ if (r->is_constant()) { ++ _result = r; ++ } else { ++ load_item(); ++ } ++} ++ ++//-------------------------------------------------------------- ++// LIRGenerator ++//-------------------------------------------------------------- ++ ++ ++LIR_Opr LIRGenerator::exceptionOopOpr() { return FrameMap::r0_oop_opr; } ++LIR_Opr LIRGenerator::exceptionPcOpr() { return FrameMap::r3_opr; } ++LIR_Opr LIRGenerator::divInOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::divOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::remOutOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::shiftCountOpr() { Unimplemented(); return LIR_OprFact::illegalOpr; } ++LIR_Opr LIRGenerator::syncLockOpr() { return new_register(T_INT); } ++LIR_Opr LIRGenerator::syncTempOpr() { return FrameMap::r0_opr; } ++LIR_Opr LIRGenerator::getThreadTemp() { return LIR_OprFact::illegalOpr; } ++ ++ ++LIR_Opr LIRGenerator::result_register_for(ValueType* type, bool callee) { ++ LIR_Opr opr; ++ switch (type->tag()) { ++ case intTag: opr = FrameMap::r0_opr; break; ++ case objectTag: opr = FrameMap::r0_oop_opr; break; ++ case longTag: opr = FrameMap::long0_opr; break; ++ case floatTag: opr = FrameMap::fpu0_float_opr; break; ++ case doubleTag: opr = FrameMap::fpu0_double_opr; break; ++ ++ case addressTag: ++ default: ShouldNotReachHere(); return LIR_OprFact::illegalOpr; ++ } ++ ++ assert(opr->type_field() == as_OprType(as_BasicType(type)), "type mismatch"); ++ return opr; ++} ++ ++ ++LIR_Opr LIRGenerator::rlock_byte(BasicType type) { ++ LIR_Opr reg = new_register(T_INT); ++ set_vreg_flag(reg, LIRGenerator::byte_reg); ++ return reg; ++} ++ ++ ++//--------- loading 
items into registers -------------------------------- ++ ++ ++bool LIRGenerator::can_store_as_constant(Value v, BasicType type) const { ++ if (v->type()->as_IntConstant() != NULL) { ++ return v->type()->as_IntConstant()->value() == 0L; ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++bool LIRGenerator::can_inline_as_constant(Value v) const { ++ // FIXME: Just a guess ++ if (v->type()->as_IntConstant() != NULL) { ++ return Assembler::operand_valid_for_add_sub_immediate(v->type()->as_IntConstant()->value()); ++ } else if (v->type()->as_LongConstant() != NULL) { ++ return v->type()->as_LongConstant()->value() == 0L; ++ } else if (v->type()->as_ObjectConstant() != NULL) { ++ return v->type()->as_ObjectConstant()->value()->is_null_object(); ++ } else { ++ return false; ++ } ++} ++ ++ ++bool LIRGenerator::can_inline_as_constant(LIR_Const* c) const { return false; } ++ ++ ++LIR_Opr LIRGenerator::safepoint_poll_register() { ++ return LIR_OprFact::illegalOpr; ++} ++ ++ ++LIR_Address* LIRGenerator::generate_address(LIR_Opr base, LIR_Opr index, ++ int shift, int disp, BasicType type) { ++ assert(base->is_register(), "must be"); ++ intx large_disp = disp; ++ ++ // accumulate fixed displacements ++ if (index->is_constant()) { ++ LIR_Const *constant = index->as_constant_ptr(); ++ if (constant->type() == T_INT) { ++ large_disp += index->as_jint() << shift; ++ } else { ++ assert(constant->type() == T_LONG, "should be"); ++ jlong c = index->as_jlong() << shift; ++ if ((jlong)((jint)c) == c) { ++ large_disp += c; ++ index = LIR_OprFact::illegalOpr; ++ } else { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ move(index, tmp); ++ index = tmp; ++ // apply shift and displacement below ++ } ++ } ++ } ++ ++ if (index->is_register()) { ++ // apply the shift and accumulate the displacement ++ if (shift > 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ shift_left(index, shift, tmp); ++ index = tmp; ++ } ++ if (large_disp != 0) { ++ LIR_Opr tmp = new_pointer_register(); ++ if (Assembler::operand_valid_for_add_sub_immediate(large_disp)) { ++ __ add(tmp, tmp, LIR_OprFact::intptrConst(large_disp)); ++ index = tmp; ++ } else { ++ __ move(tmp, LIR_OprFact::intptrConst(large_disp)); ++ __ add(tmp, index, tmp); ++ index = tmp; ++ } ++ large_disp = 0; ++ } ++ } else if (large_disp != 0 && !Address::offset_ok_for_immed(large_disp, shift)) { ++ // index is illegal so replace it with the displacement loaded into a register ++ index = new_pointer_register(); ++ __ move(LIR_OprFact::intptrConst(large_disp), index); ++ large_disp = 0; ++ } ++ ++ // at this point we either have base + index or base + displacement ++ if (large_disp == 0) { ++ return new LIR_Address(base, index, type); ++ } else { ++ assert(Address::offset_ok_for_immed(large_disp, 0), "must be"); ++ return new LIR_Address(base, large_disp, type); ++ } ++} ++ ++LIR_Address* LIRGenerator::emit_array_address(LIR_Opr array_opr, LIR_Opr index_opr, ++ BasicType type) { ++ int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); ++ int elem_size = type2aelembytes(type); ++ int shift = exact_log2(elem_size); ++ ++ LIR_Address* addr; ++ if (index_opr->is_constant()) { ++ addr = new LIR_Address(array_opr, ++ offset_in_bytes + (intx)(index_opr->as_jint()) * elem_size, type); ++ } else { ++ if (offset_in_bytes) { ++ LIR_Opr tmp = 
new_pointer_register();
++      __ add(array_opr, LIR_OprFact::intConst(offset_in_bytes), tmp);
++      array_opr = tmp;
++      offset_in_bytes = 0;
++    }
++    addr = new LIR_Address(array_opr,
++                           index_opr,
++                           LIR_Address::scale(type),
++                           offset_in_bytes, type);
++  }
++  return addr;
++}
++
++LIR_Opr LIRGenerator::load_immediate(int x, BasicType type) {
++  LIR_Opr r;
++  if (type == T_LONG) {
++    r = LIR_OprFact::longConst(x);
++    if (!Assembler::operand_valid_for_logical_immediate(false, x)) {
++      LIR_Opr tmp = new_register(type);
++      __ move(r, tmp);
++      return tmp;
++    }
++  } else if (type == T_INT) {
++    r = LIR_OprFact::intConst(x);
++    if (!Assembler::operand_valid_for_logical_immediate(true, x)) {
++      // This is all rather nasty. We don't know whether our constant
++      // is required for a logical or an arithmetic operation, so we
++      // don't know what the range of valid values is!!
++      LIR_Opr tmp = new_register(type);
++      __ move(r, tmp);
++      return tmp;
++    }
++  } else {
++    ShouldNotReachHere();
++    r = NULL; // unreachable
++  }
++  return r;
++}
++
++
++
++void LIRGenerator::increment_counter(address counter, BasicType type, int step) {
++  LIR_Opr pointer = new_pointer_register();
++  __ move(LIR_OprFact::intptrConst(counter), pointer);
++  LIR_Address* addr = new LIR_Address(pointer, type);
++  increment_counter(addr, step);
++}
++
++
++void LIRGenerator::increment_counter(LIR_Address* addr, int step) {
++  LIR_Opr imm = NULL;
++  switch(addr->type()) {
++  case T_INT:
++    imm = LIR_OprFact::intConst(step);
++    break;
++  case T_LONG:
++    imm = LIR_OprFact::longConst(step);
++    break;
++  default:
++    ShouldNotReachHere();
++  }
++  LIR_Opr reg = new_register(addr->type());
++  __ load(addr, reg);
++  __ add(reg, imm, reg);
++  __ store(reg, addr);
++}
++
++void LIRGenerator::cmp_mem_int(LIR_Condition condition, LIR_Opr base, int disp, int c, CodeEmitInfo* info) {
++  LIR_Opr reg = new_register(T_INT);
++  __ load(generate_address(base, disp, T_INT), reg, info);
++  __ cmp(condition, reg, LIR_OprFact::intConst(c));
++}
++
++void LIRGenerator::cmp_reg_mem(LIR_Condition condition, LIR_Opr reg, LIR_Opr base, int disp, BasicType type, CodeEmitInfo* info) {
++  LIR_Opr reg1 = new_register(T_INT);
++  __ load(generate_address(base, disp, type), reg1, info);
++  __ cmp(condition, reg, reg1);
++}
++
++
++bool LIRGenerator::strength_reduce_multiply(LIR_Opr left, int c, LIR_Opr result, LIR_Opr tmp) {
++
++  if (is_power_of_2(c - 1)) {
++    __ shift_left(left, exact_log2(c - 1), tmp);
++    __ add(tmp, left, result);
++    return true;
++  } else if (is_power_of_2(c + 1)) {
++    __ shift_left(left, exact_log2(c + 1), tmp);
++    __ sub(tmp, left, result);
++    return true;
++  } else {
++    return false;
++  }
++}
++
++void LIRGenerator::store_stack_parameter (LIR_Opr item, ByteSize offset_from_sp) {
++  BasicType type = item->type();
++  __ store(item, new LIR_Address(FrameMap::sp_opr, in_bytes(offset_from_sp), type));
++}
++
++void LIRGenerator::array_store_check(LIR_Opr value, LIR_Opr array, CodeEmitInfo* store_check_info, ciMethod* profiled_method, int profiled_bci) {
++  LIR_Opr tmp1 = new_register(objectType);
++  LIR_Opr tmp2 = new_register(objectType);
++  LIR_Opr tmp3 = new_register(objectType);
++  __ store_check(value, array, tmp1, tmp2, tmp3, store_check_info, profiled_method, profiled_bci);
++}
++
++//----------------------------------------------------------------------
++// visitor functions
++//----------------------------------------------------------------------
++
++void LIRGenerator::do_MonitorEnter(MonitorEnter* x) {
++  assert(x->is_pinned(),"");
++ 
LIRItem obj(x->obj(), this); ++ obj.load_item(); ++ ++ set_no_result(x); ++ ++ // "lock" stores the address of the monitor stack slot, so this is not an oop ++ LIR_Opr lock = new_register(T_INT); ++ // Need a scratch register for biased locking ++ LIR_Opr scratch = LIR_OprFact::illegalOpr; ++ if (UseBiasedLocking) { ++ scratch = new_register(T_INT); ++ } ++ ++ CodeEmitInfo* info_for_exception = NULL; ++ if (x->needs_null_check()) { ++ info_for_exception = state_for(x); ++ } ++ // this CodeEmitInfo must not have the xhandlers because here the ++ // object is already locked (xhandlers expect object to be unlocked) ++ CodeEmitInfo* info = state_for(x, x->state(), true); ++ monitor_enter(obj.result(), lock, syncTempOpr(), scratch, ++ x->monitor_no(), info_for_exception, info); ++} ++ ++ ++void LIRGenerator::do_MonitorExit(MonitorExit* x) { ++ assert(x->is_pinned(),""); ++ ++ LIRItem obj(x->obj(), this); ++ obj.dont_load_item(); ++ ++ LIR_Opr lock = new_register(T_INT); ++ LIR_Opr obj_temp = new_register(T_INT); ++ set_no_result(x); ++ monitor_exit(obj_temp, lock, syncTempOpr(), LIR_OprFact::illegalOpr, x->monitor_no()); ++} ++ ++ ++void LIRGenerator::do_NegateOp(NegateOp* x) { ++ ++ LIRItem from(x->x(), this); ++ from.load_item(); ++ LIR_Opr result = rlock_result(x); ++ __ negate (from.result(), result); ++ ++} ++ ++// for _fadd, _fmul, _fsub, _fdiv, _frem ++// _dadd, _dmul, _dsub, _ddiv, _drem ++void LIRGenerator::do_ArithmeticOp_FPU(ArithmeticOp* x) { ++ ++ if (x->op() == Bytecodes::_frem || x->op() == Bytecodes::_drem) { ++ // float remainder is implemented as a direct call into the runtime ++ LIRItem right(x->x(), this); ++ LIRItem left(x->y(), this); ++ ++ BasicTypeList signature(2); ++ if (x->op() == Bytecodes::_frem) { ++ signature.append(T_FLOAT); ++ signature.append(T_FLOAT); ++ } else { ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ } ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ left.load_item_force(cc->at(1)); ++ right.load_item(); ++ ++ __ move(right.result(), cc->at(0)); ++ ++ address entry; ++ if (x->op() == Bytecodes::_frem) { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::frem); ++ } else { ++ entry = CAST_FROM_FN_PTR(address, SharedRuntime::drem); ++ } ++ ++ LIR_Opr result = rlock_result(x); ++ __ call_runtime_leaf(entry, getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ return; ++ } ++ ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ ++ // Always load right hand side. ++ right.load_item(); ++ ++ if (!left.is_register()) ++ left.load_item(); ++ ++ LIR_Opr reg = rlock(x); ++ LIR_Opr tmp = LIR_OprFact::illegalOpr; ++ if (x->is_strictfp() && (x->op() == Bytecodes::_dmul || x->op() == Bytecodes::_ddiv)) { ++ tmp = new_register(T_DOUBLE); ++ } ++ ++ arithmetic_op_fpu(x->op(), reg, left.result(), right.result(), x->is_strictfp()); ++ ++ set_result(x, round_item(reg)); ++} ++ ++// for _ladd, _lmul, _lsub, _ldiv, _lrem ++void LIRGenerator::do_ArithmeticOp_Long(ArithmeticOp* x) { ++ ++ // missing test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ if (x->op() == Bytecodes::_ldiv || x->op() == Bytecodes::_lrem) { ++ ++ // the check for division by zero destroys the right operand ++ right.set_destroys_register(); ++ ++ // check for division by zero (destroys registers of right operand!) 
++ CodeEmitInfo* info = state_for(x); ++ ++ left.load_item(); ++ right.load_item(); ++ ++ __ cmp(lir_cond_equal, right.result(), LIR_OprFact::longConst(0)); ++ __ branch(lir_cond_equal, T_LONG, new DivByZeroStub(info)); ++ ++ rlock_result(x); ++ switch (x->op()) { ++ case Bytecodes::_lrem: ++ __ rem (left.result(), right.result(), x->operand()); ++ break; ++ case Bytecodes::_ldiv: ++ __ div (left.result(), right.result(), x->operand()); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ ++ ++ } else { ++ assert (x->op() == Bytecodes::_lmul || x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, ++ "expect lmul, ladd or lsub"); ++ // add, sub, mul ++ left.load_item(); ++ if (! right.is_register()) { ++ if (x->op() == Bytecodes::_lmul ++ || ! right.is_constant() ++ || ! Assembler::operand_valid_for_add_sub_immediate(right.get_jlong_constant())) { ++ right.load_item(); ++ } else { // add, sub ++ assert (x->op() == Bytecodes::_ladd || x->op() == Bytecodes::_lsub, "expect ladd or lsub"); ++ // don't load constants to save register ++ right.load_nonconstant(); ++ } ++ } ++ rlock_result(x); ++ arithmetic_op_long(x->op(), x->operand(), left.result(), right.result(), NULL); ++ } ++} ++ ++// for: _iadd, _imul, _isub, _idiv, _irem ++void LIRGenerator::do_ArithmeticOp_Int(ArithmeticOp* x) { ++ ++ // Test if instr is commutative and if we should swap ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ LIRItem* left_arg = &left; ++ LIRItem* right_arg = &right; ++ if (x->is_commutative() && left.is_stack() && right.is_register()) { ++ // swap them if left is real stack (or cached) and right is real register(not cached) ++ left_arg = &right; ++ right_arg = &left; ++ } ++ ++ left_arg->load_item(); ++ ++ // do not need to load right, as we can handle stack and constants ++ if (x->op() == Bytecodes::_idiv || x->op() == Bytecodes::_irem) { ++ ++ right_arg->load_item(); ++ rlock_result(x); ++ ++ CodeEmitInfo* info = state_for(x); ++ LIR_Opr tmp = new_register(T_INT); ++ __ cmp(lir_cond_equal, right_arg->result(), LIR_OprFact::longConst(0)); ++ __ branch(lir_cond_equal, T_INT, new DivByZeroStub(info)); ++ info = state_for(x); ++ ++ if (x->op() == Bytecodes::_irem) { ++ __ irem(left_arg->result(), right_arg->result(), x->operand(), tmp, NULL); ++ } else if (x->op() == Bytecodes::_idiv) { ++ __ idiv(left_arg->result(), right_arg->result(), x->operand(), tmp, NULL); ++ } ++ ++ } else if (x->op() == Bytecodes::_iadd || x->op() == Bytecodes::_isub) { ++ if (right.is_constant() ++ && Assembler::operand_valid_for_add_sub_immediate(right.get_jint_constant())) { ++ right.load_nonconstant(); ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), LIR_OprFact::illegalOpr); ++ } else { ++ assert (x->op() == Bytecodes::_imul, "expect imul"); ++ if (right.is_constant()) { ++ jint c = right.get_jint_constant(); ++ if (c > 0 && c < max_jint && (is_power_of_2(c) || is_power_of_2(c - 1) || is_power_of_2(c + 1))) { ++ right_arg->dont_load_item(); ++ } else { ++ // Cannot use constant op. 
++ right_arg->load_item(); ++ } ++ } else { ++ right.load_item(); ++ } ++ rlock_result(x); ++ arithmetic_op_int(x->op(), x->operand(), left_arg->result(), right_arg->result(), new_register(T_INT)); ++ } ++} ++ ++void LIRGenerator::do_ArithmeticOp(ArithmeticOp* x) { ++ // when an operand with use count 1 is the left operand, then it is ++ // likely that no move for 2-operand-LIR-form is necessary ++ if (x->is_commutative() && x->y()->as_Constant() == NULL && x->x()->use_count() > x->y()->use_count()) { ++ x->swap_operands(); ++ } ++ ++ ValueTag tag = x->type()->tag(); ++ assert(x->x()->type()->tag() == tag && x->y()->type()->tag() == tag, "wrong parameters"); ++ switch (tag) { ++ case floatTag: ++ case doubleTag: do_ArithmeticOp_FPU(x); return; ++ case longTag: do_ArithmeticOp_Long(x); return; ++ case intTag: do_ArithmeticOp_Int(x); return; ++ } ++ ShouldNotReachHere(); ++} ++ ++// _ishl, _lshl, _ishr, _lshr, _iushr, _lushr ++void LIRGenerator::do_ShiftOp(ShiftOp* x) { ++ ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant()) { ++ right.dont_load_item(); ++ ++ switch (x->op()) { ++ case Bytecodes::_ishl: { ++ int c = right.get_jint_constant() & 0x1f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ } ++ case Bytecodes::_ishr: { ++ int c = right.get_jint_constant() & 0x1f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ } ++ case Bytecodes::_iushr: { ++ int c = right.get_jint_constant() & 0x1f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ } ++ case Bytecodes::_lshl: { ++ int c = right.get_jint_constant() & 0x3f; ++ __ shift_left(left.result(), c, x->operand()); ++ break; ++ } ++ case Bytecodes::_lshr: { ++ int c = right.get_jint_constant() & 0x3f; ++ __ shift_right(left.result(), c, x->operand()); ++ break; ++ } ++ case Bytecodes::_lushr: { ++ int c = right.get_jint_constant() & 0x3f; ++ __ unsigned_shift_right(left.result(), c, x->operand()); ++ break; ++ } ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { ++ right.load_item(); ++ LIR_Opr tmp = new_register(T_INT); ++ switch (x->op()) { ++ case Bytecodes::_ishl: { ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ } ++ case Bytecodes::_ishr: { ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ } ++ case Bytecodes::_iushr: { ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x1f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ } ++ case Bytecodes::_lshl: { ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_left(left.result(), tmp, x->operand(), tmp); ++ break; ++ } ++ case Bytecodes::_lshr: { ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ } ++ case Bytecodes::_lushr: { ++ __ logical_and(right.result(), LIR_OprFact::intConst(0x3f), tmp); ++ __ unsigned_shift_right(left.result(), tmp, x->operand(), tmp); ++ break; ++ } ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++// _iand, _land, _ior, _lor, _ixor, _lxor ++void LIRGenerator::do_LogicOp(LogicOp* x) { ++ ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ++ left.load_item(); ++ ++ rlock_result(x); ++ if (right.is_constant() ++ && ((right.type()->tag() == intTag ++ && 
Assembler::operand_valid_for_logical_immediate(true, right.get_jint_constant())) ++ || (right.type()->tag() == longTag ++ && Assembler::operand_valid_for_logical_immediate(false, right.get_jlong_constant())))) { ++ right.dont_load_item(); ++ } else { ++ right.load_item(); ++ } ++ switch (x->op()) { ++ case Bytecodes::_iand: ++ case Bytecodes::_land: ++ __ logical_and(left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ior: ++ case Bytecodes::_lor: ++ __ logical_or (left.result(), right.result(), x->operand()); break; ++ case Bytecodes::_ixor: ++ case Bytecodes::_lxor: ++ __ logical_xor(left.result(), right.result(), x->operand()); break; ++ default: Unimplemented(); ++ } ++} ++ ++// _lcmp, _fcmpl, _fcmpg, _dcmpl, _dcmpg ++void LIRGenerator::do_CompareOp(CompareOp* x) { ++ LIRItem left(x->x(), this); ++ LIRItem right(x->y(), this); ++ ValueTag tag = x->x()->type()->tag(); ++ if (tag == longTag) { ++ left.set_destroys_register(); ++ } ++ left.load_item(); ++ right.load_item(); ++ LIR_Opr reg = rlock_result(x); ++ ++ if (x->x()->type()->is_float_kind()) { ++ Bytecodes::Code code = x->op(); ++ __ fcmp2int(left.result(), right.result(), reg, (code == Bytecodes::_fcmpl || code == Bytecodes::_dcmpl)); ++ } else if (x->x()->type()->tag() == longTag) { ++ __ lcmp2int(left.result(), right.result(), reg); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++LIR_Opr LIRGenerator::atomic_cmpxchg(BasicType type, LIR_Opr addr, LIRItem& cmp_value, LIRItem& new_value) { ++ LIR_Opr ill = LIR_OprFact::illegalOpr; // for convenience ++ new_value.load_item(); ++ cmp_value.load_item(); ++ LIR_Opr result = new_register(T_INT); ++ if (type == T_OBJECT || type == T_ARRAY) { ++ __ cas_obj(addr, cmp_value.result(), new_value.result(), new_register(T_INT), new_register(T_INT), result); ++ } else if (type == T_INT) { ++ __ cas_int(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill); ++ } else if (type == T_LONG) { ++ __ cas_long(addr->as_address_ptr()->base(), cmp_value.result(), new_value.result(), ill, ill); ++ } else { ++ ShouldNotReachHere(); ++ Unimplemented(); ++ } ++ __ logical_xor(FrameMap::r8_opr, LIR_OprFact::intConst(1), result); ++ return result; ++} ++ ++LIR_Opr LIRGenerator::atomic_xchg(BasicType type, LIR_Opr addr, LIRItem& value) { ++ bool is_oop = type == T_OBJECT || type == T_ARRAY; ++ LIR_Opr result = new_register(type); ++ value.load_item(); ++ assert(type == T_INT || is_oop LP64_ONLY( || type == T_LONG ), "unexpected type"); ++ LIR_Opr tmp = new_register(T_INT); ++ __ xchg(addr, value.result(), result, tmp); ++ return result; ++} ++ ++LIR_Opr LIRGenerator::atomic_add(BasicType type, LIR_Opr addr, LIRItem& value) { ++ LIR_Opr result = new_register(type); ++ value.load_item(); ++ assert(type == T_INT LP64_ONLY( || type == T_LONG ), "unexpected type"); ++ LIR_Opr tmp = new_register(T_INT); ++ __ xadd(addr, value.result(), result, tmp); ++ return result; ++} ++ ++void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 1 || (x->number_of_arguments() == 2 && x->id() == vmIntrinsics::_dpow), "wrong type"); ++ if (x->id() == vmIntrinsics::_dexp || x->id() == vmIntrinsics::_dlog || ++ x->id() == vmIntrinsics::_dpow || x->id() == vmIntrinsics::_dcos || ++ x->id() == vmIntrinsics::_dsin || x->id() == vmIntrinsics::_dtan || ++ x->id() == vmIntrinsics::_dlog10) { ++ do_LibmIntrinsic(x); ++ return; ++ } ++ switch (x->id()) { ++ case vmIntrinsics::_dabs: ++ case vmIntrinsics::_dsqrt: { ++ assert(x->number_of_arguments() == 1, "wrong 
type"); ++ LIRItem value(x->argument_at(0), this); ++ value.load_item(); ++ LIR_Opr dst = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dsqrt: { ++ __ sqrt(value.result(), dst, LIR_OprFact::illegalOpr); ++ break; ++ } ++ case vmIntrinsics::_dabs: { ++ __ abs(value.result(), dst, LIR_OprFact::illegalOpr); ++ break; ++ } ++ } ++ break; ++ } ++ } ++} ++ ++void LIRGenerator::do_LibmIntrinsic(Intrinsic* x) { ++ LIRItem value(x->argument_at(0), this); ++ value.set_destroys_register(); ++ ++ LIR_Opr calc_result = rlock_result(x); ++ LIR_Opr result_reg = result_register_for(x->type()); ++ ++ CallingConvention* cc = NULL; ++ ++ if (x->id() == vmIntrinsics::_dpow) { ++ LIRItem value1(x->argument_at(1), this); ++ ++ value1.set_destroys_register(); ++ ++ BasicTypeList signature(2); ++ signature.append(T_DOUBLE); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ value1.load_item_force(cc->at(1)); ++ } else { ++ BasicTypeList signature(1); ++ signature.append(T_DOUBLE); ++ cc = frame_map()->c_calling_convention(&signature); ++ value.load_item_force(cc->at(0)); ++ } ++ ++ switch (x->id()) { ++ case vmIntrinsics::_dexp: ++ if (StubRoutines::dexp() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dexp(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dexp), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dlog: ++ if (StubRoutines::dlog() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dlog(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dlog10: ++ if (StubRoutines::dlog10() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dlog10(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dpow: ++ if (StubRoutines::dpow() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dpow(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dpow), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dsin: ++ if (StubRoutines::dsin() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dsin(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dsin), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dcos: ++ if (StubRoutines::dcos() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dcos(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dcos), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ case vmIntrinsics::_dtan: ++ if (StubRoutines::dtan() != NULL) { ++ __ call_runtime_leaf(StubRoutines::dtan(), getThreadTemp(), result_reg, cc->args()); ++ } else { ++ __ call_runtime_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtan), getThreadTemp(), result_reg, cc->args()); ++ } ++ break; ++ default: ShouldNotReachHere(); ++ } ++ __ move(result_reg, calc_result); ++} ++ ++ ++void LIRGenerator::do_ArrayCopy(Intrinsic* x) { ++ assert(x->number_of_arguments() == 5, "wrong type"); ++ ++ // Make all state_for calls early since they can emit code ++ CodeEmitInfo* info 
= state_for(x, x->state()); ++ ++ LIRItem src(x->argument_at(0), this); ++ LIRItem src_pos(x->argument_at(1), this); ++ LIRItem dst(x->argument_at(2), this); ++ LIRItem dst_pos(x->argument_at(3), this); ++ LIRItem length(x->argument_at(4), this); ++ ++ // operands for arraycopy must use fixed registers, otherwise ++ // LinearScan will fail allocation (because arraycopy always needs a ++ // call) ++ ++ // The java calling convention will give us enough registers ++ // so that on the stub side the args will be perfect already. ++ // On the other slow/special case side we call C and the arg ++ // positions are not similar enough to pick one as the best. ++ // Also because the java calling convention is a "shifted" version ++ // of the C convention we can process the java args trivially into C ++ // args without worry of overwriting during the xfer ++ ++ src.load_item_force (FrameMap::as_oop_opr(j_rarg0)); ++ src_pos.load_item_force (FrameMap::as_opr(j_rarg1)); ++ dst.load_item_force (FrameMap::as_oop_opr(j_rarg2)); ++ dst_pos.load_item_force (FrameMap::as_opr(j_rarg3)); ++ length.load_item_force (FrameMap::as_opr(j_rarg4)); ++ ++ LIR_Opr tmp = FrameMap::as_opr(j_rarg5); ++ ++ set_no_result(x); ++ ++ int flags; ++ ciArrayKlass* expected_type; ++ arraycopy_helper(x, &flags, &expected_type); ++ ++ __ arraycopy(src.result(), src_pos.result(), dst.result(), dst_pos.result(), length.result(), tmp, expected_type, flags, info); // does add_safepoint ++} ++ ++void LIRGenerator::do_update_CRC32(Intrinsic* x) { ++ assert(UseCRC32Intrinsics, "why are we here?"); ++ // Make all state_for calls early since they can emit code ++ LIR_Opr result = rlock_result(x); ++ int flags = 0; ++ switch (x->id()) { ++ case vmIntrinsics::_updateCRC32: { ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem val(x->argument_at(1), this); ++ // val is destroyed by update_crc32 ++ val.set_destroys_register(); ++ crc.load_item(); ++ val.load_item(); ++ __ update_crc32(crc.result(), val.result(), result); ++ break; ++ } ++ case vmIntrinsics::_updateBytesCRC32: ++ case vmIntrinsics::_updateByteBufferCRC32: { ++ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32); ++ ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem buf(x->argument_at(1), this); ++ LIRItem off(x->argument_at(2), this); ++ LIRItem len(x->argument_at(3), this); ++ buf.load_item(); ++ off.load_nonconstant(); ++ ++ LIR_Opr index = off.result(); ++ int offset = is_updateBytes ? 
arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; ++ if(off.result()->is_constant()) { ++ index = LIR_OprFact::illegalOpr; ++ offset += off.result()->as_jint(); ++ } ++ LIR_Opr base_op = buf.result(); ++ ++ if (index->is_valid()) { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ index = tmp; ++ } ++ ++ if (offset) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(base_op, LIR_OprFact::intConst(offset), tmp); ++ base_op = tmp; ++ offset = 0; ++ } ++ ++ LIR_Address* a = new LIR_Address(base_op, ++ index, ++ offset, ++ T_BYTE); ++ BasicTypeList signature(3); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ crc.load_item_force(cc->at(0)); ++ __ move(addr, cc->at(1)); ++ len.load_item_force(cc->at(2)); ++ ++ __ call_runtime_leaf(StubRoutines::updateBytesCRC32(), getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ break; ++ } ++ default: { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++void LIRGenerator::do_update_CRC32C(Intrinsic* x) { ++ assert(UseCRC32CIntrinsics, "why are we here?"); ++ // Make all state_for calls early since they can emit code ++ LIR_Opr result = rlock_result(x); ++ int flags = 0; ++ switch (x->id()) { ++ case vmIntrinsics::_updateBytesCRC32C: ++ case vmIntrinsics::_updateDirectByteBufferCRC32C: { ++ bool is_updateBytes = (x->id() == vmIntrinsics::_updateBytesCRC32C); ++ int offset = is_updateBytes ? arrayOopDesc::base_offset_in_bytes(T_BYTE) : 0; ++ ++ LIRItem crc(x->argument_at(0), this); ++ LIRItem buf(x->argument_at(1), this); ++ LIRItem off(x->argument_at(2), this); ++ LIRItem end(x->argument_at(3), this); ++ ++ buf.load_item(); ++ off.load_nonconstant(); ++ end.load_nonconstant(); ++ ++ // len = end - off ++ LIR_Opr len = end.result(); ++ LIR_Opr tmpA = new_register(T_INT); ++ LIR_Opr tmpB = new_register(T_INT); ++ __ move(end.result(), tmpA); ++ __ move(off.result(), tmpB); ++ __ sub(tmpA, tmpB, tmpA); ++ len = tmpA; ++ ++ LIR_Opr index = off.result(); ++ if(off.result()->is_constant()) { ++ index = LIR_OprFact::illegalOpr; ++ offset += off.result()->as_jint(); ++ } ++ LIR_Opr base_op = buf.result(); ++ ++ if (index->is_valid()) { ++ LIR_Opr tmp = new_register(T_LONG); ++ __ convert(Bytecodes::_i2l, index, tmp); ++ index = tmp; ++ } ++ ++ if (offset) { ++ LIR_Opr tmp = new_pointer_register(); ++ __ add(base_op, LIR_OprFact::intConst(offset), tmp); ++ base_op = tmp; ++ offset = 0; ++ } ++ ++ LIR_Address* a = new LIR_Address(base_op, ++ index, ++ offset, ++ T_BYTE); ++ BasicTypeList signature(3); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ CallingConvention* cc = frame_map()->c_calling_convention(&signature); ++ const LIR_Opr result_reg = result_register_for(x->type()); ++ ++ LIR_Opr addr = new_pointer_register(); ++ __ leal(LIR_OprFact::address(a), addr); ++ ++ crc.load_item_force(cc->at(0)); ++ __ move(addr, cc->at(1)); ++ __ move(len, cc->at(2)); ++ ++ __ call_runtime_leaf(StubRoutines::updateBytesCRC32C(), getThreadTemp(), result_reg, cc->args()); ++ __ move(result_reg, result); ++ ++ break; ++ } ++ default: { ++ ShouldNotReachHere(); ++ } ++ } ++} ++ ++void LIRGenerator::do_FmaIntrinsic(Intrinsic* x) { ++ assert(x->number_of_arguments() == 3, "wrong type"); ++ assert(UseFMA, "Needs FMA 
instructions support."); ++ LIRItem value(x->argument_at(0), this); ++ LIRItem value1(x->argument_at(1), this); ++ LIRItem value2(x->argument_at(2), this); ++ ++ value.load_item(); ++ value1.load_item(); ++ value2.load_item(); ++ ++ LIR_Opr calc_input = value.result(); ++ LIR_Opr calc_input1 = value1.result(); ++ LIR_Opr calc_input2 = value2.result(); ++ LIR_Opr calc_result = rlock_result(x); ++ ++ switch (x->id()) { ++ case vmIntrinsics::_fmaD: __ fmad(calc_input, calc_input1, calc_input2, calc_result); break; ++ case vmIntrinsics::_fmaF: __ fmaf(calc_input, calc_input1, calc_input2, calc_result); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void LIRGenerator::do_vectorizedMismatch(Intrinsic* x) { ++ fatal("vectorizedMismatch intrinsic is not implemented on this platform"); ++} ++ ++// _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f ++// _i2b, _i2c, _i2s ++void LIRGenerator::do_Convert(Convert* x) { ++ LIRItem value(x->value(), this); ++ value.load_item(); ++ LIR_Opr input = value.result(); ++ LIR_Opr result = rlock(x); ++ ++ // arguments of lir_convert ++ LIR_Opr conv_input = input; ++ LIR_Opr conv_result = result; ++ ConversionStub* stub = NULL; ++ ++ __ convert(x->op(), conv_input, conv_result); ++ ++ assert(result->is_virtual(), "result must be virtual register"); ++ set_result(x, result); ++} ++ ++void LIRGenerator::do_NewInstance(NewInstance* x) { ++#ifndef PRODUCT ++ if (PrintNotLoaded && !x->klass()->is_loaded()) { ++ tty->print_cr(" ###class not loaded at new bci %d", x->printable_bci()); ++ } ++#endif ++ CodeEmitInfo* info = state_for(x, x->state()); ++ LIR_Opr reg = result_register_for(x->type()); ++ new_instance(reg, x->klass(), x->is_unresolved(), ++ FrameMap::r2_oop_opr, ++ FrameMap::r5_oop_opr, ++ FrameMap::r4_oop_opr, ++ LIR_OprFact::illegalOpr, ++ FrameMap::r3_metadata_opr, info); ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewTypeArray(NewTypeArray* x) { ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIRItem length(x->length(), this); ++ length.load_item_force(FrameMap::r19_opr); ++ ++ LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::r2_oop_opr; ++ LIR_Opr tmp2 = FrameMap::r4_oop_opr; ++ LIR_Opr tmp3 = FrameMap::r5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::r3_metadata_opr; ++ LIR_Opr len = length.result(); ++ BasicType elem_type = x->elt_type(); ++ ++ __ metadata2reg(ciTypeArrayKlass::make(elem_type)->constant_encoding(), klass_reg); ++ ++ CodeStub* slow_path = new NewTypeArrayStub(klass_reg, len, reg, info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, elem_type, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_NewObjectArray(NewObjectArray* x) { ++ LIRItem length(x->length(), this); ++ // in case of patching (i.e., object class is not yet loaded), we need to reexecute the instruction ++ // and therefore provide the state before the parameters have been consumed ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ } ++ ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ LIR_Opr reg = result_register_for(x->type()); ++ LIR_Opr tmp1 = FrameMap::r2_oop_opr; ++ LIR_Opr tmp2 = FrameMap::r4_oop_opr; ++ LIR_Opr tmp3 = FrameMap::r5_oop_opr; ++ LIR_Opr tmp4 = reg; ++ LIR_Opr klass_reg = FrameMap::r3_metadata_opr; ++ ++ length.load_item_force(FrameMap::r19_opr); ++ LIR_Opr 
len = length.result(); ++ ++ CodeStub* slow_path = new NewObjectArrayStub(klass_reg, len, reg, info); ++ ciKlass* obj = (ciKlass*) ciObjArrayKlass::make(x->klass()); ++ if (obj == ciEnv::unloaded_ciobjarrayklass()) { ++ BAILOUT("encountered unloaded_ciobjarrayklass due to out of memory error"); ++ } ++ klass2reg_with_patching(klass_reg, obj, patching_info); ++ __ allocate_array(reg, len, tmp1, tmp2, tmp3, tmp4, T_OBJECT, klass_reg, slow_path); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++ ++void LIRGenerator::do_NewMultiArray(NewMultiArray* x) { ++ Values* dims = x->dims(); ++ int i = dims->length(); ++ LIRItemList* items = new LIRItemList(i, i, NULL); ++ while (i-- > 0) { ++ LIRItem* size = new LIRItem(dims->at(i), this); ++ items->at_put(i, size); ++ } ++ ++ // Evaluate state_for early since it may emit code. ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || PatchALot) { ++ patching_info = state_for(x, x->state_before()); ++ ++ // Cannot re-use same xhandlers for multiple CodeEmitInfos, so ++ // clone all handlers (NOTE: Usually this is handled transparently ++ // by the CodeEmitInfo cloning logic in CodeStub constructors but ++ // is done explicitly here because a stub isn't being used). ++ x->set_exception_handlers(new XHandlers(x->exception_handlers())); ++ } ++ CodeEmitInfo* info = state_for(x, x->state()); ++ ++ i = dims->length(); ++ while (i-- > 0) { ++ LIRItem* size = items->at(i); ++ size->load_item(); ++ ++ store_stack_parameter(size->result(), in_ByteSize(i*4)); ++ } ++ ++ LIR_Opr klass_reg = FrameMap::r0_metadata_opr; ++ klass2reg_with_patching(klass_reg, x->klass(), patching_info); ++ ++ LIR_Opr rank = FrameMap::r19_opr; ++ __ move(LIR_OprFact::intConst(x->rank()), rank); ++ LIR_Opr varargs = FrameMap::r2_opr; ++ __ move(FrameMap::sp_opr, varargs); ++ LIR_OprList* args = new LIR_OprList(3); ++ args->append(klass_reg); ++ args->append(rank); ++ args->append(varargs); ++ LIR_Opr reg = result_register_for(x->type()); ++ __ call_runtime(Runtime1::entry_for(Runtime1::new_multi_array_id), ++ LIR_OprFact::illegalOpr, ++ reg, args, info); ++ ++ LIR_Opr result = rlock_result(x); ++ __ move(reg, result); ++} ++ ++void LIRGenerator::do_BlockBegin(BlockBegin* x) { ++ // nothing to do for now ++} ++ ++void LIRGenerator::do_CheckCast(CheckCast* x) { ++ LIRItem obj(x->obj(), this); ++ ++ CodeEmitInfo* patching_info = NULL; ++ if (!x->klass()->is_loaded() || (PatchALot && !x->is_incompatible_class_change_check() && !x->is_invokespecial_receiver_check())) { ++ // must do this before locking the destination register as an oop register, ++ // and before the obj is loaded (the latter is for deoptimization) ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ ++ // info for exceptions ++ CodeEmitInfo* info_for_exception = ++ (x->needs_exception_state() ? 
state_for(x) : ++ state_for(x, x->state_before(), true /*ignore_xhandler*/)); ++ ++ CodeStub* stub; ++ if (x->is_incompatible_class_change_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new SimpleExceptionStub(Runtime1::throw_incompatible_class_change_error_id, LIR_OprFact::illegalOpr, info_for_exception); ++ } else if (x->is_invokespecial_receiver_check()) { ++ assert(patching_info == NULL, "can't patch this"); ++ stub = new DeoptimizeStub(info_for_exception, ++ Deoptimization::Reason_class_check, ++ Deoptimization::Action_none); ++ } else { ++ stub = new SimpleExceptionStub(Runtime1::throw_class_cast_exception_id, obj.result(), info_for_exception); ++ } ++ LIR_Opr reg = rlock_result(x); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ checkcast(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), info_for_exception, patching_info, stub, ++ x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_InstanceOf(InstanceOf* x) { ++ LIRItem obj(x->obj(), this); ++ ++ // result and test object may not be in same register ++ LIR_Opr reg = rlock_result(x); ++ CodeEmitInfo* patching_info = NULL; ++ if ((!x->klass()->is_loaded() || PatchALot)) { ++ // must do this before locking the destination register as an oop register ++ patching_info = state_for(x, x->state_before()); ++ } ++ obj.load_item(); ++ LIR_Opr tmp3 = LIR_OprFact::illegalOpr; ++ if (!x->klass()->is_loaded() || UseCompressedClassPointers) { ++ tmp3 = new_register(objectType); ++ } ++ __ instanceof(reg, obj.result(), x->klass(), ++ new_register(objectType), new_register(objectType), tmp3, ++ x->direct_compare(), patching_info, x->profiled_method(), x->profiled_bci()); ++} ++ ++void LIRGenerator::do_If(If* x) { ++ assert(x->number_of_sux() == 2, "inconsistency"); ++ ValueTag tag = x->x()->type()->tag(); ++ bool is_safepoint = x->is_safepoint(); ++ ++ If::Condition cond = x->cond(); ++ ++ LIRItem xitem(x->x(), this); ++ LIRItem yitem(x->y(), this); ++ LIRItem* xin = &xitem; ++ LIRItem* yin = &yitem; ++ ++ if (tag == longTag) { ++ // for longs, only conditions "eql", "neq", "lss", "geq" are valid; ++ // mirror for other conditions ++ if (cond == If::gtr || cond == If::leq) { ++ cond = Instruction::mirror(cond); ++ xin = &yitem; ++ yin = &xitem; ++ } ++ xin->set_destroys_register(); ++ } ++ xin->load_item(); ++ ++ if (tag == longTag) { ++ if (yin->is_constant() ++ && Assembler::operand_valid_for_add_sub_immediate(yin->get_jlong_constant())) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else if (tag == intTag) { ++ if (yin->is_constant() ++ && Assembler::operand_valid_for_add_sub_immediate(yin->get_jint_constant())) { ++ yin->dont_load_item(); ++ } else { ++ yin->load_item(); ++ } ++ } else { ++ yin->load_item(); ++ } ++ ++ set_no_result(x); ++ ++ LIR_Opr left = xin->result(); ++ LIR_Opr right = yin->result(); ++ ++ // add safepoint before generating condition code so it can be recomputed ++ if (x->is_safepoint()) { ++ // increment backedge counter if needed ++ increment_backedge_counter_conditionally(lir_cond(cond), left, right, state_for(x, x->state_before()), ++ x->tsux()->bci(), x->fsux()->bci(), x->profiled_bci()); ++ __ safepoint(LIR_OprFact::illegalOpr, state_for(x, x->state_before())); ++ } ++ ++ __ cmp(lir_cond(cond), left, right); ++ // Generate branch profiling. Profiling code doesn't kill flags. 
++ profile_branch(x, cond); ++ move_to_phi(x->state()); ++ if (x->x()->type()->is_float_kind()) { ++ __ branch(lir_cond(cond), right->type(), x->tsux(), x->usux()); ++ } else { ++ __ branch(lir_cond(cond), right->type(), x->tsux()); ++ } ++ assert(x->default_sux() == x->fsux(), "wrong destination above"); ++ __ jump(x->default_sux()); ++} ++ ++LIR_Opr LIRGenerator::getThreadPointer() { ++ return FrameMap::as_pointer_opr(rthread); ++} ++ ++void LIRGenerator::trace_block_entry(BlockBegin* block) { Unimplemented(); } ++ ++void LIRGenerator::volatile_field_store(LIR_Opr value, LIR_Address* address, ++ CodeEmitInfo* info) { ++ __ volatile_store_mem_reg(value, address, info); ++} ++ ++void LIRGenerator::volatile_field_load(LIR_Address* address, LIR_Opr result, ++ CodeEmitInfo* info) { ++ // 8179954: We need to make sure that the code generated for ++ // volatile accesses forms a sequentially-consistent set of ++ // operations when combined with STLR and LDAR. Without a leading ++ // membar it's possible for a simple Dekker test to fail if loads ++ // use LD;DMB but stores use STLR. This can happen if C2 compiles ++ // the stores in one method and C1 compiles the loads in another. ++ if (! UseBarriersForVolatile) { ++ __ membar(); ++ } ++ ++ __ volatile_load_mem_reg(address, result, info); ++} +diff --git a/src/hotspot/cpu/sw64/c1_LIR_sw64.cpp b/src/hotspot/cpu/sw64/c1_LIR_sw64.cpp +new file mode 100644 +index 0000000000..ce75dc552a +--- /dev/null ++++ b/src/hotspot/cpu/sw64/c1_LIR_sw64.cpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/register.hpp" ++#include "c1/c1_LIR.hpp" ++ ++FloatRegister LIR_OprDesc::as_float_reg() const { ++ return as_FloatRegister(fpu_regnr()); ++} ++ ++FloatRegister LIR_OprDesc::as_double_reg() const { ++ return as_FloatRegister(fpu_regnrLo()); ++} ++ ++// Reg2 unused. 
++LIR_Opr LIR_OprFact::double_fpu(int reg1, int reg2) {
++  assert(as_FloatRegister(reg2) == fnoreg, "Not used on this platform");
++  return (LIR_Opr)(intptr_t)((reg1 << LIR_OprDesc::reg1_shift) |
++                             (reg1 << LIR_OprDesc::reg2_shift) |
++                             LIR_OprDesc::double_type |
++                             LIR_OprDesc::fpu_register |
++                             LIR_OprDesc::double_size);
++}
++
++#ifndef PRODUCT
++void LIR_Address::verify() const {
++  assert(base()->is_cpu_register(), "wrong base operand");
++  assert(index()->is_illegal() || index()->is_double_cpu() || index()->is_single_cpu(), "wrong index operand");
++  assert(base()->type() == T_OBJECT || base()->type() == T_LONG || base()->type() == T_METADATA,
++         "wrong type for addresses");
++}
++#endif // PRODUCT
+diff --git a/src/hotspot/cpu/sw64/c1_LinearScan_sw64.cpp b/src/hotspot/cpu/sw64/c1_LinearScan_sw64.cpp
+new file mode 100644
+index 0000000000..7a8a304e8c
+--- /dev/null
++++ b/src/hotspot/cpu/sw64/c1_LinearScan_sw64.cpp
+@@ -0,0 +1,32 @@
++/*
++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "c1/c1_Instruction.hpp"
++#include "c1/c1_LinearScan.hpp"
++#include "utilities/bitMap.inline.hpp"
++
++void LinearScan::allocate_fpu_stack() {
++  // No FPU stack on Sw64
++}
+diff --git a/src/hotspot/cpu/sw64/c1_LinearScan_sw64.hpp b/src/hotspot/cpu/sw64/c1_LinearScan_sw64.hpp
+new file mode 100644
+index 0000000000..27e357e4d4
+--- /dev/null
++++ b/src/hotspot/cpu/sw64/c1_LinearScan_sw64.hpp
+@@ -0,0 +1,76 @@
++/*
++ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_C1_LINEARSCAN_HPP ++#define CPU_SW64_VM_C1_LINEARSCAN_HPP ++ ++inline bool LinearScan::is_processed_reg_num(int reg_num) { ++ return reg_num <= FrameMap::last_cpu_reg() || reg_num >= pd_nof_cpu_regs_frame_map; ++} ++ ++inline int LinearScan::num_physical_regs(BasicType type) { ++ return 1; ++} ++ ++ ++inline bool LinearScan::requires_adjacent_regs(BasicType type) { ++ return false; ++} ++ ++inline bool LinearScan::is_caller_save(int assigned_reg) { ++ assert(assigned_reg >= 0 && assigned_reg < nof_regs, "should call this only for registers"); ++ if (assigned_reg < pd_first_callee_saved_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_reg && assigned_reg < pd_first_callee_saved_fpu_reg) ++ return true; ++ if (assigned_reg > pd_last_callee_saved_fpu_reg && assigned_reg < pd_last_fpu_reg) ++ return true; ++ return false; ++} ++ ++ ++inline void LinearScan::pd_add_temps(LIR_Op* op) { ++ // FIXME ?? ++} ++ ++ ++// Implementation of LinearScanWalker ++ ++inline bool LinearScanWalker::pd_init_regs_for_alloc(Interval* cur) { ++ if (allocator()->gen()->is_vreg_flag_set(cur->reg_num(), LIRGenerator::callee_saved)) { ++ assert(cur->type() != T_FLOAT && cur->type() != T_DOUBLE, "cpu regs only"); ++ _first_reg = pd_first_callee_saved_reg; ++ _last_reg = pd_last_callee_saved_reg; ++ return true; ++ } else if (cur->type() == T_INT || cur->type() == T_LONG || cur->type() == T_OBJECT || cur->type() == T_ADDRESS || cur->type() == T_METADATA) { ++ _first_reg = pd_first_cpu_reg; ++ _last_reg = pd_last_allocatable_cpu_reg; ++ return true; ++ } ++ return false; ++} ++ ++ ++#endif // CPU_SW64_VM_C1_LINEARSCAN_HPP +diff --git a/src/hotspot/cpu/sw64/c1_MacroAssembler_sw64.cpp b/src/hotspot/cpu/sw64/c1_MacroAssembler_sw64.cpp +new file mode 100644 +index 0000000000..fed63c27dd +--- /dev/null ++++ b/src/hotspot/cpu/sw64/c1_MacroAssembler_sw64.cpp +@@ -0,0 +1,399 @@ ++/* ++ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interpreter.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/os.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); block_comment(line);} ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void C1_MacroAssembler::float_cmp(bool is_float, int unordered_result, ++ FloatRegister f0, FloatRegister f1, ++ Register result) ++{ ++ Label done; ++ if (is_float) { ++ fcmps(f0, f1); ++ } else { ++ fcmpd(f0, f1); ++ } ++ if (unordered_result < 0) { ++ // we want -1 for unordered or less than, 0 for equal and 1 for ++ // greater than. ++ cset(result, NE); // Not equal or unordered ++ cneg(result, result, LT); // Less than or unordered ++ } else { ++ // we want -1 for less than, 0 for equal and 1 for unordered or ++ // greater than. ++ cset(result, NE); // Not equal or unordered ++ cneg(result, result, LO); // Less than ++ } ++} ++ ++int C1_MacroAssembler::lock_object(Register hdr, Register obj, Register disp_hdr, Register scratch, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ Label done, fail; ++ int null_check_offset = -1; ++ ++ verify_oop(obj); ++ ++ // save object being locked into the BasicObjectLock ++ str(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ ++ if (UseBiasedLocking) { ++ assert(scratch != noreg, "should have scratch register at this point"); ++ null_check_offset = biased_locking_enter(disp_hdr, obj, hdr, scratch, false, done, &slow_case); ++ } else { ++ null_check_offset = offset(); ++ } ++ ++ // Load object header ++ ldr(hdr, Address(obj, hdr_offset)); ++ // and mark it as unlocked ++ orr(hdr, hdr, markOopDesc::unlocked_value); ++ // save unlocked object header into the displaced header location on the stack ++ str(hdr, Address(disp_hdr, 0)); ++ // test if object header is still the same (i.e. 
unlocked), and if so, store the ++ // displaced header address in the object header - if it is not the same, get the ++ // object header instead ++ lea(rscratch2, Address(obj, hdr_offset)); ++ cmpxchgptr(hdr, disp_hdr, rscratch2, rscratch1, done, /*fallthough*/NULL); ++ // if the object header was the same, we're done ++ // if the object header was not the same, it is now in the hdr register ++ // => test if it is a stack pointer into the same stack (recursive locking), i.e.: ++ // ++ // 1) (hdr & aligned_mask) == 0 ++ // 2) sp <= hdr ++ // 3) hdr <= sp + page_size ++ // ++ // these 3 tests can be done by evaluating the following expression: ++ // ++ // (hdr - sp) & (aligned_mask - page_size) ++ // ++ // assuming both the stack pointer and page_size have their least ++ // significant 2 bits cleared and page_size is a power of 2 ++ mov(rscratch1, sp); ++ sub(hdr, hdr, rscratch1); ++ ands(hdr, hdr, aligned_mask - os::vm_page_size()); ++ // for recursive locking, the result is zero => save it in the displaced header ++ // location (NULL in the displaced hdr location indicates recursive locking) ++ str(hdr, Address(disp_hdr, 0)); ++ // otherwise we don't care about the result and handle locking via runtime call ++ cbnz(hdr, slow_case); ++ // done ++ BIND(done); ++ if (PrintBiasedLockingStatistics) { ++ lea(rscratch2, ExternalAddress((address)BiasedLocking::fast_path_entry_count_addr())); ++ addmw(Address(rscratch2, 0), 1, rscratch1); ++ } ++ return null_check_offset; ++} ++ ++ ++void C1_MacroAssembler::unlock_object(Register hdr, Register obj, Register disp_hdr, Label& slow_case) { ++ const int aligned_mask = BytesPerWord -1; ++ const int hdr_offset = oopDesc::mark_offset_in_bytes(); ++ assert(hdr != obj && hdr != disp_hdr && obj != disp_hdr, "registers must be different"); ++ Label done; ++ ++ if (UseBiasedLocking) { ++ // load object ++ ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ biased_locking_exit(obj, hdr, done); ++ } ++ ++ // load displaced header ++ ldr(hdr, Address(disp_hdr, 0)); ++ // if the loaded hdr is NULL we had recursive locking ++ // if we had recursive locking, we are done ++ cbz(hdr, done); ++ if (!UseBiasedLocking) { ++ // load object ++ ldr(obj, Address(disp_hdr, BasicObjectLock::obj_offset_in_bytes())); ++ } ++ verify_oop(obj); ++ // test if object header is pointing to the displaced header, and if so, restore ++ // the displaced header in the object - if the object header is not pointing to ++ // the displaced header, get the object header instead ++ // if the object header was not pointing to the displaced header, ++ // we do unlocking via runtime call ++ if (hdr_offset) { ++ lea(rscratch1, Address(obj, hdr_offset)); ++ cmpxchgptr(disp_hdr, hdr, rscratch1, rscratch2, done, &slow_case); ++ } else { ++ cmpxchgptr(disp_hdr, hdr, obj, rscratch2, done, &slow_case); ++ } ++ // done ++ BIND(done); ++} ++ ++ ++// Defines obj, preserves var_size_in_bytes ++void C1_MacroAssembler::try_allocate(Register obj, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, Label& slow_case) { ++ if (UseTLAB) { ++ tlab_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++ } else { ++ eden_allocate(obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++ } ++} ++ ++void C1_MacroAssembler::initialize_header(Register obj, Register klass, Register len, Register t1, Register t2) { ++ assert_different_registers(obj, klass, len); ++ if (UseBiasedLocking && !len->is_valid()) { ++ assert_different_registers(obj, klass, len, t1, 
t2); ++ ldr(t1, Address(klass, Klass::prototype_header_offset())); ++ } else { ++ // This assumes that all prototype bits fit in an int32_t ++ mov(t1, (int32_t)(intptr_t)markOopDesc::prototype()); ++ } ++ str(t1, Address(obj, oopDesc::mark_offset_in_bytes())); ++ ++ if (UseCompressedClassPointers) { // Take care not to kill klass ++ encode_klass_not_null(t1, klass); ++ strw(t1, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } else { ++ str(klass, Address(obj, oopDesc::klass_offset_in_bytes())); ++ } ++ ++ if (len->is_valid()) { ++ strw(len, Address(obj, arrayOopDesc::length_offset_in_bytes())); ++ } else if (UseCompressedClassPointers) { ++ store_klass_gap(obj, zr); ++ } ++} ++ ++// preserves obj, destroys len_in_bytes ++void C1_MacroAssembler::initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1) { ++ assert(hdr_size_in_bytes >= 0, "header size must be positive or 0"); ++ Label done; ++ ++ // len_in_bytes is positive and ptr sized ++ subs(len_in_bytes, len_in_bytes, hdr_size_in_bytes); ++ br(Assembler::EQ, done); ++ ++ // Preserve obj ++ if (hdr_size_in_bytes) ++ add(obj, obj, hdr_size_in_bytes); ++ zero_memory(obj, len_in_bytes, t1); ++ if (hdr_size_in_bytes) ++ sub(obj, obj, hdr_size_in_bytes); ++ ++ BIND(done); ++} ++ ++ ++void C1_MacroAssembler::allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case) { ++ assert_different_registers(obj, t1, t2); // XXX really? ++ assert(header_size >= 0 && object_size >= header_size, "illegal sizes"); ++ ++ try_allocate(obj, noreg, object_size * BytesPerWord, t1, t2, slow_case); ++ ++ initialize_object(obj, klass, noreg, object_size * HeapWordSize, t1, t2, UseTLAB); ++} ++ ++void C1_MacroAssembler::initialize_object(Register obj, Register klass, Register var_size_in_bytes, int con_size_in_bytes, Register t1, Register t2, bool is_tlab_allocated) { ++ assert((con_size_in_bytes & MinObjAlignmentInBytesMask) == 0, ++ "con_size_in_bytes is not multiple of alignment"); ++ const int hdr_size_in_bytes = instanceOopDesc::header_size() * HeapWordSize; ++ ++ initialize_header(obj, klass, noreg, t1, t2); ++ ++ if (!(UseTLAB && ZeroTLAB && is_tlab_allocated)) { ++ // clear rest of allocated space ++ const Register index = t2; ++ const int threshold = 16 * BytesPerWord; // approximate break even point for code size (see comments below) ++ if (var_size_in_bytes != noreg) { ++ mov(index, var_size_in_bytes); ++ initialize_body(obj, index, hdr_size_in_bytes, t1); ++ } else if (con_size_in_bytes <= threshold) { ++ // use explicit null stores ++ int i = hdr_size_in_bytes; ++ if (i < con_size_in_bytes && (con_size_in_bytes % (2 * BytesPerWord))) { ++ str(zr, Address(obj, i)); ++ i += BytesPerWord; ++ } ++ for (; i < con_size_in_bytes; i += 2 * BytesPerWord) ++ stp(zr, zr, Address(obj, i)); ++ } else if (con_size_in_bytes > hdr_size_in_bytes) { ++ block_comment("zero memory"); ++ // use loop to null out the fields ++ ++ int words = (con_size_in_bytes - hdr_size_in_bytes) / BytesPerWord; ++ mov(index, words / 8); ++ ++ const int unroll = 8; // Number of str(zr) instructions we'll unroll ++ int remainder = words % unroll; ++ lea(rscratch1, Address(obj, hdr_size_in_bytes + remainder * BytesPerWord)); ++ ++ Label entry_point, loop; ++ b(entry_point); ++ ++ BIND(loop); ++ sub(index, index, 1); ++ for (int i = -unroll; i < 0; i++) { ++ if (-i == remainder) ++ BIND(entry_point); ++ str(zr, Address(rscratch1, i * wordSize)); ++ } ++ if (remainder == 0) ++ BIND(entry_point); 
++ add(rscratch1, rscratch1, unroll * wordSize); ++ cbnz(index, loop); ++ ++ } ++ } ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == i0, "must be"); ++ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id))); ++ } ++ ++ verify_oop(obj); ++} ++void C1_MacroAssembler::allocate_array(Register obj, Register len, Register t1, Register t2, int header_size, int f, Register klass, Label& slow_case) { ++ assert_different_registers(obj, len, t1, t2, klass); ++ ++ // determine alignment mask ++ assert(!(BytesPerWord & 1), "must be a multiple of 2 for masking code to work"); ++ ++ // check for negative or excessive length ++ mov(rscratch1, (int32_t)max_array_allocation_length); ++ cmp(len, rscratch1); ++ br(Assembler::HS, slow_case); ++ ++ const Register arr_size = t2; // okay to be the same ++ // align object end ++ mov(arr_size, (int32_t)header_size * BytesPerWord + MinObjAlignmentInBytesMask); ++ add(arr_size, arr_size, len, ext::uxtw, f); ++ andr(arr_size, arr_size, ~MinObjAlignmentInBytesMask); ++ ++ try_allocate(obj, arr_size, 0, t1, t2, slow_case); ++ ++ initialize_header(obj, klass, len, t1, t2); ++ ++ // clear rest of allocated space ++ const Register len_zero = len; ++ initialize_body(obj, arr_size, header_size * BytesPerWord, len_zero); ++ ++ membar(StoreStore); ++ ++ if (CURRENT_ENV->dtrace_alloc_probes()) { ++ assert(obj == i0, "must be"); ++ far_call(RuntimeAddress(Runtime1::entry_for(Runtime1::dtrace_object_alloc_id))); ++ } ++ ++ verify_oop(obj); ++} ++ ++ ++void C1_MacroAssembler::inline_cache_check(Register receiver, Register iCache) { ++ verify_oop(receiver); ++ // explicit NULL check not needed since load from [klass_offset] causes a trap ++ // check against inline cache ++ assert(!MacroAssembler::needs_explicit_null_check(oopDesc::klass_offset_in_bytes()), "must add explicit null check"); ++ ++ cmp_klass(receiver, iCache, rscratch1); ++} ++ ++ ++void C1_MacroAssembler::build_frame(int framesize, int bang_size_in_bytes) { ++ // If we have to make this method not-entrant we'll overwrite its ++ // first instruction with a jump. For this action to be legal we ++ // must ensure that this first instruction is a B, BL, NOP, BKPT, ++ // SVC, HVC, or SMC. Make it a NOP. ++ nop(); ++ assert(bang_size_in_bytes >= framesize, "stack bang size incorrect"); ++ // Make sure there is enough stack space for this method's activation. ++ // Note that we do this before doing an enter(). ++ generate_stack_overflow_check(bang_size_in_bytes); ++ MacroAssembler::build_frame(framesize + 2 * wordSize); ++} ++ ++void C1_MacroAssembler::remove_frame(int framesize) { ++ MacroAssembler::remove_frame(framesize + 2 * wordSize); ++} ++ ++ ++void C1_MacroAssembler::verified_entry() { ++} ++ ++void C1_MacroAssembler::load_parameter(int offset_in_words, Register reg) { ++ // rbp, + 0: link ++ // + 1: return address ++ // + 2: argument with offset 0 ++ // + 3: argument with offset 1 ++ // + 4: ... 
++ ++ ldr(reg, Address(rfp, (offset_in_words + 2) * BytesPerWord)); ++} ++ ++#ifndef PRODUCT ++ ++void C1_MacroAssembler::verify_stack_oop(int stack_offset) { ++ if (!VerifyOops) return; ++ verify_oop_addr(Address(sp, stack_offset), "oop"); ++} ++ ++void C1_MacroAssembler::verify_not_null_oop(Register r) { ++ if (!VerifyOops) return; ++ Label not_null; ++ cbnz(r, not_null); ++ stop("non-null oop required"); ++ BIND(not_null); ++ verify_oop(r); ++} ++ ++void C1_MacroAssembler::invalidate_registers(bool inv_r0, bool inv_r19, bool inv_r2, bool inv_r3, bool inv_r4, bool inv_r5) { ++#ifdef ASSERT ++ static int nn; ++ if (inv_r0) mov(i0, 0xDEAD); ++ if (inv_r19) mov(i19, 0xDEAD); ++ if (inv_r2) mov(i2, nn++); ++ if (inv_r3) mov(i3, 0xDEAD); ++ if (inv_r4) mov(i4, 0xDEAD); ++ if (inv_r5) mov(i5, 0xDEAD); ++#endif ++} ++#endif // ifndef PRODUCT +diff --git a/src/hotspot/cpu/sw64/c1_MacroAssembler_sw64.hpp b/src/hotspot/cpu/sw64/c1_MacroAssembler_sw64.hpp +new file mode 100644 +index 0000000000..98cd405034 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/c1_MacroAssembler_sw64.hpp +@@ -0,0 +1,114 @@ ++/* ++ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_SW64_VM_C1_MACROASSEMBLER_SW64_HPP ++#define CPU_SW64_VM_C1_MACROASSEMBLER_SW64_HPP ++ ++using MacroAssembler::build_frame; ++using MacroAssembler::null_check; ++ ++// C1_MacroAssembler contains high-level macros for C1 ++ ++ private: ++ int _rsp_offset; // track rsp changes ++ // initialization ++ void pd_init() { _rsp_offset = 0; } ++ ++ ++ public: ++ void try_allocate( ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ void initialize_header(Register obj, Register klass, Register len, Register t1, Register t2); ++ void initialize_body(Register obj, Register len_in_bytes, int hdr_size_in_bytes, Register t1); ++ ++ void float_cmp(bool is_float, int unordered_result, ++ FloatRegister f0, FloatRegister f1, ++ Register result); ++ ++ // locking ++ // hdr : must be r0, contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must point to the displaced header location, contents preserved ++ // scratch : scratch register, contents destroyed ++ // returns code offset at which to add null check debug information ++ int lock_object (Register swap, Register obj, Register disp_hdr, Register scratch, Label& slow_case); ++ ++ // unlocking ++ // hdr : contents destroyed ++ // obj : must point to the object to lock, contents preserved ++ // disp_hdr: must be r0 & must point to the displaced header location, contents destroyed ++ void unlock_object(Register swap, Register obj, Register lock, Label& slow_case); ++ ++ void initialize_object( ++ Register obj, // result: pointer to object after successful allocation ++ Register klass, // object klass ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ bool is_tlab_allocated // the object was allocated in a TLAB; relevant for the implementation of ZeroTLAB ++ ); ++ ++ // allocation of fixed-size objects ++ // (can also be used to allocate fixed-size arrays, by setting ++ // hdr_size correctly and storing the array length afterwards) ++ // obj : will contain pointer to allocated object ++ // t1, t2 : scratch registers - contents destroyed ++ // header_size: size of object header in words ++ // object_size: total size of object in words ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_object(Register obj, Register t1, Register t2, int header_size, int object_size, Register klass, Label& slow_case); ++ ++ enum { ++ max_array_allocation_length = 0x00FFFFFF ++ }; ++ ++ // allocation of arrays ++ // obj : will contain pointer to allocated object ++ // len : array length in number of elements ++ // t : scratch register - contents destroyed ++ // header_size: size of object header in words ++ // f : element scale factor ++ // slow_case : exit to slow case implementation if fast allocation fails ++ void allocate_array(Register obj, Register len, Register t, Register t2, int header_size, int f, Register klass, Label& slow_case); ++ ++ int rsp_offset() const { return _rsp_offset; } ++ void set_rsp_offset(int n) { _rsp_offset = n; 
} ++ ++ void invalidate_registers(bool inv_r0, bool inv_r19, bool inv_r2, bool inv_r3, bool inv_r4, bool inv_r5) PRODUCT_RETURN; ++ ++ // This platform only uses signal-based null checks. The Label is not needed. ++ void null_check(Register r, Label *Lnull = NULL) { MacroAssembler::null_check(r); } ++ ++ void load_parameter(int offset_in_words, Register reg); ++ ++#endif // CPU_SW64_VM_C1_MACROASSEMBLER_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/c1_Runtime1_sw64.cpp b/src/hotspot/cpu/sw64/c1_Runtime1_sw64.cpp +new file mode 100644 +index 0000000000..212f3251c8 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/c1_Runtime1_sw64.cpp +@@ -0,0 +1,1157 @@ ++/* ++ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "c1/c1_CodeStubs.hpp" ++#include "c1/c1_Defs.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "c1/c1_Runtime1.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_sw64.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "register_sw64.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/vframe.hpp" ++#include "runtime/vframeArray.hpp" ++#include "vmreg_sw64.inline.hpp" ++ ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); block_comment(line);} ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++// Implementation of StubAssembler ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, int args_size) { ++ // setup registers ++ assert(!(oop_result1->is_valid() || metadata_result->is_valid()) || oop_result1 != metadata_result, "registers must be different"); ++ assert(oop_result1 != rthread && metadata_result != rthread, "registers must be different"); ++ assert(args_size >= 0, "illegal args_size"); ++ bool align_stack = false; ++ ++ mov(c_rarg0, rthread); ++ set_num_rt_args(0); // Nothing on stack ++ ++ Label retaddr; ++ set_last_Java_frame(sp, rfp, retaddr, rscratch1); ++ ++ // do the call ++ lea(rscratch1, RuntimeAddress(entry)); ++ blr(rscratch1); ++ BIND(retaddr); ++ int call_offset = offset(); ++ // verify callee-saved register ++#ifdef ASSERT ++ push(i0, sp); ++ { Label L; ++ get_thread(i0); ++ cmp(rthread, i0); ++ br(Assembler::EQ, L); ++ stop("StubAssembler::call_RT: rthread not callee saved?"); ++ BIND(L); ++ } ++ pop(i0, sp); ++#endif ++ reset_last_Java_frame(true); ++ maybe_isb(); ++ ++ // check for pending exceptions ++ { Label L; ++ // check for pending exceptions (java_thread is set upon return) ++ ldr(rscratch1, Address(rthread, in_bytes(Thread::pending_exception_offset()))); ++ cbz(rscratch1, L); ++ // exception pending => remove activation and forward to exception handler ++ // make sure that the vm_results are cleared ++ if (oop_result1->is_valid()) { ++ str(zr, Address(rthread, JavaThread::vm_result_offset())); ++ } ++ if (metadata_result->is_valid()) { ++ str(zr, Address(rthread, JavaThread::vm_result_2_offset())); ++ } ++ if (frame_size() == no_frame_size) { ++ leave(); ++ far_jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ } else if (_stub_id == Runtime1::forward_exception_id) { ++ should_not_reach_here(); ++ } else { ++ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); ++ } ++ BIND(L); ++ } ++ // get oop results if there are any and reset the values in the thread ++ if (oop_result1->is_valid()) { ++ get_vm_result(oop_result1, rthread); ++ } ++ if (metadata_result->is_valid()) { ++ get_vm_result_2(metadata_result, rthread); ++ } ++ return call_offset; ++} ++ ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1) { ++ mov(c_rarg1, arg1); ++ return call_RT(oop_result1, metadata_result, entry, 1); ++} ++ ++ ++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2) { ++ if (c_rarg1 == arg2) { ++ 
if (c_rarg2 == arg1) {
++      mov(rscratch1, arg1);
++      mov(arg1, arg2);
++      mov(arg2, rscratch1);
++    } else {
++      mov(c_rarg2, arg2);
++      mov(c_rarg1, arg1);
++    }
++  } else {
++    mov(c_rarg1, arg1);
++    mov(c_rarg2, arg2);
++  }
++  return call_RT(oop_result1, metadata_result, entry, 2);
++}
++
++
++int StubAssembler::call_RT(Register oop_result1, Register metadata_result, address entry, Register arg1, Register arg2, Register arg3) {
++  // if there is any conflict use the stack
++  if (arg1 == c_rarg2 || arg1 == c_rarg3 ||
++      arg2 == c_rarg1 || arg2 == c_rarg3 ||
++      arg3 == c_rarg1 || arg3 == c_rarg2) {
++    stp(arg3, arg2, Address(pre(sp, -2 * wordSize)));
++    stp(arg1, zr, Address(pre(sp, -2 * wordSize)));
++    ldp(c_rarg1, zr, Address(post(sp, 2 * wordSize)));
++    ldp(c_rarg3, c_rarg2, Address(post(sp, 2 * wordSize)));
++  } else {
++    mov(c_rarg1, arg1);
++    mov(c_rarg2, arg2);
++    mov(c_rarg3, arg3);
++  }
++  return call_RT(oop_result1, metadata_result, entry, 3);
++}
++
++// Implementation of StubFrame
++
++class StubFrame: public StackObj {
++ private:
++  StubAssembler* _sasm;
++
++ public:
++  StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments);
++  void load_argument(int offset_in_words, Register reg);
++
++  ~StubFrame();
++};
++
++void StubAssembler::prologue(const char* name, bool must_gc_arguments) {
++  set_info(name, must_gc_arguments);
++  enter();
++}
++
++void StubAssembler::epilogue() {
++  leave();
++  ret(lr);
++}
++#undef BLOCK_COMMENT
++#undef BIND
++#define __ _sasm->
++
++#ifdef PRODUCT
++#define BLOCK_COMMENT(str) /* nothing */
++#else
++#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);}
++#endif
++
++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":")
++StubFrame::StubFrame(StubAssembler* sasm, const char* name, bool must_gc_arguments) {
++  _sasm = sasm;
++  __ prologue(name, must_gc_arguments);
++}
++
++// load parameters that were stored with LIR_Assembler::store_parameter
++// Note: offsets for store_parameter and load_argument must match
++void StubFrame::load_argument(int offset_in_words, Register reg) {
++  __ load_parameter(offset_in_words, reg);
++}
++
++
++StubFrame::~StubFrame() {
++  __ epilogue();
++}
++
++#undef __
++
++
++// Implementation of Runtime1
++
++#define __ sasm->
++
++const int float_regs_as_doubles_size_in_slots = pd_nof_fpu_regs_frame_map * 2;
++
++// Stack layout for saving/restoring all the registers needed during a runtime
++// call (this includes deoptimization).
++// Note that users of this frame may well have arguments to some runtime call
++// while these values are on the stack. These positions neglect those arguments
++// but the code in save_live_registers will take the argument count into
++// account.
++//
++
++enum reg_save_layout {
++  reg_save_frame_size = 32 /* float */ + 32 /* integer */
++};
++
++// Save off registers which might be killed by calls into the runtime.
++// Tries to be smart about FP registers. In particular we separate
++// saving and describing the FPU registers for deoptimization since we
++// have to save the FPU registers twice if we describe them. The
++// deopt blob is the only thing which needs to describe FPU registers.
++// In all other cases it should be sufficient to simply save their
++// current value.
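++//
++// Frame layout sketch (derived from initialize_pd() below; offsets are VMReg
++// slots, i.e. halfwords): the 32 FP registers are saved first at
++// fpu_reg_save_offsets[i] = 2 * i, followed by the integer registers at
++// cpu_reg_save_offsets[i] = 64 + 2 * i (assuming FrameMap::nof_cpu_regs == 32
++// on this port), matching the reg_save_frame_size of 64 words declared above.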
++ ++static int cpu_reg_save_offsets[FrameMap::nof_cpu_regs]; ++static int fpu_reg_save_offsets[FrameMap::nof_fpu_regs]; ++static int reg_save_size_in_words; ++static int frame_size_in_bytes = -1; ++ ++static OopMap* generate_oop_map(StubAssembler* sasm, bool save_fpu_registers) { ++ int frame_size_in_bytes = reg_save_frame_size * BytesPerWord; ++ sasm->set_frame_size(frame_size_in_bytes / BytesPerWord); ++ int frame_size_in_slots = frame_size_in_bytes / sizeof(jint); ++ OopMap* oop_map = new OopMap(frame_size_in_slots, 0); ++ ++ for (int i = 0; i < FrameMap::nof_cpu_regs; i++) { ++ Register r = as_Register(i); ++ if (i <= 18 && i != rscratch1->encoding() && i != rscratch2->encoding()) { ++ int sp_offset = cpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), ++ r->as_VMReg()); ++ } ++ } ++ ++ if (save_fpu_registers) { ++ for (int i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ FloatRegister r = as_FloatRegister(i); ++ { ++ int sp_offset = fpu_reg_save_offsets[i]; ++ oop_map->set_callee_saved(VMRegImpl::stack2reg(sp_offset), ++ r->as_VMReg()); ++ } ++ } ++ } ++ return oop_map; ++} ++ ++static OopMap* save_live_registers(StubAssembler* sasm, ++ bool save_fpu_registers = true) { ++ __ block_comment("save_live_registers"); ++ ++ __ push(RegSet::range(i0, i29), sp); // integer registers except lr & sp ++ ++ if (save_fpu_registers) { ++ for (int i = 31; i>= 0; i -= 4) { ++ __ sub(sp, sp, 4 * wordSize); // no pre-increment for st1. Emulate it without modifying other registers ++ __ st1(as_FloatRegister(i-3), as_FloatRegister(i-2), as_FloatRegister(i-1), ++ as_FloatRegister(i), __ T1D, Address(sp)); ++ } ++ } else { ++ __ add(sp, sp, -32 * wordSize); ++ } ++ ++ return generate_oop_map(sasm, save_fpu_registers); ++} ++ ++static void restore_live_registers(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i += 4) ++ __ ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2), ++ as_FloatRegister(i+3), __ T1D, Address(__ post(sp, 4 * wordSize))); ++ } else { ++ __ add(sp, sp, 32 * wordSize); ++ } ++ ++ __ pop(RegSet::range(i0, i29), sp); ++} ++ ++static void restore_live_registers_except_r0(StubAssembler* sasm, bool restore_fpu_registers = true) { ++ ++ if (restore_fpu_registers) { ++ for (int i = 0; i < 32; i += 4) ++ __ ld1(as_FloatRegister(i), as_FloatRegister(i+1), as_FloatRegister(i+2), ++ as_FloatRegister(i+3), __ T1D, Address(__ post(sp, 4 * wordSize))); ++ } else { ++ __ add(sp, sp, 32 * wordSize); ++ } ++ ++ __ ldp(zr, i1, Address(__ post(sp, 16))); ++ __ pop(RegSet::range(i2, i29), sp); ++} ++ ++ ++ ++void Runtime1::initialize_pd() { ++ int i; ++ int sp_offset = 0; ++ ++ // all float registers are saved explicitly ++ assert(FrameMap::nof_fpu_regs == 32, "double registers not handled here"); ++ for (i = 0; i < FrameMap::nof_fpu_regs; i++) { ++ fpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++ ++ for (i = 0; i < FrameMap::nof_cpu_regs; i++) { ++ Register r = as_Register(i); ++ cpu_reg_save_offsets[i] = sp_offset; ++ sp_offset += 2; // SP offsets are in halfwords ++ } ++} ++ ++ ++// target: the entry point of the method that creates and posts the exception oop ++// has_argument: true if the exception needs arguments (passed in rscratch1 and rscratch2) ++ ++OopMapSet* Runtime1::generate_exception_throw(StubAssembler* sasm, address target, bool has_argument) { ++ // make a frame and preserve the caller's caller-save registers ++ OopMap* 
oop_map = save_live_registers(sasm); ++ int call_offset; ++ if (!has_argument) { ++ call_offset = __ call_RT(noreg, noreg, target); ++ } else { ++ __ mov(c_rarg1, rscratch1); ++ __ mov(c_rarg2, rscratch2); ++ call_offset = __ call_RT(noreg, noreg, target); ++ } ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ __ should_not_reach_here(); ++ return oop_maps; ++} ++ ++ ++OopMapSet* Runtime1::generate_handle_exception(StubID id, StubAssembler *sasm) { ++ __ block_comment("generate_handle_exception"); ++ ++ // incoming parameters ++ const Register exception_oop = i0; ++ const Register exception_pc = i3; ++ // other registers used in this stub ++ ++ // Save registers, if required. ++ OopMapSet* oop_maps = new OopMapSet(); ++ OopMap* oop_map = NULL; ++ switch (id) { ++ case forward_exception_id: ++ // We're handling an exception in the context of a compiled frame. ++ // The registers have been saved in the standard places. Perform ++ // an exception lookup in the caller and dispatch to the handler ++ // if found. Otherwise unwind and dispatch to the callers ++ // exception handler. ++ oop_map = generate_oop_map(sasm, 1 /*thread*/); ++ ++ // load and clear pending exception oop into i0 ++ __ ldr(exception_oop, Address(rthread, Thread::pending_exception_offset())); ++ __ str(zr, Address(rthread, Thread::pending_exception_offset())); ++ ++ // load issuing PC (the return address for this stub) into i3 ++ __ ldr(exception_pc, Address(rfp, 1*BytesPerWord)); ++ ++ // make sure that the vm_results are cleared (may be unnecessary) ++ __ str(zr, Address(rthread, JavaThread::vm_result_offset())); ++ __ str(zr, Address(rthread, JavaThread::vm_result_2_offset())); ++ break; ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // At this point all registers MAY be live. ++ oop_map = save_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: { ++ // At this point all registers except exception oop (i0) and ++ // exception pc (lr) are dead. ++ const int frame_size = 2 /*fp, return address*/; ++ oop_map = new OopMap(frame_size * VMRegImpl::slots_per_word, 0); ++ sasm->set_frame_size(frame_size); ++ break; ++ } ++ default: ++ __ should_not_reach_here(); ++ break; ++ } ++ ++ // verify that only i0 and i3 are valid at this time ++ __ invalidate_registers(false, true, true, false, true, true); ++ // verify that i0 contains a valid exception ++ __ verify_not_null_oop(exception_oop); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are ++ // empty before writing to them ++ Label oop_empty; ++ __ ldr(rscratch1, Address(rthread, JavaThread::exception_oop_offset())); ++ __ cbz(rscratch1, oop_empty); ++ __ stop("exception oop already set"); ++ __ BIND(oop_empty); ++ ++ Label pc_empty; ++ __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); ++ __ cbz(rscratch1, pc_empty); ++ __ stop("exception pc already set"); ++ __ BIND(pc_empty); ++#endif ++ ++ // save exception oop and issuing pc into JavaThread ++ // (exception handler will load it from here) ++ __ str(exception_oop, Address(rthread, JavaThread::exception_oop_offset())); ++ __ str(exception_pc, Address(rthread, JavaThread::exception_pc_offset())); ++ ++ // patch throwing pc into return address (has bci & oop map) ++ __ str(exception_pc, Address(rfp, 1*BytesPerWord)); ++ ++ // compute the exception handler. 
++ // the exception oop and the throwing pc are read from the fields in JavaThread ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, exception_handler_for_pc)); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // i0: handler address ++ // will be the deopt blob if nmethod was deoptimized while we looked up ++ // handler regardless of whether handler existed in the nmethod. ++ ++ // only i0 is valid at this time, all other registers have been destroyed by the runtime call ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++ // patch the return address, this stub will directly return to the exception handler ++ __ str(i0, Address(rfp, 1*BytesPerWord)); ++ ++ switch (id) { ++ case forward_exception_id: ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ // Restore the registers that were saved at the beginning. ++ restore_live_registers(sasm, id != handle_exception_nofpu_id); ++ break; ++ case handle_exception_from_callee_id: ++ // Pop the return address. ++ __ leave(); ++ __ ret(lr); // jump to exception handler ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ return oop_maps; ++} ++ ++ ++void Runtime1::generate_unwind_exception(StubAssembler *sasm) { ++ // incoming parameters ++ const Register exception_oop = i0; ++ // callee-saved copy of exception_oop during runtime call ++ const Register exception_oop_callee_saved = i19; ++ // other registers used in this stub ++ const Register exception_pc = i3; ++ const Register handler_addr = i1; ++ ++ // verify that only i0, is valid at this time ++ __ invalidate_registers(false, true, true, true, true, true); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ldr(rscratch1, Address(rthread, JavaThread::exception_oop_offset())); ++ __ cbz(rscratch1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ BIND(oop_empty); ++ ++ Label pc_empty; ++ __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); ++ __ cbz(rscratch1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ BIND(pc_empty); ++#endif ++ ++ // Save our return address because ++ // exception_handler_for_return_address will destroy it. We also ++ // save exception_oop ++ __ stp(lr, exception_oop, Address(__ pre(sp, -2 * wordSize))); ++ ++ // search the exception handler address of the caller (using the return address) ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::exception_handler_for_return_address), rthread, lr); ++ // i0: exception handler address of the caller ++ ++ // Only R0 is valid at this time; all other registers have been ++ // destroyed by the call. ++ __ invalidate_registers(false, true, true, true, false, true); ++ ++ // move result of call into correct register ++ __ mov(handler_addr, i0); ++ ++ // get throwing pc (= return address). ++ // lr has been destroyed by the call ++ __ ldp(lr, exception_oop, Address(__ post(sp, 2 * wordSize))); ++ __ mov(i3, lr); ++ ++ __ verify_not_null_oop(exception_oop); ++ ++ // continue at exception handler (return address removed) ++ // note: do *not* remove arguments when unwinding the ++ // activation since the caller assumes having ++ // all arguments on the stack when entering the ++ // runtime to determine the exception handler ++ // (GC happens at call site with arguments!) 
++ // i0: exception oop ++ // i3: throwing pc ++ // i1: exception handler ++ __ br(handler_addr); ++} ++ ++ ++ ++OopMapSet* Runtime1::generate_patching(StubAssembler* sasm, address target) { ++ // use the maximum number of runtime-arguments here because it is difficult to ++ // distinguish each RT-Call. ++ // Note: This number affects also the RT-Call in generate_handle_exception because ++ // the oop-map is shared for all calls. ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ OopMap* oop_map = save_live_registers(sasm); ++ ++ __ mov(c_rarg0, rthread); ++ Label retaddr; ++ __ set_last_Java_frame(sp, rfp, retaddr, rscratch1); ++ // do the call ++ __ lea(rscratch1, RuntimeAddress(target)); ++ __ blr(rscratch1); ++ __ BIND(retaddr); ++ OopMapSet* oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(__ offset(), oop_map); ++ // verify callee-saved register ++#ifdef ASSERT ++ { Label L; ++ __ get_thread(rscratch1); ++ __ cmp(rthread, rscratch1); ++ __ br(Assembler::EQ, L); ++ __ stop("StubAssembler::call_RT: rthread not callee saved?"); ++ __ BIND(L); ++ } ++#endif ++ __ reset_last_Java_frame(true); ++ __ maybe_isb(); ++ ++ // check for pending exceptions ++ { Label L; ++ __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); ++ __ cbz(rscratch1, L); ++ // exception pending => remove activation and forward to exception handler ++ ++ { Label L1; ++ __ cbnz(i0, L1); // have we deoptimized? ++ __ far_jump(RuntimeAddress(Runtime1::entry_for(Runtime1::forward_exception_id))); ++ __ BIND(L1); ++ } ++ ++ // the deopt blob expects exceptions in the special fields of ++ // JavaThread, so copy and clear pending exception. ++ ++ // load and clear pending exception ++ __ ldr(i0, Address(rthread, Thread::pending_exception_offset())); ++ __ str(zr, Address(rthread, Thread::pending_exception_offset())); ++ ++ // check that there is really a valid exception ++ __ verify_not_null_oop(i0); ++ ++ // load throwing pc: this is the return address of the stub ++ __ mov(i3, lr); ++ ++#ifdef ASSERT ++ // check that fields in JavaThread for exception oop and issuing pc are empty ++ Label oop_empty; ++ __ ldr(rscratch1, Address(rthread, Thread::pending_exception_offset())); ++ __ cbz(rscratch1, oop_empty); ++ __ stop("exception oop must be empty"); ++ __ BIND(oop_empty); ++ ++ Label pc_empty; ++ __ ldr(rscratch1, Address(rthread, JavaThread::exception_pc_offset())); ++ __ cbz(rscratch1, pc_empty); ++ __ stop("exception pc must be empty"); ++ __ BIND(pc_empty); ++#endif ++ ++ // store exception oop and throwing pc to JavaThread ++ __ str(i0, Address(rthread, JavaThread::exception_oop_offset())); ++ __ str(i3, Address(rthread, JavaThread::exception_pc_offset())); ++ ++ restore_live_registers(sasm); ++ ++ __ leave(); ++ ++ // Forward the exception directly to deopt blob. We can blow no ++ // registers and must leave throwing pc on the stack. A patch may ++ // have values live in registers so the entry point with the ++ // exception in tls. ++ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_exception_in_tls())); ++ ++ __ BIND(L); ++ } ++ ++ ++ // Runtime will return true if the nmethod has been deoptimized during ++ // the patching process. In that case we must do a deopt reexecute instead. ++ ++ Label reexecuteEntry, cont; ++ ++ __ cbz(i0, cont); // have we deoptimized? ++ ++ // Will reexecute. 
Proper return address is already on the stack we just restore ++ // registers, pop all of our frame but the return address and jump to the deopt blob ++ restore_live_registers(sasm); ++ __ leave(); ++ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); ++ ++ __ BIND(cont); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ ret(lr); ++ ++ return oop_maps; ++} ++ ++ ++OopMapSet* Runtime1::generate_code_for(StubID id, StubAssembler* sasm) { ++ ++ const Register exception_oop = i0; ++ const Register exception_pc = i3; ++ ++ // for better readability ++ const bool must_gc_arguments = true; ++ const bool dont_gc_arguments = false; ++ ++ // default value; overwritten for some optimized stubs that are called from methods that do not use the fpu ++ bool save_fpu_registers = true; ++ ++ // stub code & info for the different stubs ++ OopMapSet* oop_maps = NULL; ++ OopMap* oop_map = NULL; ++ switch (id) { ++ { ++ case forward_exception_id: ++ { ++ oop_maps = generate_handle_exception(id, sasm); ++ __ leave(); ++ __ ret(lr); ++ } ++ break; ++ ++ case throw_div0_exception_id: ++ { StubFrame f(sasm, "throw_div0_exception", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_div0_exception), false); ++ } ++ break; ++ ++ case throw_null_pointer_exception_id: ++ { StubFrame f(sasm, "throw_null_pointer_exception", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_null_pointer_exception), false); ++ } ++ break; ++ ++ case new_instance_id: ++ case fast_new_instance_id: ++ case fast_new_instance_init_check_id: ++ { ++ Register klass = i3; // Incoming ++ Register obj = i0; // Result ++ ++ if (id == new_instance_id) { ++ __ set_info("new_instance", dont_gc_arguments); ++ } else if (id == fast_new_instance_id) { ++ __ set_info("fast new_instance", dont_gc_arguments); ++ } else { ++ assert(id == fast_new_instance_init_check_id, "bad StubID"); ++ __ set_info("fast new_instance init check", dont_gc_arguments); ++ } ++ ++ // If TLAB is disabled, see if there is support for inlining contiguous ++ // allocations. ++ // Otherwise, just go to the slow path. 
++ if ((id == fast_new_instance_id || id == fast_new_instance_init_check_id) && ++ !UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Label slow_path; ++ Register obj_size = i2; ++ Register t1 = i19; ++ Register t2 = i4; ++ assert_different_registers(klass, obj, obj_size, t1, t2); ++ ++ __ stp(i19, zr, Address(__ pre(sp, -2 * wordSize))); ++ ++ if (id == fast_new_instance_init_check_id) { ++ // make sure the klass is initialized ++ __ ldrb(rscratch1, Address(klass, InstanceKlass::init_state_offset())); ++ __ cmpw(rscratch1, InstanceKlass::fully_initialized); ++ __ br(Assembler::NE, slow_path); ++ } ++ ++#ifdef ASSERT ++ // assert object can be fast path allocated ++ { ++ Label ok, not_ok; ++ __ ldrw(obj_size, Address(klass, Klass::layout_helper_offset())); ++ __ cmp(obj_size, 0u); ++ __ br(Assembler::LE, not_ok); // make sure it's an instance (LH > 0) ++ __ tstw(obj_size, Klass::_lh_instance_slow_path_bit); ++ __ br(Assembler::EQ, ok); ++ __ BIND(not_ok); ++ __ stop("assert(can be fast path allocated)"); ++ __ should_not_reach_here(); ++ __ BIND(ok); ++ } ++#endif // ASSERT ++ ++ // get the instance size (size is postive so movl is fine for 64bit) ++ __ ldrw(obj_size, Address(klass, Klass::layout_helper_offset())); ++ ++ __ eden_allocate(obj, obj_size, 0, t1, slow_path); ++ ++ __ initialize_object(obj, klass, obj_size, 0, t1, t2, /* is_tlab_allocated */ false); ++ __ verify_oop(obj); ++ __ ldp(i19, zr, Address(__ post(sp, 2 * wordSize))); ++ __ ret(lr); ++ ++ __ BIND(slow_path); ++ __ ldp(i19, zr, Address(__ post(sp, 2 * wordSize))); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_instance), klass); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_r0(sasm); ++ __ verify_oop(obj); ++ __ leave(); ++ __ ret(lr); ++ ++ // i0,: new instance ++ } ++ ++ break; ++ ++ case counter_overflow_id: ++ { ++ Register bci = i0, method = i1; ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ // Retrieve bci ++ __ ldrw(bci, Address(rfp, 2*BytesPerWord)); ++ // And a pointer to the Method* ++ __ ldr(method, Address(rfp, 3*BytesPerWord)); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, counter_overflow), bci, method); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ __ ret(lr); ++ } ++ break; ++ ++ case new_type_array_id: ++ case new_object_array_id: ++ { ++ Register length = i19; // Incoming ++ Register klass = i3; // Incoming ++ Register obj = i0; // Result ++ ++ if (id == new_type_array_id) { ++ __ set_info("new_type_array", dont_gc_arguments); ++ } else { ++ __ set_info("new_object_array", dont_gc_arguments); ++ } ++ ++#ifdef ASSERT ++ // assert object type is really an array of the proper kind ++ { ++ Label ok; ++ Register t0 = obj; ++ __ ldrw(t0, Address(klass, Klass::layout_helper_offset())); ++ __ asrw(t0, t0, Klass::_lh_array_tag_shift); ++ int tag = ((id == new_type_array_id) ++ ? Klass::_lh_array_tag_type_value ++ : Klass::_lh_array_tag_obj_value); ++ __ mov(rscratch1, tag); ++ __ cmpw(t0, rscratch1); ++ __ br(Assembler::EQ, ok); ++ __ stop("assert(is an array klass)"); ++ __ should_not_reach_here(); ++ __ BIND(ok); ++ } ++#endif // ASSERT ++ ++ // If TLAB is disabled, see if there is support for inlining contiguous ++ // allocations. ++ // Otherwise, just go to the slow path. 
++ if (!UseTLAB && Universe::heap()->supports_inline_contig_alloc()) { ++ Register arr_size = i4; ++ Register t1 = i2; ++ Register t2 = i5; ++ Label slow_path; ++ assert_different_registers(length, klass, obj, arr_size, t1, t2); ++ ++ // check that array length is small enough for fast path. ++ __ mov(rscratch1, C1_MacroAssembler::max_array_allocation_length); ++ __ cmpw(length, rscratch1); ++ __ br(Assembler::HI, slow_path); ++ ++ // get the allocation size: round_up(hdr + length << (layout_helper & 0x1F)) ++ // since size is positive ldrw does right thing on 64bit ++ __ ldrw(t1, Address(klass, Klass::layout_helper_offset())); ++ // since size is positive movw does right thing on 64bit ++ __ movw(arr_size, length); ++ __ lslvw(arr_size, length, t1); ++ __ ubfx(t1, t1, Klass::_lh_header_size_shift, ++ exact_log2(Klass::_lh_header_size_mask + 1)); ++ __ add(arr_size, arr_size, t1); ++ __ add(arr_size, arr_size, MinObjAlignmentInBytesMask); // align up ++ __ andr(arr_size, arr_size, ~MinObjAlignmentInBytesMask); ++ ++ __ eden_allocate(obj, arr_size, 0, t1, slow_path); // preserves arr_size ++ ++ __ initialize_header(obj, klass, length, t1, t2); ++ __ ldrb(t1, Address(klass, in_bytes(Klass::layout_helper_offset()) + (Klass::_lh_header_size_shift / BitsPerByte))); ++ assert(Klass::_lh_header_size_shift % BitsPerByte == 0, "bytewise"); ++ assert(Klass::_lh_header_size_mask <= 0xFF, "bytewise"); ++ __ andr(t1, t1, Klass::_lh_header_size_mask); ++ __ sub(arr_size, arr_size, t1); // body length ++ __ add(t1, t1, obj); // body start ++ __ initialize_body(t1, arr_size, 0, t2); ++ __ membar(Assembler::StoreStore); ++ __ verify_oop(obj); ++ ++ __ ret(lr); ++ ++ __ BIND(slow_path); ++ } ++ ++ __ enter(); ++ OopMap* map = save_live_registers(sasm); ++ int call_offset; ++ if (id == new_type_array_id) { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_type_array), klass, length); ++ } else { ++ call_offset = __ call_RT(obj, noreg, CAST_FROM_FN_PTR(address, new_object_array), klass, length); ++ } ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_r0(sasm); ++ ++ __ verify_oop(obj); ++ __ leave(); ++ __ ret(lr); ++ ++ // i0: new array ++ } ++ break; ++ ++ case new_multi_array_id: ++ { StubFrame f(sasm, "new_multi_array", dont_gc_arguments); ++ // i0,: klass ++ // i19,: rank ++ // i2: address of 1st dimension ++ OopMap* map = save_live_registers(sasm); ++ __ mov(c_rarg1, i0); ++ __ mov(c_rarg3, i2); ++ __ mov(c_rarg2, i19); ++ int call_offset = __ call_RT(i0, noreg, CAST_FROM_FN_PTR(address, new_multi_array), i1, i2, i3); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers_except_r0(sasm); ++ ++ // i0,: new multi array ++ __ verify_oop(i0); ++ } ++ break; ++ ++ case register_finalizer_id: ++ { ++ __ set_info("register_finalizer", dont_gc_arguments); ++ ++ // This is called via call_runtime so the arguments ++ // will be place in C abi locations ++ ++ __ verify_oop(c_rarg0); ++ ++ // load the klass and check the has finalizer flag ++ Label register_finalizer; ++ Register t = i5; ++ __ load_klass(t, i0); ++ __ ldrw(t, Address(t, Klass::access_flags_offset())); ++ __ tbnz(t, exact_log2(JVM_ACC_HAS_FINALIZER), register_finalizer); ++ __ ret(lr); ++ ++ __ BIND(register_finalizer); ++ __ enter(); ++ OopMap* oop_map = save_live_registers(sasm); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, SharedRuntime::register_finalizer), i0); ++ oop_maps = new OopMapSet(); 
++ oop_maps->add_gc_map(call_offset, oop_map); ++ ++ // Now restore all the live registers ++ restore_live_registers(sasm); ++ ++ __ leave(); ++ __ ret(lr); ++ } ++ break; ++ ++ case throw_class_cast_exception_id: ++ { StubFrame f(sasm, "throw_class_cast_exception", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_class_cast_exception), true); ++ } ++ break; ++ ++ case throw_incompatible_class_change_error_id: ++ { StubFrame f(sasm, "throw_incompatible_class_cast_exception", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_incompatible_class_change_error), false); ++ } ++ break; ++ ++ case slow_subtype_check_id: ++ { ++ // Typical calling sequence: ++ // __ push(klass_RInfo); // object klass or other subclass ++ // __ push(sup_k_RInfo); // array element klass or other superclass ++ // __ bl(slow_subtype_check); ++ // Note that the subclass is pushed first, and is therefore deepest. ++ enum layout { ++ r0_off, r0_off_hi, ++ r2_off, r2_off_hi, ++ r4_off, r4_off_hi, ++ r5_off, r5_off_hi, ++ sup_k_off, sup_k_off_hi, ++ klass_off, klass_off_hi, ++ framesize, ++ result_off = sup_k_off ++ }; ++ ++ __ set_info("slow_subtype_check", dont_gc_arguments); ++ __ push(RegSet::of(i0, i2, i4, i5), sp); ++ ++ // This is called by pushing args and not with C abi ++ // __ ldr(r4, Address(sp, (klass_off) * VMRegImpl::stack_slot_size)); // subclass ++ // __ ldr(r0, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size)); // superclass ++ ++ __ ldp(i4, i0, Address(sp, (sup_k_off) * VMRegImpl::stack_slot_size)); ++ ++ Label miss; ++ __ check_klass_subtype_slow_path(i4, i0, i2, i5, NULL, &miss); ++ ++ // fallthrough on success: ++ __ mov(rscratch1, 1); ++ __ str(rscratch1, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result ++ __ pop(RegSet::of(i0, i2, i4, i5), sp); ++ __ ret(lr); ++ ++ __ BIND(miss); ++ __ str(zr, Address(sp, (result_off) * VMRegImpl::stack_slot_size)); // result ++ __ pop(RegSet::of(i0, i2, i4, i5), sp); ++ __ ret(lr); ++ } ++ break; ++ ++ case monitorenter_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorenter_id: ++ { ++ StubFrame f(sasm, "monitorenter", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(1, i0); // i0,: object ++ f.load_argument(0, i1); // i1,: lock address ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorenter), i0, i1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ break; ++ ++ case monitorexit_nofpu_id: ++ save_fpu_registers = false; ++ // fall through ++ case monitorexit_id: ++ { ++ StubFrame f(sasm, "monitorexit", dont_gc_arguments); ++ OopMap* map = save_live_registers(sasm, save_fpu_registers); ++ ++ // Called with store_parameter and not C abi ++ ++ f.load_argument(0, i0); // i0,: lock address ++ ++ // note: really a leaf routine but must setup last java sp ++ // => use call_RT for now (speed can be improved by ++ // doing last java sp setup manually) ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, monitorexit), i0); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm, save_fpu_registers); ++ } ++ break; ++ ++ case deoptimize_id: ++ { ++ StubFrame f(sasm, "deoptimize", dont_gc_arguments); ++ OopMap* oop_map = 
save_live_registers(sasm); ++ f.load_argument(0, c_rarg1); ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, deoptimize), c_rarg1); ++ ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, oop_map); ++ restore_live_registers(sasm); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ __ leave(); ++ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); ++ } ++ break; ++ ++ case throw_range_check_failed_id: ++ { StubFrame f(sasm, "range_check_failed", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_range_check_exception), true); ++ } ++ break; ++ ++ case unwind_exception_id: ++ { __ set_info("unwind_exception", dont_gc_arguments); ++ // note: no stubframe since we are about to leave the current ++ // activation and we are calling a leaf VM function only. ++ generate_unwind_exception(sasm); ++ } ++ break; ++ ++ case access_field_patching_id: ++ { StubFrame f(sasm, "access_field_patching", dont_gc_arguments); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, access_field_patching)); ++ } ++ break; ++ ++ case load_klass_patching_id: ++ { StubFrame f(sasm, "load_klass_patching", dont_gc_arguments); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_klass_patching)); ++ } ++ break; ++ ++ case load_mirror_patching_id: ++ { StubFrame f(sasm, "load_mirror_patching", dont_gc_arguments); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_mirror_patching)); ++ } ++ break; ++ ++ case load_appendix_patching_id: ++ { StubFrame f(sasm, "load_appendix_patching", dont_gc_arguments); ++ // we should set up register map ++ oop_maps = generate_patching(sasm, CAST_FROM_FN_PTR(address, move_appendix_patching)); ++ } ++ break; ++ ++ case handle_exception_nofpu_id: ++ case handle_exception_id: ++ { StubFrame f(sasm, "handle_exception", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case handle_exception_from_callee_id: ++ { StubFrame f(sasm, "handle_exception_from_callee", dont_gc_arguments); ++ oop_maps = generate_handle_exception(id, sasm); ++ } ++ break; ++ ++ case throw_index_exception_id: ++ { StubFrame f(sasm, "index_range_check_failed", dont_gc_arguments); ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_index_exception), true); ++ } ++ break; ++ ++ case throw_array_store_exception_id: ++ { StubFrame f(sasm, "throw_array_store_exception", dont_gc_arguments); ++ // tos + 0: link ++ // + 1: return address ++ oop_maps = generate_exception_throw(sasm, CAST_FROM_FN_PTR(address, throw_array_store_exception), true); ++ } ++ break; ++ ++ case predicate_failed_trap_id: ++ { ++ StubFrame f(sasm, "predicate_failed_trap", dont_gc_arguments); ++ ++ OopMap* map = save_live_registers(sasm); ++ ++ int call_offset = __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, predicate_failed_trap)); ++ oop_maps = new OopMapSet(); ++ oop_maps->add_gc_map(call_offset, map); ++ restore_live_registers(sasm); ++ __ leave(); ++ DeoptimizationBlob* deopt_blob = SharedRuntime::deopt_blob(); ++ assert(deopt_blob != NULL, "deoptimization blob must have been created"); ++ ++ __ far_jump(RuntimeAddress(deopt_blob->unpack_with_reexecution())); ++ } ++ break; ++ ++ ++ default: ++ { StubFrame f(sasm, "unimplemented entry", 
dont_gc_arguments); ++ __ mov(i0, (int)id); ++ __ call_RT(noreg, noreg, CAST_FROM_FN_PTR(address, unimplemented_entry), i0); ++ __ should_not_reach_here(); ++ } ++ break; ++ } ++ } ++ return oop_maps; ++} ++ ++#undef __ ++ ++const char *Runtime1::pd_name_for_address(address entry) { Unimplemented(); return 0; } +diff --git a/src/hotspot/cpu/sw64/c1_globals_sw64.hpp b/src/hotspot/cpu/sw64/c1_globals_sw64.hpp +new file mode 100644 +index 0000000000..90c7d659cb +--- /dev/null ++++ b/src/hotspot/cpu/sw64/c1_globals_sw64.hpp +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_C1_GLOBALS_SW64_HPP ++#define CPU_SW64_VM_C1_GLOBALS_SW64_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the client compiler. 
++// (see c1_globals.hpp) ++ ++#ifndef TIERED ++define_pd_global(bool, BackgroundCompilation, true ); ++define_pd_global(bool, UseTLAB, true ); ++define_pd_global(bool, ResizeTLAB, true ); ++define_pd_global(bool, InlineIntrinsics, true ); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, false); ++define_pd_global(bool, UseOnStackReplacement, true ); ++define_pd_global(bool, TieredCompilation, false); ++define_pd_global(intx, CompileThreshold, 1500 ); ++ ++define_pd_global(intx, OnStackReplacePercentage, 933 ); ++define_pd_global(intx, FreqInlineSize, 325 ); ++define_pd_global(intx, NewSizeThreadIncrease, 4*K ); ++define_pd_global(intx, InitialCodeCacheSize, 160*K); ++define_pd_global(intx, ReservedCodeCacheSize, 32*M ); ++define_pd_global(intx, NonProfiledCodeHeapSize, 13*M ); ++define_pd_global(intx, ProfiledCodeHeapSize, 14*M ); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(bool, ProfileInterpreter, false); ++define_pd_global(intx, CodeCacheExpansionSize, 32*K ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 1); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++define_pd_global(uintx, MetaspaceSize, 12*M ); ++define_pd_global(bool, NeverActAsServerClassMachine, true ); ++define_pd_global(uint64_t,MaxRAM, 1ULL*G); ++define_pd_global(bool, CICompileOSR, true ); ++#endif // !TIERED ++define_pd_global(bool, UseTypeProfile, false); ++define_pd_global(bool, RoundFPResults, true ); ++ ++define_pd_global(bool, LIRFillDelaySlots, false); ++define_pd_global(bool, OptimizeSinglePrecision, true ); ++define_pd_global(bool, CSEArrayLength, false); ++define_pd_global(bool, TwoOperandLIRForm, false ); ++ ++#endif // CPU_SW64_VM_C1_GLOBALS_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/c2_globals_sw64.hpp b/src/hotspot/cpu/sw64/c2_globals_sw64.hpp +new file mode 100644 +index 0000000000..1aec3acdec +--- /dev/null ++++ b/src/hotspot/cpu/sw64/c2_globals_sw64.hpp +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_C2_GLOBALS_SW64_HPP ++#define CPU_SW64_VM_C2_GLOBALS_SW64_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the server compiler. ++// (see c2_globals.hpp). Alpha-sorted. 
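++//
++// Note: each entry below only supplies the platform default; define_pd_global
++// (declared in share/runtime/globals.hpp) roughly expands to a pd_<name>
++// constant, e.g. define_pd_global(intx, CompileThreshold, 10000) becomes
++// const intx pd_CompileThreshold = 10000, which the shared c2_globals.hpp
++// flag declarations then pick up as their default.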
++ ++define_pd_global(bool, BackgroundCompilation, true); ++define_pd_global(bool, UseTLAB, true); ++define_pd_global(bool, ResizeTLAB, true); ++define_pd_global(bool, CICompileOSR, true); ++define_pd_global(bool, InlineIntrinsics, true); ++define_pd_global(bool, PreferInterpreterNativeStubs, false); ++define_pd_global(bool, ProfileTraps, true); ++define_pd_global(bool, UseOnStackReplacement, true); ++define_pd_global(bool, ProfileInterpreter, true); ++define_pd_global(bool, TieredCompilation, trueInTiered); ++define_pd_global(intx, CompileThreshold, 10000); ++ ++define_pd_global(intx, OnStackReplacePercentage, 140); ++define_pd_global(intx, ConditionalMoveLimit, 3); ++define_pd_global(intx, FLOATPRESSURE, 64); ++define_pd_global(intx, FreqInlineSize, 325); ++define_pd_global(intx, MinJumpTableSize, 10); ++define_pd_global(intx, INTPRESSURE, 22); //TODO check (value 24 will cause compile skiped) ++define_pd_global(intx, InteriorEntryAlignment, 16); ++define_pd_global(intx, NewSizeThreadIncrease, ScaleForWordSize(4*K)); ++define_pd_global(intx, LoopUnrollLimit, 60); ++define_pd_global(intx, LoopPercentProfileLimit, 10); ++// InitialCodeCacheSize derived from specjbb2000 run. ++define_pd_global(intx, InitialCodeCacheSize, 2496*K); // Integral multiple of CodeCacheExpansionSize ++define_pd_global(intx, CodeCacheExpansionSize, 64*K); ++ ++// Ergonomics related flags ++define_pd_global(uint64_t,MaxRAM, 128ULL*G); ++define_pd_global(intx, RegisterCostAreaRatio, 16000); ++ ++// Peephole and CISC spilling both break the graph, and so makes the ++// scheduler sick. ++define_pd_global(bool, OptoPeephole, false); ++define_pd_global(bool, UseCISCSpill, false); ++define_pd_global(bool, OptoScheduling, false); ++define_pd_global(bool, OptoBundling, false); ++define_pd_global(bool, OptoRegScheduling, false); ++define_pd_global(bool, SuperWordLoopUnrollAnalysis, true); ++define_pd_global(bool, IdealizeClearArrayNode, true); ++ ++define_pd_global(intx, ReservedCodeCacheSize, 240*M); ++define_pd_global(intx, NonProfiledCodeHeapSize, 21*M); ++define_pd_global(intx, ProfiledCodeHeapSize, 22*M); ++define_pd_global(intx, NonNMethodCodeHeapSize, 5*M ); ++define_pd_global(uintx, CodeCacheMinBlockLength, 6); ++define_pd_global(uintx, CodeCacheMinimumUseSpace, 400*K); ++ ++// Heap related flags ++define_pd_global(uintx,MetaspaceSize, ScaleForWordSize(16*M)); ++ ++// Ergonomics related flags ++define_pd_global(bool, NeverActAsServerClassMachine, false); ++ ++define_pd_global(bool, TrapBasedRangeChecks, false); // Not needed. ++ ++#endif // CPU_SW64_VM_C2_GLOBALS_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/c2_init_sw64.cpp b/src/hotspot/cpu/sw64/c2_init_sw64.cpp +new file mode 100644 +index 0000000000..1f3877cf5e +--- /dev/null ++++ b/src/hotspot/cpu/sw64/c2_init_sw64.cpp +@@ -0,0 +1,36 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "opto/compile.hpp" ++#include "opto/node.hpp" ++ ++// processor dependent initialization for i486 ++ ++void Compile::pd_compiler2_init() { ++ guarantee(CodeEntryAlignment >= InteriorEntryAlignment, "" ); ++ // QQQ presumably all 64bit cpu's support this. Seems like the ifdef could ++ // simply be left out. ++} +diff --git a/src/hotspot/cpu/sw64/cas.m4 b/src/hotspot/cpu/sw64/cas.m4 +new file mode 100644 +index 0000000000..2f7b1ff9ee +--- /dev/null ++++ b/src/hotspot/cpu/sw64/cas.m4 +@@ -0,0 +1,142 @@ ++dnl Copyright (c) 2016, Red Hat Inc. All rights reserved. ++dnl DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++dnl ++dnl This code is free software; you can redistribute it and/or modify it ++dnl under the terms of the GNU General Public License version 2 only, as ++dnl published by the Free Software Foundation. ++dnl ++dnl This code is distributed in the hope that it will be useful, but WITHOUT ++dnl ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++dnl FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++dnl version 2 for more details (a copy is included in the LICENSE file that ++dnl accompanied this code). ++dnl ++dnl You should have received a copy of the GNU General Public License version ++dnl 2 along with this work; if not, write to the Free Software Foundation, ++dnl Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++dnl ++dnl Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++dnl or visit www.oracle.com if you need additional information or have any ++dnl questions. ++dnl ++dnl ++dnl Process this file with m4 cas.m4 to generate the CAE and wCAS ++dnl instructions used in sw64.ad. ++dnl ++ ++// BEGIN This section of the file is automatically generated. Do not edit -------------- ++ ++// Sundry CAS operations. Note that release is always true, ++// regardless of the memory ordering of the CAS. This is because we ++// need the volatile case to be sequentially consistent but there is ++// no trailing StoreLoad barrier emitted by C2. Unfortunately we ++// can't check the type of memory ordering here, so we always emit a ++// STLXR. 
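++//
++// For reference (hand-expanded, not generator output): CAS_INSN(I,I,int,word)
++// below produces an "instruct compareAndExchangeI" whose encoding calls
++// __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register,
++// Assembler::word, /*acquire*/ false, /*release*/ true, /*weak*/ false,
++// $res$$Register); the commented-out Acq instantiations would additionally
++// set predicate(needs_acquiring_load_exclusive(n)) and acquire == true.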
++ ++// This section is generated from sw64_ad_cas.m4 ++ ++ ++define(`CAS_INSN', ++` ++instruct compareAndExchange$1$5(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndExchange$1 mem (Binary oldval newval))); ++ ifelse($5,Acq,' predicate(needs_acquiring_load_exclusive(n)); ++ ins_cost(VOLATILE_REF_COST);`,' ins_cost(2 * VOLATILE_REF_COST);`) ++ effect(TEMP_DEF res, KILL cr); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ins_encode %{ ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, ++ Assembler::$4, /*acquire*/ ifelse($5,Acq,true,false), /*release*/ true, ++ /*weak*/ false, $res$$Register); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++define(`CAS_INSN4', ++` ++instruct compareAndExchange$1$7(iReg$2NoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndExchange$1 mem (Binary oldval newval))); ++ ifelse($7,Acq,' predicate(needs_acquiring_load_exclusive(n)); ++ ins_cost(VOLATILE_REF_COST);`,' ins_cost(2 * VOLATILE_REF_COST);`) ++ effect(TEMP_DEF res, KILL cr); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval" ++ %} ++ ins_encode %{ ++ __ $5(rscratch2, $oldval$$Register); ++ __ cmpxchg($mem$$Register, rscratch2, $newval$$Register, ++ Assembler::$4, /*acquire*/ ifelse($5,Acq,true,false), /*release*/ true, ++ /*weak*/ false, $res$$Register); ++ __ $6($res$$Register, $res$$Register); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++CAS_INSN4(B,I,byte,byte,uxtbw,sxtbw) ++CAS_INSN4(S,I,short,halfword,uxthw,sxthw) ++CAS_INSN(I,I,int,word) ++CAS_INSN(L,L,long,xword) ++CAS_INSN(N,N,narrow oop,word) ++CAS_INSN(P,P,ptr,xword) ++dnl ++dnl CAS_INSN4(B,I,byte,byte,uxtbw,sxtbw,Acq) ++dnl CAS_INSN4(S,I,short,halfword,uxthw,sxthw,Acq) ++dnl CAS_INSN(I,I,int,word,Acq) ++dnl CAS_INSN(L,L,long,xword,Acq) ++dnl CAS_INSN(N,N,narrow oop,word,Acq) ++dnl CAS_INSN(P,P,ptr,xword,Acq) ++dnl ++define(`CAS_INSN2', ++` ++instruct weakCompareAndSwap$1$6(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{ ++ match(Set res (WeakCompareAndSwap$1 mem (Binary oldval newval))); ++ ifelse($6,Acq,' predicate(needs_acquiring_load_exclusive(n)); ++ ins_cost(VOLATILE_REF_COST);`,' ins_cost(2 * VOLATILE_REF_COST);`) ++ effect(KILL cr); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval" ++ "csetw $res, EQ\t# $res <-- (EQ ? 1 : 0)" ++ %} ++ ins_encode %{ ++ __ uxt$5(rscratch2, $oldval$$Register); ++ __ cmpxchg($mem$$Register, rscratch2, $newval$$Register, ++ Assembler::$4, /*acquire*/ ifelse($6,Acq,true,false), /*release*/ true, ++ /*weak*/ true, noreg); ++ __ csetw($res$$Register, Assembler::EQ); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++define(`CAS_INSN3', ++` ++instruct weakCompareAndSwap$1$5(iRegINoSp res, indirect mem, iReg$2 oldval, iReg$2 newval, rFlagsReg cr) %{ ++ match(Set res (WeakCompareAndSwap$1 mem (Binary oldval newval))); ++ ifelse($5,Acq,' predicate(needs_acquiring_load_exclusive(n)); ++ ins_cost(VOLATILE_REF_COST);`,' ins_cost(2 * VOLATILE_REF_COST);`) ++ effect(KILL cr); ++ format %{ ++ "cmpxchg $res = $mem, $oldval, $newval\t# ($3, weak) if $mem == $oldval then $mem <-- $newval" ++ "csetw $res, EQ\t# $res <-- (EQ ? 
1 : 0)" ++ %} ++ ins_encode %{ ++ __ cmpxchg($mem$$Register, $oldval$$Register, $newval$$Register, ++ Assembler::$4, /*acquire*/ ifelse($5,Acq,true,false), /*release*/ true, ++ /*weak*/ true, noreg); ++ __ csetw($res$$Register, Assembler::EQ); ++ %} ++ ins_pipe(pipe_slow); ++%}')dnl ++CAS_INSN2(B,I,byte,byte,bw) ++CAS_INSN2(S,I,short,halfword,hw) ++CAS_INSN3(I,I,int,word) ++CAS_INSN3(L,L,long,xword) ++CAS_INSN3(N,N,narrow oop,word) ++CAS_INSN3(P,P,ptr,xword) ++dnl CAS_INSN2(B,I,byte,byte,bw,Acq) ++dnl CAS_INSN2(S,I,short,halfword,hw,Acq) ++dnl CAS_INSN3(I,I,int,word,Acq) ++dnl CAS_INSN3(L,L,long,xword,Acq) ++dnl CAS_INSN3(N,N,narrow oop,word,Acq) ++dnl CAS_INSN3(P,P,ptr,xword,Acq) ++dnl ++ ++// END This section of the file is automatically generated. Do not edit -------------- +diff --git a/src/hotspot/cpu/sw64/codeBuffer_sw64.hpp b/src/hotspot/cpu/sw64/codeBuffer_sw64.hpp +new file mode 100644 +index 0000000000..0afd63cca7 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/codeBuffer_sw64.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_CODEBUFFER_SW64_HPP ++#define CPU_SW64_VM_CODEBUFFER_SW64_HPP ++ ++private: ++ void pd_initialize() {} ++ ++public: ++ void flush_bundle(bool start_new_bundle) {} ++ ++#endif // CPU_SW64_VM_CODEBUFFER_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/compiledIC_aot_sw64.cpp b/src/hotspot/cpu/sw64/compiledIC_aot_sw64.cpp +new file mode 100644 +index 0000000000..67ad82e249 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/compiledIC_aot_sw64.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++#include "aot/compiledIC_aot.hpp" ++#include "code/codeCache.hpp" ++#include "memory/resourceArea.hpp" ++ ++void CompiledDirectStaticCall::set_to_far(const methodHandle& callee, address entry) { ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_far %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ set_destination_mt_safe(entry); ++} ++ ++void CompiledPltStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(); ++ guarantee(stub != NULL, "stub not found"); ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledPltStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeLoadGot* method_loader = nativeLoadGot_at(stub); ++ NativeGotJump* jump = nativeGotJump_at(method_loader->next_instruction_address()); ++ ++ intptr_t data = method_loader->data(); ++ address destination = jump->destination(); ++ assert(data == 0 || data == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(destination == (address)Universe::non_oop_word() ++ || destination == entry, ++ "b) MT-unsafe modification of inline cache"); ++ ++ // Update stub. ++ method_loader->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++#ifdef NEVER_CALLED ++void CompiledPltStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeLoadGot* method_loader = nativeLoadGot_at(stub); ++ NativeGotJump* jump = nativeGotJump_at(method_loader->next_instruction_address()); ++ method_loader->set_data(0); ++ jump->set_jump_destination((address)-1); ++} ++#endif ++ ++#ifndef PRODUCT ++void CompiledPltStaticCall::verify() { ++ // Verify call. ++ _call->verify(); ++ ++#ifdef ASSERT ++ CodeBlob *cb = CodeCache::find_blob_unsafe((address) _call); ++ assert(cb && cb->is_aot(), "CompiledPltStaticCall can only be used on AOTCompiledMethod"); ++#endif ++ ++ // Verify stub. ++ address stub = find_stub(); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeLoadGot* method_loader = nativeLoadGot_at(stub); ++ NativeGotJump* jump = nativeGotJump_at(method_loader->next_instruction_address()); ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/sw64/compiledIC_sw64.cpp b/src/hotspot/cpu/sw64/compiledIC_sw64.cpp +new file mode 100644 +index 0000000000..3194fbc110 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/compiledIC_sw64.cpp +@@ -0,0 +1,222 @@ ++/* ++ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2014, 2018, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/codeCache.hpp" ++#include "code/compiledIC.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nmethod.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/safepoint.hpp" ++ ++// ---------------------------------------------------------------------------- ++ ++#define __ _masm. ++address CompiledStaticCall::emit_to_interp_stub(CodeBuffer &cbuf, address mark) { ++ // Stub is fixed up when the corresponding call is converted from ++ // calling compiled code to calling interpreted code. ++ // movq rbx, 0 ++ // jmp -5 # to self ++ Register rbx = rmethod; ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // Get mark within main instrs section. ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); ++ //__ stop("emit_to_interp_stub :not check jzy"); ++ address base = __ start_a_stub(to_interp_stub_size()); ++ if (base == NULL) { ++ return NULL; // CodeBuffer::expand failed. ++ } ++ // Static stub relocation stores the instruction address of the call. ++ __ relocate(static_stub_Relocation::spec(mark, false)); ++ // Static stub relocation also tags the Method* in the code-stream. ++ //__ movl(rbx, R0); // Method is zapped till fixup time. ++ __ prepare_patch_li48(rbx, 0); ++ ++ // This is recognized as unresolved by relocs/nativeinst/ic code. ++ __ relocate(relocInfo::runtime_call_type); ++ cbuf.set_insts_mark(); ++ address call_pc = (address)-1; ++ __ patchable_jump(call_pc); ++ __ align(16); ++ ++ assert(__ pc() - base <= to_interp_stub_size(), "wrong stub size"); ++ ++ // Update current stubs pointer and restore insts_end. ++ __ end_a_stub(); ++ return base; ++} ++#undef __ ++ ++int CompiledStaticCall::to_interp_stub_size() {//TODO:check jzy ++ int size = 4 * 4 + NativeCall::instruction_size; // sizeof(prepare_patch_li48) + NativeCall::instruction_size ++ return round_to(size, 16); ++} ++ ++int CompiledStaticCall::to_trampoline_stub_size() {//Unimplemented(); ++ // x86 doesn't use trampolines. ++ return 0; ++} ++ ++// Relocation entries for call stub, compiled java to interpreter. ++int CompiledStaticCall::reloc_to_interp_stub() {//Unimplemented(); ++ return 16; // todo:not check jzy ++} ++ ++#if INCLUDE_AOT ++#define __ _masm. 
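The set_to_interpreted() routines in these compiledIC files follow one MT-safety discipline: the stub (metadata slot plus jump destination) is written completely before the call site is redirected to it. The sketch below is a toy model of that ordering with invented types (ToyStub, set_to_interpreted_toy); it is not HotSpot code, only an illustration of why the stub update precedes set_destination_mt_safe().

#include <atomic>
#include <cstdint>
#include <cstdio>

struct ToyStub {                        // invented stand-in, not a HotSpot type
  std::atomic<intptr_t> metadata{0};    // plays the role of the Method* slot
  std::atomic<intptr_t> jump_dest{0};   // plays the role of the branch target
};

static void set_to_interpreted_toy(ToyStub* stub, std::atomic<intptr_t>* call_site,
                                   intptr_t callee, intptr_t entry) {
  // 1. Fill in the stub while nothing points at it yet.
  stub->metadata.store(callee, std::memory_order_relaxed);
  stub->jump_dest.store(entry, std::memory_order_relaxed);
  // 2. Publish: only now redirect the call to the completed stub.
  call_site->store(reinterpret_cast<intptr_t>(stub), std::memory_order_release);
}

int main() {
  ToyStub stub;
  std::atomic<intptr_t> call_site(0);
  set_to_interpreted_toy(&stub, &call_site, 0x1111, 0x2222);
  std::printf("call_site -> %lx\n", static_cast<long>(call_site.load()));
  return 0;
}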
++void CompiledStaticCall::emit_to_aot_stub(CodeBuffer &cbuf, address mark) {Unimplemented(); ++ if (!UseAOT) { ++ return; ++ } ++ // Stub is fixed up when the corresponding call is converted from ++ // calling compiled code to calling aot code. ++ // movq rax, imm64_aot_code_address ++ // jmp rax ++ ++ if (mark == NULL) { ++ mark = cbuf.insts_mark(); // Get mark within main instrs section. ++ } ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a stub. ++ MacroAssembler _masm(&cbuf); ++ ++ address base = ++ __ start_a_stub(to_aot_stub_size()); ++ guarantee(base != NULL, "out of space"); ++ ++ // Static stub relocation stores the instruction address of the call. ++ __ relocate(static_stub_Relocation::spec(mark, true /* is_aot */), Assembler::imm_operand); ++ // Load destination AOT code address. ++ __ movl(rscratch1, R0); // address is zapped till fixup time. ++ // This is recognized as unresolved by relocs/nativeinst/ic code. ++ __ jmp(rscratch1); ++ ++ assert(__ pc() - base <= to_aot_stub_size(), "wrong stub size"); ++ ++ // Update current stubs pointer and restore insts_end. ++ __ end_a_stub(); ++} ++#undef __ ++ ++int CompiledStaticCall::to_aot_stub_size() {Unimplemented(); ++ if (UseAOT) { ++ return 12; // ++ } else { ++ return 0; ++ } ++} ++ ++// Relocation entries for call stub, compiled java to aot. ++int CompiledStaticCall::reloc_to_aot_stub() {Unimplemented(); ++ if (UseAOT) { ++ return 2; // 1 in emit_to_aot_stub + 1 in emit_call ++ } else { ++ return 0; ++ } ++} ++#endif // INCLUDE_AOT ++ ++void CompiledDirectStaticCall::set_to_interpreted(const methodHandle& callee, address entry) { ++ address stub = find_stub(false /* is_aot */); ++ guarantee(stub != NULL, "stub not found"); ++ if (TraceICs) { ++ ResourceMark rm; ++ tty->print_cr("CompiledDirectStaticCall@" INTPTR_FORMAT ": set_to_interpreted %s", ++ p2i(instruction_address()), ++ callee->name_and_sig_as_C_string()); ++ } ++ ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++ ++#ifdef ASSERT ++ // read the value once ++ volatile intptr_t data = method_holder->data(); ++ volatile address destination = jump->jump_destination(); ++ assert(data == 0 || data == (intptr_t)callee(), ++ "a) MT-unsafe modification of inline cache"); ++ assert(destination == (address)-1 || destination == entry, ++ "b) MT-unsafe modification of inline cache"); ++#endif ++ ++ // Update stub. ++ method_holder->set_data((intptr_t)callee()); ++ jump->set_jump_destination(entry); ++ ++ // Update jump to call. ++ set_destination_mt_safe(stub); ++} ++ ++void CompiledDirectStaticCall::set_stub_to_clean(static_stub_Relocation* static_stub) { ++ assert (CompiledIC_lock->is_locked() || SafepointSynchronize::is_at_safepoint(), "mt unsafe call"); ++ // Reset stub. ++ address stub = static_stub->addr(); ++ assert(stub != NULL, "stub not found"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); ++ method_holder->set_data(0); ++ if (!static_stub->is_aot()) { ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++ jump->set_jump_destination((address)-1); ++ } ++} ++ ++ ++//----------------------------------------------------------------------------- ++// Non-product mode code ++#ifndef PRODUCT ++ ++void CompiledDirectStaticCall::verify() { ++ // Verify call. 
++ _call->verify(); ++ if (os::is_MP()) { ++ _call->verify_alignment(); ++ } ++ ++#ifdef ASSERT ++ CodeBlob *cb = CodeCache::find_blob_unsafe((address) _call); ++ assert(cb && !cb->is_aot(), "CompiledDirectStaticCall cannot be used on AOTCompiledMethod"); ++#endif ++ ++ // Verify stub. ++ address stub = find_stub(false /* is_aot */); ++ assert(stub != NULL, "no stub found for static call"); ++ // Creation also verifies the object. ++ NativeMovConstReg* method_holder = nativeMovConstReg_at(stub); ++ NativeJump* jump = nativeJump_at(method_holder->next_instruction_address()); ++ ++ // Verify state. ++ assert(is_clean() || is_call_to_compiled() || is_call_to_interpreted(), "sanity check"); ++} ++#endif // !PRODUCT +diff --git a/src/hotspot/cpu/sw64/copy_sw64.hpp b/src/hotspot/cpu/sw64/copy_sw64.hpp +new file mode 100644 +index 0000000000..2ad545568b +--- /dev/null ++++ b/src/hotspot/cpu/sw64/copy_sw64.hpp +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2003, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_COPY_SW64_HPP ++#define CPU_SW64_VM_COPY_SW64_HPP ++ ++// Inline functions for memory copy and fill. ++ ++// Contains inline asm implementations ++#include OS_CPU_HEADER_INLINE(copy) ++ ++ ++static void pd_fill_to_words(HeapWord* tohw, size_t count, juint value) { ++ julong* to = (julong*) tohw; ++ julong v = ((julong) value << 32) | value; ++ while (count-- > 0) { ++ *to++ = v; ++ } ++} ++ ++static void pd_fill_to_aligned_words(HeapWord* tohw, size_t count, juint value) { ++ pd_fill_to_words(tohw, count, value); ++} ++ ++static void pd_fill_to_bytes(void* to, size_t count, jubyte value) { ++ (void)memset(to, value, count); ++} ++ ++static void pd_zero_to_words(HeapWord* tohw, size_t count) { ++ pd_fill_to_words(tohw, count, 0); ++} ++ ++static void pd_zero_to_bytes(void* to, size_t count) { ++ (void)memset(to, 0, count); ++} ++ ++#endif // CPU_SW64_VM_COPY_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/depChecker_sw64.cpp b/src/hotspot/cpu/sw64/depChecker_sw64.cpp +new file mode 100644 +index 0000000000..43785e606e +--- /dev/null ++++ b/src/hotspot/cpu/sw64/depChecker_sw64.cpp +@@ -0,0 +1,30 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
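pd_fill_to_words() in copy_sw64.hpp above replicates a 32-bit fill value into both halves of a 64-bit word so each loop iteration stores one full word. A standalone sketch of that doubling trick (plain C++ with a fabricated buffer, not the HotSpot types):

#include <cstddef>
#include <cstdint>
#include <cstdio>

static void fill_words(uint64_t* to, size_t count, uint32_t value) {
  uint64_t v = (static_cast<uint64_t>(value) << 32) | value;  // value:value
  while (count-- > 0) {
    *to++ = v;
  }
}

int main() {
  uint64_t buf[4];
  fill_words(buf, 4, 0xDEADBEEFu);
  std::printf("%016llx\n", static_cast<unsigned long long>(buf[0]));  // deadbeefdeadbeef
  return 0;
}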
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "compiler/disassembler.hpp" ++#include "depChecker_sw64.hpp" ++ ++// Nothing to do on sw64 +diff --git a/src/hotspot/cpu/sw64/depChecker_sw64.hpp b/src/hotspot/cpu/sw64/depChecker_sw64.hpp +new file mode 100644 +index 0000000000..c0afd0ba30 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/depChecker_sw64.hpp +@@ -0,0 +1,31 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_DEPCHECKER_SW64_HPP ++#define CPU_SW64_VM_DEPCHECKER_SW64_HPP ++ ++// Nothing to do on sw64 ++ ++#endif // CPU_SW64_VM_DEPCHECKER_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/disassembler_sw64.hpp b/src/hotspot/cpu/sw64/disassembler_sw64.hpp +new file mode 100644 +index 0000000000..f4ef0681d5 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/disassembler_sw64.hpp +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_DISASSEMBLER_SW64_HPP ++#define CPU_SW64_VM_DISASSEMBLER_SW64_HPP ++ ++ static int pd_instruction_alignment() { ++ return sizeof(int); ++ } ++ ++ static const char* pd_cpu_opts() { ++ return "sw64only"; ++ } ++ ++#endif // CPU_SW64_VM_DISASSEMBLER_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/frame_sw64.cpp b/src/hotspot/cpu/sw64/frame_sw64.cpp +new file mode 100644 +index 0000000000..c9858a8667 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/frame_sw64.cpp +@@ -0,0 +1,808 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/markOop.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/monitorChunk.hpp" ++#include "runtime/os.inline.hpp" ++#include "runtime/signature.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "vmreg_sw64.inline.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#include "runtime/vframeArray.hpp" ++#endif ++ ++#ifdef ASSERT ++void RegisterMap::check_location_valid() { ++} ++#endif ++ ++// Profiling/safepoint support ++ ++bool frame::safe_for_sender(JavaThread *thread) { ++ address sp = (address)_sp; ++ address fp = (address)_fp; ++ address unextended_sp = (address)_unextended_sp; ++ ++ // consider stack guards when trying to determine "safe" stack pointers ++ static size_t stack_guard_size = os::uses_stack_guard_pages() ? 
++ JavaThread::stack_red_zone_size() + JavaThread::stack_yellow_zone_size() : 0; ++ size_t usable_stack_size = thread->stack_size() - stack_guard_size; ++ ++ // sp must be within the usable part of the stack (not in guards) ++ bool sp_safe = (sp < thread->stack_base()) && ++ (sp >= thread->stack_base() - usable_stack_size); ++ ++ ++ if (!sp_safe) { ++ return false; ++ } ++ ++ // unextended sp must be within the stack and above or equal sp ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && ++ (unextended_sp >= sp); ++ ++ if (!unextended_sp_safe) { ++ return false; ++ } ++ ++ // an fp must be within the stack and above (but not equal) sp ++ // second evaluation on fp+ is added to handle situation where fp is -1 ++ bool fp_safe = ((fp!=NULL) && fp < thread->stack_base() && (fp >= sp) && (((fp + (return_addr_offset * sizeof(void*))) < thread->stack_base()))); ++ ++ // We know sp/unextended_sp are safe only fp is questionable here ++ ++ // If the current frame is known to the code cache then we can attempt to ++ // to construct the sender and do some validation of it. This goes a long way ++ // toward eliminating issues when we get in frame construction code ++ ++ if (_cb != NULL ) { ++ ++ // First check if frame is complete and tester is reliable ++ // Unfortunately we can only check frame complete for runtime stubs and nmethod ++ // other generic buffer blobs are more problematic so we just assume they are ++ // ok. adapter blobs never have a frame complete and are never ok. ++ ++ if (!_cb->is_frame_complete_at(_pc)) { ++ if (_cb->is_compiled() || _cb->is_adapter_blob() || _cb->is_runtime_stub()) { ++ return false; ++ } ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!_cb->code_contains(_pc)) { ++ return false; ++ } ++ ++ // Entry frame checks ++ if (is_entry_frame()) { ++ // an entry frame must have a valid fp. ++ return fp_safe && is_entry_frame_valid(thread); ++ } ++ ++ intptr_t* sender_sp = NULL; ++ intptr_t* sender_unextended_sp = NULL; ++ address sender_pc = NULL; ++ intptr_t* saved_fp = NULL; ++ ++ if (is_interpreted_frame()) { ++ // fp must be safe ++ if (!fp_safe) { ++ return false; ++ } ++ ++ sender_pc = (address) this->fp()[return_addr_offset]; ++ // for interpreted frames, the value below is the sender "raw" sp, ++ // which can be different from the sender unextended sp (the sp seen ++ // by the sender) because of current frame local variables ++ sender_sp = (intptr_t*) addr_at(sender_sp_offset); ++ sender_unextended_sp = (intptr_t*) this->fp()[interpreter_frame_sender_sp_offset]; ++ saved_fp = (intptr_t*) this->fp()[link_offset]; ++ ++ } else { ++ // must be some sort of compiled/runtime frame ++ // fp does not have to be safe (although it could be check for c1?) ++ ++ // check for a valid frame_size, otherwise we are unlikely to get a valid sender_pc ++ if (_cb->frame_size() <= 0) { ++ return false; ++} ++ ++ sender_sp = _unextended_sp + _cb->frame_size(); ++ // Is sender_sp safe? 
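safe_for_sender() above only accepts a pointer that lies inside the usable part of a downward-growing stack, i.e. below the stack base and above the guard zones. A self-contained sketch of that predicate, using made-up addresses and a made-up FakeThreadStack type rather than the JavaThread API:

#include <cstddef>
#include <cstdint>
#include <cstdio>

struct FakeThreadStack {   // invented stand-in for the thread's stack geometry
  uintptr_t base;          // highest address of the stack
  size_t    size;          // total size, including guard zones
  size_t    guard;         // red + yellow zone bytes at the low end
};

static bool sp_is_safe(const FakeThreadStack& st, uintptr_t sp) {
  size_t usable = st.size - st.guard;
  return sp < st.base && sp >= st.base - usable;
}

int main() {
  FakeThreadStack st = {0x700000100000ULL, 1u << 20, 16u << 10};
  std::printf("inside=%d in_guard=%d\n",
              sp_is_safe(st, st.base - 4096),       // well inside: 1
              sp_is_safe(st, st.base - st.size));   // down in the guard zone: 0
  return 0;
}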
++ if ((address)sender_sp >= thread->stack_base()) { ++ return false; ++ } ++ sender_unextended_sp = sender_sp; ++ // On Intel the return_address is always the word on the stack ++ sender_pc = (address) *(sender_sp-1); ++ // Note: frame::sender_sp_offset is only valid for compiled frame ++ saved_fp = (intptr_t*) *(sender_sp - frame::sender_sp_offset); ++ } ++ ++ ++ // If the potential sender is the interpreter then we can do some more checking ++ if (Interpreter::contains(sender_pc)) { ++ ++ // ebp is always saved in a recognizable place in any code we generate. However ++ // only if the sender is interpreted/call_stub (c1 too?) are we certain that the saved ebp ++ // is really a frame pointer. ++ ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ return sender.is_interpreted_frame_valid(thread); ++ ++ } ++ ++ // We must always be able to find a recognizable pc ++ CodeBlob* sender_blob = CodeCache::find_blob_unsafe(sender_pc); ++ if (sender_pc == NULL || sender_blob == NULL) { ++ return false; ++ } ++ ++ // Could be a zombie method ++ if (sender_blob->is_zombie() || sender_blob->is_unloaded()) { ++ return false; ++ } ++ ++ // Could just be some random pointer within the codeBlob ++ if (!sender_blob->code_contains(sender_pc)) { ++ return false; ++ } ++ ++ // We should never be able to see an adapter if the current frame is something from code cache ++ if (sender_blob->is_adapter_blob()) { ++ return false; ++ } ++ ++ // Could be the call_stub ++ if (StubRoutines::returns_to_call_stub(sender_pc)) { ++ bool saved_fp_safe = ((address)saved_fp < thread->stack_base()) && (saved_fp > sender_sp); ++ ++ if (!saved_fp_safe) { ++ return false; ++ } ++ ++ // construct the potential sender ++ ++ frame sender(sender_sp, sender_unextended_sp, saved_fp, sender_pc); ++ ++ // Validate the JavaCallWrapper an entry frame must have ++ address jcw = (address)sender.entry_frame_call_wrapper(); ++ ++ bool jcw_safe = (jcw < thread->stack_base()) && (jcw > (address)sender.fp()); ++ ++ return jcw_safe; ++ } ++ ++ CompiledMethod* nm = sender_blob->as_compiled_method_or_null(); ++ if (nm != NULL) { ++ if (nm->is_deopt_mh_entry(sender_pc) || nm->is_deopt_entry(sender_pc) || ++ nm->method()->is_method_handle_intrinsic()) { ++ return false; ++ } ++ } ++ ++ // If the frame size is 0 something (or less) is bad because every nmethod has a non-zero frame size ++ // because the return address counts against the callee's frame. ++ ++ if (sender_blob->frame_size() <= 0) { ++ assert(!sender_blob->is_compiled(), "should count return address at least"); ++ return false; ++ } ++ ++ // We should never be able to see anything here except an nmethod. If something in the ++ // code cache (current frame) is called by an entity within the code cache that entity ++ // should not be anything but the call stub (already covered), the interpreter (already covered) ++ // or an nmethod. ++ ++ if (!sender_blob->is_compiled()) { ++ return false; ++ } ++ ++ // Could put some more validation for the potential non-interpreted sender ++ // frame we'd create by calling sender if I could think of any. Wait for next crash in forte... 
++ ++ // One idea is seeing if the sender_pc we have is one that we'd expect to call to current cb ++ ++ // We've validated the potential sender that would be created ++ return true; ++ } ++ ++ // Must be native-compiled frame. Since sender will try and use fp to find ++ // linkages it must be safe ++ ++ if (!fp_safe) { ++ return false; ++ } ++ ++ // Will the pc we fetch be non-zero (which we'll find at the oldest frame) ++ ++ if ( (address) this->fp()[return_addr_offset] == NULL) return false; ++ ++ ++ // could try and do some more potential verification of native frame if we could think of some... ++ ++ return true; ++ ++} ++ ++ ++void frame::patch_pc(Thread* thread, address pc) { ++ address* pc_addr = &(((address*) sp())[-1]); ++ if (TracePcPatching) { ++ tty->print_cr("patch_pc at address " INTPTR_FORMAT " [" INTPTR_FORMAT " -> " INTPTR_FORMAT "]", ++ p2i(pc_addr), p2i(*pc_addr), p2i(pc)); ++ } ++ // Either the return address is the original one or we are going to ++ // patch in the same address that's already there. ++ assert(_pc == *pc_addr || pc == *pc_addr, "must be"); ++ *pc_addr = pc; ++ _cb = CodeCache::find_blob(pc); ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ assert(original_pc == _pc, "expected original PC to be stored before patching"); ++ _deopt_state = is_deoptimized; ++ // leave _pc as is ++ } else { ++ _deopt_state = not_deoptimized; ++ _pc = pc; ++ } ++} ++ ++bool frame::is_interpreted_frame() const { ++ return Interpreter::contains(pc()); ++} ++ ++int frame::frame_size(RegisterMap* map) const { ++ frame sender = this->sender(map); ++ return sender.sp() - sp(); ++} ++ ++intptr_t* frame::entry_frame_argument_at(int offset) const { ++ // convert offset to index to deal with tsi ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ // Entry frame's arguments are always in relation to unextended_sp() ++ return &unextended_sp()[index]; ++} ++ ++// sender_sp ++ ++intptr_t* frame::interpreter_frame_sender_sp() const { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ return (intptr_t*) at(interpreter_frame_sender_sp_offset); ++} ++ ++void frame::set_interpreter_frame_sender_sp(intptr_t* sender_sp) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ ptr_at_put(interpreter_frame_sender_sp_offset, (intptr_t) sender_sp); ++} ++ ++ ++// monitor elements ++ ++BasicObjectLock* frame::interpreter_frame_monitor_begin() const { ++ return (BasicObjectLock*) addr_at(interpreter_frame_monitor_block_bottom_offset); ++} ++ ++BasicObjectLock* frame::interpreter_frame_monitor_end() const { ++ BasicObjectLock* result = (BasicObjectLock*) *addr_at(interpreter_frame_monitor_block_top_offset); ++ // make sure the pointer points inside the frame ++ assert(sp() <= (intptr_t*) result, "monitor end should be above the stack pointer"); ++ assert((intptr_t*) result < fp(), "monitor end should be strictly below the frame pointer"); ++ return result; ++} ++ ++void frame::interpreter_frame_set_monitor_end(BasicObjectLock* value) { ++ *((BasicObjectLock**)addr_at(interpreter_frame_monitor_block_top_offset)) = value; ++} ++ ++// Used by template based interpreter deoptimization ++void frame::interpreter_frame_set_last_sp(intptr_t* sp) { ++ *((intptr_t**)addr_at(interpreter_frame_last_sp_offset)) = sp; ++} ++ ++frame frame::sender_for_entry_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ // Java frame called from C; skip all C frames and return top C ++ // frame of 
that chunk as the sender ++ JavaFrameAnchor* jfa = entry_frame_call_wrapper()->anchor(); ++ assert(!entry_frame_is_first(), "next Java fp must be non zero"); ++ assert(jfa->last_Java_sp() > sp(), "must be above this frame on stack"); ++ // Since we are walking the stack now this nested anchor is obviously walkable ++ // even if it wasn't when it was stacked. ++ if (!jfa->walkable()) { ++ // Capture _last_Java_pc (if needed) and mark anchor walkable. ++ jfa->capture_last_Java_pc(); ++ } ++ map->clear(); ++ assert(map->include_argument_oops(), "should be set by clear"); ++ vmassert(jfa->last_Java_pc() != NULL, "not walkable"); ++ frame fr(jfa->last_Java_sp(), jfa->last_Java_fp(), jfa->last_Java_pc()); ++ return fr; ++ } ++ ++//------------------------------------------------------------------------------ ++// frame::verify_deopt_original_pc ++// ++// Verifies the calculated original PC of a deoptimization PC for the ++// given unextended SP. ++#ifdef ASSERT ++void frame::verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp) { ++ frame fr; ++ ++ // This is ugly but it's better than to change {get,set}_original_pc ++ // to take an SP value as argument. And it's only a debugging ++ // method anyway. ++ fr._unextended_sp = unextended_sp; ++ ++ address original_pc = nm->get_original_pc(&fr); ++ assert(nm->insts_contains_inclusive(original_pc), ++ "original PC must be in the main code section of the the compiled method (or must be immediately following it)"); ++} ++#endif ++ ++//------------------------------------------------------------------------------ ++// frame::adjust_unextended_sp ++#ifdef ASSERT ++void frame::adjust_unextended_sp() { ++ // On sw64, sites calling method handle intrinsics and lambda forms are treated ++ // as any other call site. Therefore, no special action is needed when we are ++ // returning to any of these call sites. ++ ++ if (_cb != NULL) { ++ CompiledMethod* sender_cm = _cb->as_compiled_method_or_null(); ++ if (sender_cm != NULL) { ++ // If the sender PC is a deoptimization point, get the original PC. ++ if (sender_cm->is_deopt_entry(_pc) || ++ sender_cm->is_deopt_mh_entry(_pc)) { ++ verify_deopt_original_pc(sender_cm, _unextended_sp); ++ } ++ } ++ } ++} ++#endif ++ ++//------------------------------------------------------------------------------ ++// frame::update_map_with_saved_link ++void frame::update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr) { ++ // The interpreter and compiler(s) always save EBP/RBP in a known ++ // location on entry. We must record where that location is ++ // so this if EBP/RBP was live on callout from c2 we can find ++ // the saved copy no matter what it called. ++ ++ // Since the interpreter always saves EBP/RBP if we record where it is then ++ // we don't have to always save EBP/RBP on entry and exit to c2 compiled ++ // code, on entry will be enough. ++ map->set_location(rfp->as_VMReg(), (address) link_addr); ++ ++ // this is weird "H" ought to be at a higher address however the ++ // oopMaps seems to have the "H" regs at the same address and the ++ // vanilla register. ++ // XXXX make this go away ++ if (true) { ++ map->set_location(rfp->as_VMReg()->next(), (address) link_addr); ++ } ++ ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::sender_for_interpreter_frame ++frame frame::sender_for_interpreter_frame(RegisterMap* map) const { ++ // SP is the raw SP from the sender after adapter or interpreter ++ // extension. 
++ intptr_t* sender_sp = this->sender_sp(); ++ ++ // This is the sp before any possible extension (adapter/locals). ++ intptr_t* unextended_sp = interpreter_frame_sender_sp(); ++ ++#if COMPILER2_OR_JVMCI ++ if (map->update_map()) { ++ update_map_with_saved_link(map, (intptr_t**) addr_at(link_offset)); ++ } ++#endif // COMPILER2_OR_JVMCI ++ ++ return frame(sender_sp, unextended_sp, link(), sender_pc()); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::sender_for_compiled_frame ++frame frame::sender_for_compiled_frame(RegisterMap* map) const { ++ assert(map != NULL, "map must be set"); ++ ++ // frame owned by optimizing compiler ++ assert(_cb->frame_size() >= 0, "must have non-zero frame size"); ++ intptr_t* sender_sp = unextended_sp() + _cb->frame_size(); ++ intptr_t* unextended_sp = sender_sp; ++ ++ // On Intel the return_address is always the word on the stack ++ address sender_pc = (address) *(sender_sp-1); ++ ++ // This is the saved value of EBP which may or may not really be an FP. ++ // It is only an FP if the sender is an interpreter frame (or C1?). ++ intptr_t** saved_fp_addr = (intptr_t**) (sender_sp - frame::sender_sp_offset); ++ ++ if (map->update_map()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map->set_include_argument_oops(_cb->caller_must_gc_arguments(map->thread())); ++ if (_cb->oop_maps() != NULL) { ++ OopMapSet::update_register_map(this, map); ++ } ++ ++ // Since the prolog does the save and restore of EBP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ update_map_with_saved_link(map, saved_fp_addr); ++ } ++ ++ assert(sender_sp != sp(), "must have changed"); ++ return frame(sender_sp, unextended_sp, *saved_fp_addr, sender_pc); ++} ++ ++ ++//------------------------------------------------------------------------------ ++// frame::sender ++frame frame::sender(RegisterMap* map) const { ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map->set_include_argument_oops(false); ++ ++ if (is_entry_frame()) return sender_for_entry_frame(map); ++ if (is_interpreted_frame()) return sender_for_interpreter_frame(map); ++ assert(_cb == CodeCache::find_blob(pc()),"Must be the same"); ++ ++ if (_cb != NULL) { ++ return sender_for_compiled_frame(map); ++ } ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. ++ return frame(sender_sp(), link(), sender_pc()); ++} ++ ++bool frame::is_interpreted_frame_valid(JavaThread* thread) const { ++ assert(is_interpreted_frame(), "Not an interpreted frame"); ++ // These are reasonable sanity checks ++ if (fp() == 0 || (intptr_t(fp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (sp() == 0 || (intptr_t(sp()) & (wordSize-1)) != 0) { ++ return false; ++ } ++ if (fp() + interpreter_frame_initial_sp_offset < sp()) { ++ return false; ++ } ++ // These are hacks to keep us out of trouble. 
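sender_for_compiled_frame() above recovers the caller purely by arithmetic: the caller's sp sits one full frame above the callee's unextended sp, the return pc is the word just below that, and the saved fp sits sender_sp_offset words below it. A toy walk-through on a fabricated stack (the frame size and stored values are invented; only sender_sp_offset = 2 comes from this port):

#include <cstdint>
#include <cstdio>

int main() {
  const int kSenderSpOffset = 2;   // frame::sender_sp_offset in this port
  const int kFrameSizeWords = 8;   // invented callee frame size

  intptr_t stack[16] = {0};
  intptr_t* unextended_sp = &stack[2];
  intptr_t* sender_sp = unextended_sp + kFrameSizeWords;  // caller's sp
  sender_sp[-1] = 0x1234;                                 // pretend return pc
  *(sender_sp - kSenderSpOffset) = (intptr_t)&stack[15];  // pretend saved fp

  std::printf("sender_pc=0x%lx saved_fp=%p\n",
              static_cast<long>(sender_sp[-1]),
              reinterpret_cast<void*>(*(sender_sp - kSenderSpOffset)));
  return 0;
}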
++ // The problem with these is that they mask other problems ++ if (fp() <= sp()) { // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ // do some validation of frame elements ++ // first the method ++ ++ Method* m = safe_interpreter_frame_method(); ++ ++ // validate the method we'd find in this potential sender ++ if (!Method::is_valid_method(m)) return false; ++ ++ // stack frames shouldn't be much larger than max_stack elements ++ // this test requires the use the unextended_sp which is the sp as seen by ++ // the current frame, and not sp which is the "raw" pc which could point ++ // further because of local variables of the callee method inserted after ++ // method arguments ++ if (fp() - unextended_sp() > 1024 + m->max_stack()*Interpreter::stackElementSize) { ++ return false; ++ } ++ ++ // validate bci/bcp ++ ++ address bcp = interpreter_frame_bcp(); ++ if (m->validate_bci_from_bcp(bcp) < 0) { ++ return false; ++ } ++ ++ // validate ConstantPoolCache* ++ ConstantPoolCache* cp = *interpreter_frame_cache_addr(); ++ if (MetaspaceObj::is_valid(cp) == false) return false; ++ ++ // validate locals ++ ++ address locals = (address) *interpreter_frame_locals_addr(); ++ ++ if (locals > thread->stack_base() || locals < (address) fp()) return false; ++ ++ // We'd have to be pretty unlucky to be mislead at this point ++ return true; ++} ++ ++BasicType frame::interpreter_frame_result(oop* oop_result, jvalue* value_result) { ++ assert(is_interpreted_frame(), "interpreted frame expected"); ++ Method* method = interpreter_frame_method(); ++ BasicType type = method->result_type(); ++ ++ intptr_t* tos_addr; ++ if (method->is_native()) { ++ // Prior to calling into the runtime to report the method_exit the possible ++ // return value is pushed to the native stack. If the result is a jfloat/jdouble ++ // then ST0 is saved before EAX/EDX. See the note in generate_native_result ++ tos_addr = (intptr_t*)sp(); ++ if (type == T_FLOAT || type == T_DOUBLE) { ++ // QQQ seems like this code is equivalent on the two platforms ++ // This is times two because we do a push(ltos) after pushing XMM0 ++ // and that takes two interpreter stack slots. ++ tos_addr += 2 * Interpreter::stackElementWords; ++ } ++ } else { ++ tos_addr = (intptr_t*)interpreter_frame_tos_address(); ++ } ++ ++ switch (type) { ++ case T_OBJECT : ++ case T_ARRAY : { ++ oop obj; ++ if (method->is_native()) { ++ obj = cast_to_oop(at(interpreter_frame_oop_temp_offset)); ++ } else { ++ oop* obj_p = (oop*)tos_addr; ++ obj = (obj_p == NULL) ? 
(oop)NULL : *obj_p; ++ } ++ assert(obj == NULL || Universe::heap()->is_in(obj), "sanity check"); ++ *oop_result = obj; ++ break; ++ } ++ case T_BOOLEAN : value_result->z = *(jboolean*)tos_addr; break; ++ case T_BYTE : value_result->b = *(jbyte*)tos_addr; break; ++ case T_CHAR : value_result->c = *(jchar*)tos_addr; break; ++ case T_SHORT : value_result->s = *(jshort*)tos_addr; break; ++ case T_INT : value_result->i = *(jint*)tos_addr; break; ++ case T_LONG : value_result->j = *(jlong*)tos_addr; break; ++ case T_FLOAT : value_result->f = *(jfloat*)tos_addr; break; ++// yj not sure ++// case T_FLOAT : { ++// if (method->is_native()) { ++// jdouble d = *(jdouble*)tos_addr; // Result was in ST0 so need to convert to jfloat ++// value_result->f = (jfloat)d; ++// } else { ++// value_result->f = *(jfloat*)tos_addr; ++// } ++// break; ++// } ++ case T_DOUBLE : value_result->d = *(jdouble*)tos_addr; break; ++ case T_VOID : /* Nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ return type; ++} ++ ++ ++intptr_t* frame::interpreter_frame_tos_at(jint offset) const { ++ int index = (Interpreter::expr_offset_in_bytes(offset)/wordSize); ++ return &interpreter_frame_tos_address()[index]; ++} ++ ++#ifndef PRODUCT ++ ++#define DESCRIBE_FP_OFFSET(name) \ ++ values.describe(frame_no, fp() + frame::name##_offset, #name) ++ ++void frame::describe_pd(FrameValues& values, int frame_no) { ++ if (is_interpreted_frame()) { ++ DESCRIBE_FP_OFFSET(interpreter_frame_sender_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_last_sp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_method); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mirror); ++ DESCRIBE_FP_OFFSET(interpreter_frame_mdp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_cache); ++ DESCRIBE_FP_OFFSET(interpreter_frame_locals); ++ DESCRIBE_FP_OFFSET(interpreter_frame_bcp); ++ DESCRIBE_FP_OFFSET(interpreter_frame_initial_sp); ++ } else if (is_entry_frame()) { ++ // This could be more descriptive if we use the enum in ++ // stubGenerator to map to real names but it's most important to ++ // claim these frame slots so the error checking works. ++ for (int i = 0; i < entry_frame_after_call_words; i++) { ++ values.describe(frame_no, fp() - i, err_msg("call_stub word fp - %d", i)); ++ } ++ } ++} ++#endif // !PRODUCT ++ ++intptr_t *frame::initial_deoptimization_info() { ++ // used to reset the saved FP ++ return fp(); ++} ++ ++intptr_t* frame::real_fp() const { ++ if (_cb != NULL) { ++ // use the frame size if valid ++ int size = _cb->frame_size(); ++ if (size > 0) { ++ return unextended_sp() + size; ++ } ++ } ++ // else rely on fp() ++ assert(! 
++  is_compiled_frame(), "unknown compiled frame size");
++  return fp();
++}
++
++void frame::init(intptr_t* sp, intptr_t* fp, address pc) {
++  assert(pc != NULL, "no pc?");
++  _cb = CodeCache::find_blob(pc);
++  if (sp == NULL && _cb != NULL) {
++    sp = fp - _cb->frame_size() + 2;
++  }
++  _sp = sp;
++  _unextended_sp = sp;
++  _fp = fp;
++  _pc = pc;
++
++  adjust_unextended_sp();
++
++  address original_pc = CompiledMethod::get_deopt_original_pc(this);
++  if (original_pc != NULL) {
++    _pc = original_pc;
++    _deopt_state = is_deoptimized;
++  } else {
++    _deopt_state = not_deoptimized;
++  }
++
++  _is_c_frame = false;
++  _sender_fp_for_c_frame = NULL;
++  _sender_address_for_c_frame = NULL;
++  init_sender_for_c_frame();
++}
++// If this C frame is not a leaf function, the sender's ra/fp were saved by the prologue
++// and are recovered here by scanning for the stores; for a leaf function, or if the
++// thread stopped before the stores executed, they must be taken from the signal
++// context instead (see fixRa()).
++void frame::init_sender_for_c_frame() {
++  if (is_java_frame() ||
++      is_native_frame() ||
++      is_runtime_frame() ||
++      is_stub_frame()) {
++    _is_c_frame = false;
++    return;
++  }
++  _is_c_frame = true;
++  if (_fp == NULL) return;
++  bool stop_flag = false;
++  address pinsn = _pc;
++  while ((_sender_fp_for_c_frame == NULL || _sender_address_for_c_frame == NULL) && (*((int *) pinsn)) && !stop_flag) {
++    int insn = *((int *) pinsn);
++    if (_sender_fp_for_c_frame == NULL && (insn & 0xffff0000) == 0xadfe0000) { // stl fp,yy(sp)
++      int yy = (insn & 0x0000ffff) / 8;
++      _sender_fp_for_c_frame = (intptr_t *) (*(_fp + yy));
++    } else if (_sender_address_for_c_frame == NULL && (insn & 0xffff0000) == 0xaf5e0000) { // stl ra,xx(sp)
++      int xx = (insn & 0x0000ffff) / 8;
++      _sender_address_for_c_frame = (address) (*(_fp + xx));
++    } else if ((insn & 0xffff0000) == 0xffbb0000) { // ldih gp,zz(t12)
++      stop_flag = true;
++    }
++    pinsn -= 4;
++    // keep scanning backwards from _pc towards the function entry
++  }
++}
++
++void frame::init_sender_for_c_frame(address f_start_pc) {
++  do {
++    int insn = *((int *) f_start_pc);
++    if (_sender_address_for_c_frame == NULL && (insn & 0xffff0000) == 0xaf5e0000) { // stl ra,xx(sp)
++      int xx = (insn & 0x0000ffff) / 8;
++      _sender_address_for_c_frame = (address) (*(_sp + xx));
++    } else if (_sender_fp_for_c_frame == NULL && (insn & 0xffff0000) == 0xadfe0000) { // stl fp,yy(sp)
++      int yy = (insn & 0x0000ffff) / 8;
++      _sender_fp_for_c_frame = (intptr_t *) (*(_sp + yy));
++    }
++    f_start_pc += 4;
++    // keep scanning forwards from the function entry towards _pc
++  } while ((_sender_fp_for_c_frame == NULL || _sender_address_for_c_frame == NULL) && (*((int *) f_start_pc)));
++}
++
++// Used when the thread stopped before ra was stored to the stack: fall back to the ra in the signal context.
++void frame::fixRa(const void* ucVoid) {
++  if (!_is_c_frame) return;
++  if (_sender_address_for_c_frame != NULL) {
++    return;
++  } else {
++    const ucontext_t *uc = (const ucontext_t *) ucVoid;
++    if (uc != NULL) {
++      _sender_address_for_c_frame = os::ucontext_get_ra(uc);
++    } else {
++      _sender_address_for_c_frame = NULL;
++    }
++  }
++}
++
++intptr_t* frame::sender_sp() const {
++  if (_is_c_frame) {
++    return _sender_fp_for_c_frame; // for SW64 C frames, sp is always the same as fp
++  } else {
++    return addr_at(sender_sp_offset);
++  }
++}
++
++intptr_t* frame::link() const {
++  if (_is_c_frame)
++    return _sender_fp_for_c_frame;
++  else
++    return (intptr_t*) *(intptr_t **)addr_at(link_offset);
++}
++
++address frame::sender_pc() const {
++  if (_is_c_frame)
++    return _sender_address_for_c_frame;
++  else {
++    return *sender_pc_addr();
++  }
++}
++
++#ifndef PRODUCT
++// This is a generic constructor which is only used by pns() in debug.cpp.
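init_sender_for_c_frame() above finds where a C frame's prologue saved ra and fp by pattern-matching raw instruction words: mask off the low 16 displacement bits, compare the rest against the stl ra,xx(sp) / stl fp,yy(sp) encodings quoted in the comments, and turn the byte displacement into a word slot by dividing by 8. A standalone sketch of just that matching step, reusing the masks from the patch (negative displacements are not handled, same as in the patch; the sample encoding is fabricated):

#include <cstdint>
#include <cstdio>

constexpr uint32_t kOpMask  = 0xffff0000u;  // opcode + register bits
constexpr uint32_t kStlRaSp = 0xaf5e0000u;  // stl ra, disp(sp)
constexpr uint32_t kStlFpSp = 0xadfe0000u;  // stl fp, disp(sp)

static bool match_store(uint32_t insn, uint32_t pattern, int* slot) {
  if ((insn & kOpMask) != pattern) return false;
  *slot = static_cast<int>(insn & 0x0000ffffu) / 8;  // byte disp -> word slot
  return true;
}

int main() {
  uint32_t insn = kStlRaSp | 0x0010;  // pretend "stl ra,16(sp)"
  int slot = -1;
  if (match_store(insn, kStlRaSp, &slot)) {
    std::printf("ra saved at slot %d above sp\n", slot);  // slot 2
  }
  (void)kStlFpSp;  // the fp store is matched the same way
  return 0;
}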
++frame::frame(void* sp, void* fp, void* pc) { ++ init((intptr_t*)sp, (intptr_t*)fp, (address)pc); ++} ++ ++void frame::pd_ps() {} ++#endif ++ ++void JavaFrameAnchor::make_walkable(JavaThread* thread) { ++ //tty->print_cr("to check here: yj"); ++ // last frame set? ++ if (last_Java_sp() == NULL) return; ++ // already walkable? ++ if (walkable()) return;//assert(false, "to check here: yj"); ++ vmassert(Thread::current() == (Thread*)thread, "not current thread"); ++ vmassert(last_Java_sp() != NULL, "not called from Java code?"); ++ vmassert(last_Java_pc() == NULL, "already walkable"); ++ capture_last_Java_pc(); ++ vmassert(walkable(), "something went wrong"); ++} ++ ++void JavaFrameAnchor::capture_last_Java_pc() { ++ vmassert(_last_Java_sp != NULL, "no last frame set"); ++ vmassert(_last_Java_pc == NULL, "already walkable"); ++ _last_Java_pc = (address)_last_Java_sp[-1]; ++} +diff --git a/src/hotspot/cpu/sw64/frame_sw64.hpp b/src/hotspot/cpu/sw64/frame_sw64.hpp +new file mode 100644 +index 0000000000..1fbe54026d +--- /dev/null ++++ b/src/hotspot/cpu/sw64/frame_sw64.hpp +@@ -0,0 +1,160 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_FRAME_SW64_HPP ++#define CPU_SW64_VM_FRAME_SW64_HPP ++ ++#include "runtime/synchronizer.hpp" ++ ++// A frame represents a physical stack frame (an activation). Frames can be ++// C or Java frames, and the Java frames can be interpreted or compiled. ++// In contrast, vframes represent source-level activations, so that one physical frame ++// can correspond to multiple source level frames because of inlining. ++// A frame is comprised of {pc, fp, sp} ++// ------------------------------ Asm interpreter ---------------------------------------- ++// Layout of asm interpreter frame: ++// [expression stack ] * <- sp ++// [monitors ] \ ++// ... 
| monitor block size ++// [monitors ] / ++// [monitor block size ] ++// [byte code pointer ] = bcp() bcp_offset ++// [pointer to locals ] = locals() locals_offset ++// [constant pool cache ] = cache() cache_offset ++// [methodData ] = mdp() mdx_offset ++// [Method* ] = method() method_offset ++// [last sp ] = last_sp() last_sp_offset ++// [old stack pointer ] (sender_sp) sender_sp_offset ++// [old frame pointer ] <- fp = link() ++// [return pc ] ++// [oop temp ] (only for native calls) ++// [locals and parameters ] ++// <- sender sp ++// ------------------------------ Asm interpreter ---------------------------------------- ++ ++ public: ++ enum { ++ pc_return_offset = 0, ++ // All frames ++ link_offset = 0, ++ return_addr_offset = 1, ++ // non-interpreter frames ++ sender_sp_offset = 2, ++ ++ // Interpreter frames ++ interpreter_frame_result_handler_offset = 3, // for native calls only ++ interpreter_frame_oop_temp_offset = 2, // for native calls only ++ ++ interpreter_frame_sender_sp_offset = -1, ++ // outgoing sp before a call to an invoked method ++ interpreter_frame_last_sp_offset = interpreter_frame_sender_sp_offset - 1, ++ interpreter_frame_method_offset = interpreter_frame_last_sp_offset - 1, ++ interpreter_frame_mirror_offset = interpreter_frame_method_offset - 1, ++ interpreter_frame_mdp_offset = interpreter_frame_mirror_offset - 1, ++ interpreter_frame_cache_offset = interpreter_frame_mdp_offset - 1, ++ interpreter_frame_locals_offset = interpreter_frame_cache_offset - 1, ++ interpreter_frame_bcp_offset = interpreter_frame_locals_offset - 1, ++ interpreter_frame_initial_sp_offset = interpreter_frame_bcp_offset - 1, ++ ++ interpreter_frame_monitor_block_top_offset = interpreter_frame_initial_sp_offset, ++ interpreter_frame_monitor_block_bottom_offset = interpreter_frame_initial_sp_offset, ++ ++ // Entry frames ++ // n.b. these values are determined by the layout defined in ++ // stubGenerator for the Java call stub ++ entry_frame_after_call_words = 21, ++ entry_frame_call_wrapper_offset = -6, //generate_call_stub's call_wrapper_off ++ ++ arg_reg_save_area_bytes = 0 ++ }; ++ ++ intptr_t ptr_at(int offset) const { ++ return *ptr_at_addr(offset); ++ } ++ ++ void ptr_at_put(int offset, intptr_t value) { ++ *ptr_at_addr(offset) = value; ++ } ++ ++ private: ++ // an additional field beyond _sp and _pc: ++ intptr_t* _fp; // frame pointer ++ ++ bool _is_c_frame; ++ intptr_t* _sender_fp_for_c_frame; ++ address _sender_address_for_c_frame; ++ void init_sender_for_c_frame(); ++ ++ // The interpreter and adapters will extend the frame of the caller. ++ // Since oopMaps are based on the sp of the caller before extension ++ // we need to know that value. However in order to compute the address ++ // of the return address we need the real "raw" sp. Since sparc already ++ // uses sp() to mean "raw" sp and unextended_sp() to mean the caller's ++ // original sp we use that convention. 
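The enum above expresses every interpreter-frame field as a signed word offset from fp: positive offsets (return address, sender sp) sit above fp, and the interpreter state sits below it. Assuming the shared frame code resolves a slot as fp[index], which is how these constants are meant to be consumed, a tiny fabricated example looks like this (method_offset works out to -3 from the chain of definitions above):

#include <cstdint>
#include <cstdio>

int main() {
  const int return_addr_offset              = 1;   // from the enum above
  const int interpreter_frame_method_offset = -3;  // sender_sp (-1) -> last_sp (-2) -> method (-3)

  intptr_t stack[16] = {0};
  intptr_t* fp = &stack[8];                        // fabricated frame pointer
  fp[return_addr_offset]              = 0xCAFE;    // pretend return pc
  fp[interpreter_frame_method_offset] = 0xBEEF;    // pretend Method*

  std::printf("ret=0x%lx method=0x%lx\n",
              static_cast<long>(fp[return_addr_offset]),
              static_cast<long>(fp[interpreter_frame_method_offset]));
  return 0;
}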
++ ++ intptr_t* _unextended_sp; ++ void adjust_unextended_sp() NOT_DEBUG_RETURN; ++ ++ intptr_t* ptr_at_addr(int offset) const { ++ return (intptr_t*) addr_at(offset); ++ } ++ ++#ifdef ASSERT ++ // Used in frame::sender_for_{interpreter,compiled}_frame ++ static void verify_deopt_original_pc(CompiledMethod* nm, intptr_t* unextended_sp); ++#endif ++ ++ public: ++ // Constructors ++ ++ frame(intptr_t* sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc); ++ ++ frame(intptr_t* sp, intptr_t* fp); ++ ++ void init(intptr_t* sp, intptr_t* fp, address pc); ++ ++ // accessors for the instance variables ++ // Note: not necessarily the real 'frame pointer' (see real_fp) ++ intptr_t* fp() const { return _fp; } ++ ++ inline address* sender_pc_addr() const; ++ ++ // expression stack tos if we are nested in a java call ++ intptr_t* interpreter_frame_last_sp() const; ++ ++ // helper to update a map with callee-saved RBP ++ static void update_map_with_saved_link(RegisterMap* map, intptr_t** link_addr); ++ ++ // deoptimization support ++ void interpreter_frame_set_last_sp(intptr_t* sp); ++ ++ static jint interpreter_frame_expression_stack_direction() { return -1; } ++ ++ void fixRa(const void* ucVoid); ++ ++ void init_sender_for_c_frame(address f_start_pc); ++ ++#endif // CPU_SW64_VM_FRAME_SW64_HPP +\ No newline at end of file +diff --git a/src/hotspot/cpu/sw64/frame_sw64.inline.hpp b/src/hotspot/cpu/sw64/frame_sw64.inline.hpp +new file mode 100644 +index 0000000000..73946da15d +--- /dev/null ++++ b/src/hotspot/cpu/sw64/frame_sw64.inline.hpp +@@ -0,0 +1,229 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_SW64_VM_FRAME_SW64_INLINE_HPP ++#define CPU_SW64_VM_FRAME_SW64_INLINE_HPP ++ ++#include "code/codeCache.hpp" ++#include "code/vmreg.inline.hpp" ++ ++// Inline functions for Sw64 frames: ++ ++// Constructors: ++ ++inline frame::frame() { ++ _pc = NULL; ++ _sp = NULL; ++ _unextended_sp = NULL; ++ _fp = NULL; ++ _cb = NULL; ++ _deopt_state = unknown; ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp, address pc) { ++ init(sp, fp, pc); ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* unextended_sp, intptr_t* fp, address pc) { ++ _sp = sp; ++ _unextended_sp = unextended_sp; ++ _fp = fp; ++ _pc = pc; ++ assert(pc != NULL, "no pc?"); ++ _cb = CodeCache::find_blob(pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ if (_cb->is_deoptimization_stub()) { ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++ } ++ _is_c_frame = false; ++} ++ ++inline frame::frame(intptr_t* sp, intptr_t* fp) { ++ _sp = sp; ++ _unextended_sp = sp; ++ _fp = fp; ++ _pc = (address)(sp[-1]); ++ Unimplemented(); //ZHJ ++ ++ // Here's a sticky one. This constructor can be called via AsyncGetCallTrace ++ // when last_Java_sp is non-null but the pc fetched is junk. If we are truly ++ // unlucky the junk value could be to a zombied method and we'll die on the ++ // find_blob call. This is also why we can have no asserts on the validity ++ // of the pc we find here. AsyncGetCallTrace -> pd_get_top_frame_for_signal_handler ++ // -> pd_last_frame should use a specialized version of pd_last_frame which could ++ // call a specilaized frame constructor instead of this one. ++ // Then we could use the assert below. However this assert is of somewhat dubious ++ // value. ++ // assert(_pc != NULL, "no pc?"); ++ ++ _cb = CodeCache::find_blob(_pc); ++ adjust_unextended_sp(); ++ ++ address original_pc = CompiledMethod::get_deopt_original_pc(this); ++ if (original_pc != NULL) { ++ _pc = original_pc; ++ _deopt_state = is_deoptimized; ++ } else { ++ _deopt_state = not_deoptimized; ++ } ++} ++ ++// Accessors ++ ++inline bool frame::equal(frame other) const { ++ bool ret = sp() == other.sp() ++ && unextended_sp() == other.unextended_sp() ++ && fp() == other.fp() ++ && pc() == other.pc(); ++ assert(!ret || ret && cb() == other.cb() && _deopt_state == other._deopt_state, "inconsistent construction"); ++ return ret; ++} ++ ++// Return unique id for this frame. The id must have a value where we can distinguish ++// identity and younger/older relationship. NULL represents an invalid (incomparable) ++// frame. 
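Before the accessors that follow, a quick standalone check (illustrative, not taken from the patch) of the pointer-ordering convention this comment relies on: on a downward-growing stack the younger activation has the numerically smaller id, so is_younger()/is_older() can be plain pointer comparisons.

#include <cassert>
#include <cstdint>

int main() {
  intptr_t stack[16];
  intptr_t* older_id   = &stack[12];  // caller frame: allocated first, higher address
  intptr_t* younger_id = &stack[4];   // callee frame: allocated later, lower address

  // Mirrors the is_younger()/is_older() accessors below: both ids come from the
  // same downward-growing thread stack, so raw comparison is sufficient.
  assert(younger_id < older_id);
  assert(older_id > younger_id);
  return 0;
}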
++inline intptr_t* frame::id(void) const { return unextended_sp(); } ++ ++// Relationals on frames based ++// Return true if the frame is younger (more recent activation) than the frame represented by id ++inline bool frame::is_younger(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() < id ; } ++ ++// Return true if the frame is older (less recent activation) than the frame represented by id ++inline bool frame::is_older(intptr_t* id) const { assert(this->id() != NULL && id != NULL, "NULL frame id"); ++ return this->id() > id ; } ++ ++ ++ ++//inline intptr_t* frame::link() const { return (intptr_t*) *(intptr_t **)addr_at(link_offset); } ++ ++inline intptr_t* frame::link_or_null() const { ++ intptr_t** ptr = (intptr_t **)addr_at(link_offset); ++ return os::is_readable_pointer(ptr) ? *ptr : NULL; ++} ++ ++inline intptr_t* frame::unextended_sp() const { return _unextended_sp; } ++ ++// Return address: ++ ++inline address* frame::sender_pc_addr() const { return (address*) addr_at( return_addr_offset); } ++//inline address frame::sender_pc() const { return *sender_pc_addr(); } ++// ++//inline intptr_t* frame::sender_sp() const { return addr_at(sender_sp_offset); } ++ ++inline intptr_t** frame::interpreter_frame_locals_addr() const { ++ return (intptr_t**)addr_at(interpreter_frame_locals_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_last_sp() const { ++ return *(intptr_t**)addr_at(interpreter_frame_last_sp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_bcp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_bcp_offset); ++} ++ ++inline intptr_t* frame::interpreter_frame_mdp_addr() const { ++ return (intptr_t*)addr_at(interpreter_frame_mdp_offset); ++} ++ ++ ++// Constant pool cache ++ ++inline ConstantPoolCache** frame::interpreter_frame_cache_addr() const { ++ return (ConstantPoolCache**)addr_at(interpreter_frame_cache_offset); ++} ++ ++// Method ++ ++inline Method** frame::interpreter_frame_method_addr() const { ++ return (Method**)addr_at(interpreter_frame_method_offset); ++} ++ ++// Mirror ++ ++inline oop* frame::interpreter_frame_mirror_addr() const { ++ return (oop*)addr_at(interpreter_frame_mirror_offset); ++} ++ ++// top of expression stack ++inline intptr_t* frame::interpreter_frame_tos_address() const { ++ intptr_t* last_sp = interpreter_frame_last_sp(); ++ if (last_sp == NULL) { ++ return sp(); ++ } else { ++ // sp() may have been extended or shrunk by an adapter. At least ++ // check that we don't fall behind the legal region. ++ // For top deoptimized frame last_sp == interpreter_frame_monitor_end. 
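The selection being made here is small enough to restate outside the VM; a minimal sketch of the same decision (the Frame struct and field names are illustrative stand-ins, not the patch's types):

#include <cstdint>

// Illustrative only: pick the top-of-stack address the way
// frame::interpreter_frame_tos_address() does in the code around this point.
struct Frame {
  intptr_t* sp;       // current stack pointer
  intptr_t* last_sp;  // saved expression-stack top, or nullptr if not set
};

static intptr_t* tos_address(const Frame& f) {
  // With no call in progress the expression stack top is simply sp;
  // otherwise use the saved last_sp, which survives adapter adjustments.
  return (f.last_sp == nullptr) ? f.sp : f.last_sp;
}

int main() {
  intptr_t slots[4];
  Frame a{&slots[2], nullptr};
  Frame b{&slots[2], &slots[3]};
  return (tos_address(a) == &slots[2] && tos_address(b) == &slots[3]) ? 0 : 1;
}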
++ assert(last_sp <= (intptr_t*) interpreter_frame_monitor_end(), "bad tos"); ++ return last_sp; ++ } ++} ++ ++inline oop* frame::interpreter_frame_temp_oop_addr() const { ++ return (oop *)(fp() + interpreter_frame_oop_temp_offset); ++} ++ ++inline int frame::interpreter_frame_monitor_size() { ++ return BasicObjectLock::size(); ++} ++ ++ ++// expression stack ++// (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++inline intptr_t* frame::interpreter_frame_expression_stack() const { ++ intptr_t* monitor_end = (intptr_t*) interpreter_frame_monitor_end(); ++ return monitor_end-1; ++} ++ ++ ++// Entry frames ++ ++inline JavaCallWrapper** frame::entry_frame_call_wrapper_addr() const { ++ return (JavaCallWrapper**)addr_at(entry_frame_call_wrapper_offset); ++} ++ ++ ++// Compiled frames ++ ++inline oop frame::saved_oop_result(RegisterMap* map) const { ++ return *((oop*) map->location(V0->as_VMReg())); ++} ++ ++inline void frame::set_saved_oop_result(RegisterMap* map, oop obj) { ++ *((oop*) map->location(V0->as_VMReg())) = obj; ++} ++ ++#endif // CPU_SW64_VM_FRAME_SW64_INLINE_HPP +diff --git a/src/hotspot/cpu/sw64/gc/g1/g1BarrierSetAssembler_sw64.cpp b/src/hotspot/cpu/sw64/gc/g1/g1BarrierSetAssembler_sw64.cpp +new file mode 100755 +index 0000000000..958dbe644e +--- /dev/null ++++ b/src/hotspot/cpu/sw64/gc/g1/g1BarrierSetAssembler_sw64.cpp +@@ -0,0 +1,585 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/g1/g1BarrierSet.hpp" ++#include "gc/g1/g1BarrierSetAssembler.hpp" ++#include "gc/g1/g1BarrierSetRuntime.hpp" ++#include "gc/g1/g1CardTable.hpp" ++#include "gc/g1/g1ThreadLocalData.hpp" ++#include "gc/g1/heapRegion.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/sharedRuntime.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_LIRAssembler.hpp" ++#include "c1/c1_MacroAssembler.hpp" ++#include "gc/g1/c1/g1BarrierSetC1.hpp" ++#endif ++ ++#define __ masm-> ++ ++void G1BarrierSetAssembler::gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) {SCOPEMARK_NAME(G1BarrierSetAssembler::gen_write_ref_array_pre_barrier, masm) ++ bool dest_uninitialized = (decorators & IS_DEST_UNINITIALIZED) != 0; ++// __ stop("TODO:should check gen_write_ref_array_pre_barrier jzy"); ++// ShouldNotReachHere(); ++ if (!dest_uninitialized) { ++ Register thread = rthread; ++ ++ Label filtered; ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ cmpw(in_progress, 0); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ cmpb(in_progress, 0); ++ } ++ ++ __ jcc(Assembler::equal, filtered); ++ ++ __ pushad(); // push registers ++ ++ if (count == c_rarg0) { ++ if (addr == c_rarg1) { ++ // exactly backwards!! ++ __ xchgptr(c_rarg1, c_rarg0); ++ } else { ++ __ movl(c_rarg1, count); ++ __ movl(c_rarg0, addr); ++ } ++ } else { ++ __ movl(c_rarg0, addr); ++ __ movl(c_rarg1, count); ++ } ++ if (UseCompressedOops) { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_narrow_oop_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_pre_oop_entry), 2); ++ } ++ ++ __ popad(); ++ ++ __ bind(filtered); ++ } ++} ++ ++void G1BarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) { ++ __ pushad(); // push registers (overkill) ++// __ stop("should check:gen_write_ref_array_post_barrier jzy"); ++ if (c_rarg0 == count) { // On win64 c_rarg0 == rcx ?jzy ++ assert_different_registers(c_rarg1, addr); ++ __ movl(c_rarg1, count); ++ __ movl(c_rarg0, addr); ++ } else { ++ assert_different_registers(c_rarg0, count); ++ __ movl(c_rarg0, addr); ++ __ movl(c_rarg1, count); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_array_post_entry), 2); ++ ++ __ popad(); ++} ++ ++void G1BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) { ++ bool on_oop = type == T_OBJECT || type == T_ARRAY; ++ // __ stop("TODO:check load_at jzy"); ++ bool on_weak = (decorators & ON_WEAK_OOP_REF) != 0; ++ bool on_phantom = (decorators & ON_PHANTOM_OOP_REF) != 0; ++ bool on_reference = on_weak || on_phantom; ++ ModRefBarrierSetAssembler::load_at(masm, decorators, type, dst, src, tmp1, tmp_thread); ++ if (on_oop && on_reference) { ++ const Register thread = rthread; ++ __ enter(); ++ //__ sys_call(0xabc); ++ //__ br(R0, -1); ++ ++ // Generate the G1 pre-barrier code to log the value of ++ // the referent field in an 
SATB buffer. ++ g1_write_barrier_pre(masm /* masm */, ++ noreg /* obj */, ++ dst /* pre_val */, ++ thread /* thread */, ++ tmp1 /* tmp */, ++ true /* tosca_live */, ++ true /* expand_call */); ++ __ leave(); ++ } ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call) { ++ // If expand_call is true then we expand the call_VM_leaf macro ++ // directly to skip generating the check by ++ // InterpreterMacroAssembler::call_VM_leaf_base that checks _last_sp. ++ ++ assert(thread == rthread, "must be"); ++ const Register rax = V0; ++ Label done; ++ Label runtime; ++ assert(pre_val != noreg, "check this code"); ++ ++ if (obj != noreg) { ++ assert_different_registers(obj, pre_val, tmp); ++ assert(pre_val != rax, "check this code"); ++ } ++ ++ Address in_progress(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ // Is marking active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ cmpw(in_progress, 0); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ ldbu(rscratch4, in_progress); ++ __ sextb(rscratch4, rcc); ++// __ cmpb(in_progress, 0); ++ } ++ __ jcc(Assembler::equal, done); ++ ++ // Do we need to load the previous value? ++ if (obj != noreg) { ++ __ load_heap_oop(pre_val, Address(obj, 0), noreg, noreg, AS_RAW); ++ } ++ ++ // Is the previous value null? ++ __ cmpptr(pre_val, (int32_t) NULL_WORD); ++ __ jcc(Assembler::equal, done); ++ ++ // Can we store original value in the thread's buffer? ++ // Is index == 0? ++ // (The index field is typed as size_t.) ++ ++ __ ldptr(tmp, index); // tmp := *index_adr ++ __ cmpptr(tmp, 0); // tmp == 0? ++ __ jcc(Assembler::equal, runtime); // If yes, goto runtime//sny beq(tmp, runtime); ++ ++ __ subptr(tmp, wordSize, tmp); // tmp := tmp - wordSize ++ __ stptr(tmp, index); // *index_adr := tmp ++ __ addptr(tmp, buffer); // tmp := tmp + *buffer_adr tmp=T5 ++ ++ // Record the previous value ++ __ stptr(pre_val, Address(tmp, 0)); ++ __ jmp(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ if(tosca_live) __ push(rax); ++ ++ if (obj != noreg && obj != rax) ++ __ push(obj); ++ ++ if (pre_val != rax) ++ __ push(pre_val); ++ ++ // Calling the runtime using the regular call_VM_leaf mechanism generates ++ // code (generated by InterpreterMacroAssember::call_VM_leaf_base) ++ // that checks that the *(ebp+frame::interpreter_frame_last_sp) == NULL. ++ // ++ // If we care generating the pre-barrier without a frame (e.g. in the ++ // intrinsified Reference.get() routine) then ebp might be pointing to ++ // the caller frame and so this check will most likely fail at runtime. ++ // ++ // Expanding the call directly bypasses the generation of the check. ++ // So when we do not have have a full interpreter frame on the stack ++ // expand_call should be passed true. 
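Stripped of the register shuffling, the pre-barrier emitted above follows a short fast-path/slow-path shape: skip when marking is inactive or the previous value is null, try to append the previous value to the thread-local SATB buffer, and fall back to the runtime only when the buffer index has reached zero. A rough plain-C++ sketch of that logic (the SatbQueue type, wordSize constant and runtime hook are illustrative stand-ins, not the patch's own types):

#include <cstddef>
#include <cstdint>

// Illustrative stand-ins for the thread-local SATB queue fields used above.
struct SatbQueue {
  bool      active;   // satb_mark_queue_active
  size_t    index;    // byte offset of the next free slot, counts down to 0
  intptr_t* buffer;   // satb_mark_queue_buffer
};

static const size_t wordSize = sizeof(intptr_t);

// Stand-in for the slow path reached through write_ref_field_pre_entry.
static void runtime_write_ref_field_pre(SatbQueue&, intptr_t) {}

// Fast path mirroring the g1_write_barrier_pre sequence above.
static void satb_pre_barrier(SatbQueue& q, intptr_t pre_val) {
  if (!q.active) return;              // marking not active: nothing to log
  if (pre_val == 0) return;           // previous value was null: nothing to log
  if (q.index == 0) {                 // buffer full: hand off to the runtime
    runtime_write_ref_field_pre(q, pre_val);
    return;
  }
  q.index -= wordSize;                                // claim a slot
  *(intptr_t*)((char*)q.buffer + q.index) = pre_val;  // record the previous value
}

int main() {
  intptr_t buf[4] = {0};
  SatbQueue q{true, sizeof(buf), buf};
  satb_pre_barrier(q, 0x42);          // lands in buf[3]
  return (buf[3] == 0x42 && q.index == 3 * wordSize) ? 0 : 1;
}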
++ ++ ++ if (expand_call) { ++ assert(pre_val != c_rarg1, "smashed arg"); ++ ++ if (c_rarg1 != thread) { ++ __ movl(c_rarg1, thread); ++ } ++ if (c_rarg0 != pre_val) { ++ __ movl(c_rarg0, pre_val); ++ } ++ ++ __ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), 2); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), pre_val, thread); ++ } ++ ++ // save the live input values ++ if (pre_val != rax) ++ __ pop(pre_val); ++ ++ if (obj != noreg && obj != rax) ++ __ pop(obj); ++ ++ if(tosca_live) __ pop(rax); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2) { ++ ++ assert(thread == rthread, "must be"); ++ assert(tmp != AT, "must be"); ++ assert(tmp2 != AT, "must be"); ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ CardTableBarrierSet* ct = ++ barrier_set_cast(BarrierSet::barrier_set()); ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label runtime; ++ ++ // Does store cross heap regions? ++ ++// __ movl(rscratch4, store_addr); ++ __ xorptr(store_addr, new_val, rscratch4); ++ __ srll(rscratch4, HeapRegion::LogOfHRGrainBytes, rscratch4); ++ __ jcc(Assembler::equal, done, rscratch4); ++ ++ // crosses regions, storing NULL? ++ ++ __ cmpptr(new_val, (int32_t) NULL_WORD); ++ __ jcc(Assembler::equal, done); ++ ++ // storing region crossing non-NULL, is card already dirty? ++ ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); //dx? ++ ++ const Register card_addr = tmp; ++ const Register cardtable = tmp2; ++ ++ __ movl(card_addr, store_addr); ++ __ srll(card_addr, CardTable::card_shift, card_addr); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. ++ __ mov_immediate64(cardtable, (intptr_t)ct->card_table()->byte_map_base()); ++ __ addptr(card_addr, cardtable, card_addr); ++ ++ __ cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val()); ++ __ jcc(Assembler::equal, done); ++ ++ assert((int)CardTable::dirty_card_val() == 0, "must be 0"); //dx? ++ ++ //__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); ++ __ memb(); ++ __ cmpb(Address(card_addr, 0), (int)G1CardTable::dirty_card_val()); ++ __ jcc(Assembler::equal, done); ++ ++ ++ // storing a region crossing, non-NULL oop, card is clean. ++ // dirty card and log. ++ __ mov_immediate32(rscratch4, (int)G1CardTable::dirty_card_val()); ++ __ stb(rscratch4, Address(card_addr, 0));//movb ++ ++ __ ldws(rcc, queue_index); ++ __ beq_l(rcc, runtime); ++ __ jcc(Assembler::equal, runtime); ++ __ subl(rcc, wordSize, rcc); ++ __ stw (rcc, queue_index); //LSP!! 
++ __ ldptr(tmp2, buffer); ++ __ ldl(rscratch4, queue_index);//?sny ldw ++ __ addl(tmp2, rscratch4, tmp2); ++ __ stl(card_addr, Address(tmp2, 0)); ++ ++ __ jmp(done); ++ ++ __ bind(runtime); ++ // save the live input values ++ __ push(store_addr); ++ __ push(new_val); ++ ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ ++ __ pop(new_val); ++ __ pop(store_addr); ++ ++ __ bind(done); ++} ++ ++void G1BarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool as_normal = (decorators & AS_NORMAL) != 0; ++ assert((decorators & IS_DEST_UNINITIALIZED) == 0, "unsupported"); ++ // __ stop("TODO:check oop_store_at jzy"); ++ bool needs_pre_barrier = as_normal; ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ Register tmp3 = r8; //need different? x86 uses r8 ++ Register thread = rthread; ++ assert_different_registers(tmp1, tmp2, tmp3, thread); //need this check? jzy ++ ++ // flatten object address if needed ++ // We do it regardless of precise because we need the registers ++ if (dst.index() == noreg && dst.disp() == 0) { ++ if (dst.base() != tmp1) { ++ __ movl(tmp1, dst.base()); // ! ++ } ++ } else { ++ __ lea(tmp1, dst); ++ } ++ ++ ++ if (needs_pre_barrier) { ++ g1_write_barrier_pre(masm /*masm*/, ++ tmp1 /* obj */, ++ tmp2 /* pre_val */, ++ thread /* thread */, ++ tmp3 /* tmp */, ++ val != noreg /* tosca_live */, ++ false /* expand_call */); ++ } ++ if (val == noreg) { ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg); ++ } else { ++ Register new_val = val; ++ if (needs_post_barrier) { ++ // G1 barrier needs uncompressed oop for region cross check. ++ if (UseCompressedOops) { ++ new_val = tmp2; ++ __ movl(new_val, val); ++ } ++ } ++ BarrierSetAssembler::store_at(masm, decorators, type, Address(tmp1, 0), val, noreg, noreg); ++ if (needs_post_barrier) { ++ g1_write_barrier_post(masm /*masm*/, ++ tmp1 /* store_adr */, ++ new_val /* new_val */, ++ thread /* thread */, ++ tmp3 /* tmp */, ++ tmp2 /* tmp2 */); ++ } ++} ++ ++} ++ ++#ifdef COMPILER1 ++ ++#undef __ ++#define __ ce->masm()-> ++ ++void G1BarrierSetAssembler::gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ // At this point we know that marking is in progress. ++ // If do_load() is true then we have to emit the ++ // load of the previous value; otherwise it has already ++ // been loaded into _pre_val. 
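Before the C1 stubs below, the post barrier emitted by g1_write_barrier_post above can be read the same way as the pre-barrier sketch earlier: filter same-region and null stores, filter already-young and already-dirty cards (with a StoreLoad fence before the dirty re-check), then dirty the card and enqueue it, falling back to the runtime when the dirty-card queue is full. A rough plain-C++ sketch under illustrative assumptions (toy queue and card-table constants, not the real G1 values apart from dirty being 0 as asserted above):

#include <atomic>
#include <cstddef>
#include <cstdint>

// Illustrative constants and thread-local queue, not the real G1 types.
static const int     LogOfHRGrainBytes = 20;   // example 1 MB regions
static const int     card_shift        = 9;    // example 512-byte cards
static const uint8_t g1_young_card_val = 2;    // illustrative value
static const uint8_t dirty_card_val    = 0;    // matches the assert above
static const uint8_t clean_card_val    = 1;    // illustrative value

struct DirtyCardQueue {
  size_t    index;    // byte offset, counts down like the SATB queue
  intptr_t* buffer;
};

static uint8_t* byte_map_base;                 // biased card-table base

static void runtime_write_ref_field_post(DirtyCardQueue&, uint8_t*) {}

static void g1_post_barrier(DirtyCardQueue& q,
                            intptr_t store_addr, intptr_t new_val) {
  // Does the store cross heap regions?  Same region => no remembered-set work.
  if (((store_addr ^ new_val) >> LogOfHRGrainBytes) == 0) return;
  if (new_val == 0) return;                    // storing null: nothing to track

  uint8_t* card = byte_map_base + (static_cast<uintptr_t>(store_addr) >> card_shift);
  if (*card == g1_young_card_val) return;      // young regions need no refinement

  std::atomic_thread_fence(std::memory_order_seq_cst);  // StoreLoad before re-check
  if (*card == dirty_card_val) return;         // someone already dirtied it

  *card = dirty_card_val;                      // dirty the card ...
  if (q.index == 0) {                          // ... and enqueue it, or hand off
    runtime_write_ref_field_post(q, card);     //     when the queue is full
    return;
  }
  q.index -= sizeof(intptr_t);
  *(intptr_t*)((char*)q.buffer + q.index) = (intptr_t)card;
}

int main() {
  static uint8_t cards[16];
  for (size_t i = 0; i < sizeof(cards); i++) cards[i] = clean_card_val;
  byte_map_base = cards;                        // pretend the heap starts at address 0

  intptr_t buf[4] = {0};
  DirtyCardQueue q{sizeof(buf), buf};

  intptr_t store_addr = 3 << card_shift;                   // lands in cards[3]
  intptr_t new_val    = intptr_t(5) << LogOfHRGrainBytes;  // other region, non-null
  g1_post_barrier(q, store_addr, new_val);
  return (cards[3] == dirty_card_val && buf[3] == (intptr_t)&cards[3]) ? 0 : 1;
}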
++ // __ stop("TODO:check gen_pre_barrier_stub jzy"); ++ __ bind(*stub->entry()); ++ assert(stub->pre_val()->is_register(), "Precondition."); ++ ++ Register pre_val_reg = stub->pre_val()->as_register(); ++ ++ if (stub->do_load()) { ++ ce->mem2reg(stub->addr(), stub->pre_val(), T_OBJECT, stub->patch_code(), stub->info(), false /*wide*/, false /*unaligned*/); ++ } ++ ++ __ cmpptr(pre_val_reg, (int32_t)NULL_WORD); ++ __ jcc(Assembler::equal, *stub->continuation()); ++ ce->store_parameter(stub->pre_val()->as_register(), 0); ++ __ call(RuntimeAddress(bs->pre_barrier_c1_runtime_code_blob()->code_begin())); ++ __ jmp(*stub->continuation()); ++ ++} ++ ++void G1BarrierSetAssembler::gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub) { ++ G1BarrierSetC1* bs = (G1BarrierSetC1*)BarrierSet::barrier_set()->barrier_set_c1(); ++ // __ stop("TODO:check gen_post_barrier_stub jzy"); ++ __ bind(*stub->entry()); ++ assert(stub->addr()->is_register(), "Precondition."); ++ assert(stub->new_val()->is_register(), "Precondition."); ++ Register new_val_reg = stub->new_val()->as_register(); ++ __ cmpptr(new_val_reg, (int32_t) NULL_WORD); ++ __ jcc(Assembler::equal, *stub->continuation()); ++ ce->store_parameter(stub->addr()->as_pointer_register(), 0); ++ __ call(RuntimeAddress(bs->post_barrier_c1_runtime_code_blob()->code_begin())); ++ __ jmp(*stub->continuation()); ++} ++ ++#undef __ ++ ++#define __ sasm-> ++ ++void G1BarrierSetAssembler::generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_pre_barrier", false); ++ // arg0 : previous value of memory ++ const Register rax = V0; ++ const Register rdx = T0; ++ const Register rcx = T1; ++ // __ stop("TODO:check generate_c1_pre_barrier_runtime_stub jzy"); ++ __ push(rax); ++ __ push(rdx); ++ ++ const Register pre_val = rax; ++ const Register thread = rthread; ++ const Register tmp = rdx; ++ ++ ++ Address queue_active(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_active_offset())); ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::satb_mark_queue_buffer_offset())); ++ ++ Label done; ++ Label runtime; ++ ++ // Is marking still active? ++ if (in_bytes(SATBMarkQueue::byte_width_of_active()) == 4) { ++ __ cmpw(queue_active, 0); ++ } else { ++ assert(in_bytes(SATBMarkQueue::byte_width_of_active()) == 1, "Assumption"); ++ __ cmpb(queue_active, 0); ++ } ++ __ jcc(Assembler::equal, done); ++ ++ // Can we store original value in the thread's buffer? 
++ ++ __ ldptr(tmp, queue_index); ++ __ jcc(Assembler::zero, runtime, tmp); ++ __ subptr(tmp, wordSize, tmp); ++ __ stl(tmp, queue_index); ++ __ addptr(tmp, buffer, tmp); ++ ++ // prev_val (rax) ++ __ load_parameter(0, pre_val); ++ __ stl(pre_val, Address(tmp, 0)); ++ __ jmp(done); ++ ++ __ bind(runtime); ++ ++ __ save_live_registers_no_oop_map(true); ++ ++ // load the pre-value ++ __ load_parameter(0, rcx); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_pre_entry), rcx, thread); ++ ++ __ restore_live_registers(true); ++ ++ __ bind(done); ++ ++ __ pop(rdx); ++ __ pop(rax); ++ ++ __ epilogue(); ++} ++ ++void G1BarrierSetAssembler::generate_c1_post_barrier_runtime_stub(StubAssembler* sasm) { ++ __ prologue("g1_post_barrier", false); ++ ++ // arg0: store_address ++ Address store_addr(rbp, 2*BytesPerWord); ++ // __ stop("TODO:check generate_c1_post_barrier_runtime_stub jzy"); ++ CardTableBarrierSet* ct = ++ barrier_set_cast(BarrierSet::barrier_set()); ++ assert(sizeof(*ct->card_table()->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ Label done; ++ Label enqueued; ++ Label runtime; ++ ++ // At this point we know new_value is non-NULL and the new_value crosses regions. ++ // Must check to see if card is already dirty ++ const Register rax = V0; ++ const Register rdx = T0; ++ const Register rcx = T1; ++ ++ const Register thread = NOT_LP64(rax) LP64_ONLY(rthread); ++ ++ Address queue_index(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_index_offset())); ++ Address buffer(thread, in_bytes(G1ThreadLocalData::dirty_card_queue_buffer_offset())); ++ ++ __ push(rax); ++ __ push(rcx); ++ ++ const Register cardtable = rax; ++ const Register card_addr = rcx; ++ ++ __ load_parameter(0, card_addr); ++ __ srll(card_addr, CardTable::card_shift, card_addr); ++ // Do not use ExternalAddress to load 'byte_map_base', since 'byte_map_base' is NOT ++ // a valid address and therefore is not properly handled by the relocation code. ++ __ mov_immediate64(cardtable, (intptr_t)ct->card_table()->byte_map_base()); ++ __ addptr(card_addr, cardtable, card_addr); ++ ++ ++ __ cmpb(Address(card_addr, 0), (int)G1CardTable::g1_young_card_val()); ++ __ jcc(Assembler::equal, done); ++ ++ //__ membar(Assembler::Membar_mask_bits(Assembler::StoreLoad)); ++ __ memb(); ++ __ cmpb(Address(card_addr, 0), (int)CardTable::dirty_card_val()); ++ __ jcc(Assembler::equal, done); ++ ++ // storing region crossing non-NULL, card is clean. ++ // dirty card and log. 
++ const Register tmp = rdx; ++ __ push(rdx); ++ ++ __ mov_immediate32(tmp, (int)CardTable::dirty_card_val()); ++ __ stb(tmp, Address(card_addr, 0)); ++ ++ __ ldptr(tmp, queue_index); ++ __ jcc(Assembler::zero, runtime, tmp); ++ __ subptr(tmp, wordSize, tmp); ++ __ stl(tmp, queue_index); ++ __ addptr(tmp, buffer, tmp); ++ __ stl(card_addr, Address(tmp, 0)); ++ __ jmp(enqueued); ++ ++ __ bind(runtime); ++ ++ __ save_live_registers_no_oop_map(true); ++ ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, G1BarrierSetRuntime::write_ref_field_post_entry), card_addr, thread); ++ ++ __ restore_live_registers(true); ++ ++ __ bind(enqueued); ++ __ pop(rdx); ++ ++ __ bind(done); ++ __ pop(rcx); ++ __ pop(rax); ++ ++ __ epilogue(); ++} ++ ++#undef __ ++ ++#endif // COMPILER1 +diff --git a/src/hotspot/cpu/sw64/gc/g1/g1BarrierSetAssembler_sw64.hpp b/src/hotspot/cpu/sw64/gc/g1/g1BarrierSetAssembler_sw64.hpp +new file mode 100644 +index 0000000000..4320e5caaa +--- /dev/null ++++ b/src/hotspot/cpu/sw64/gc/g1/g1BarrierSetAssembler_sw64.hpp +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_SW64_GC_G1_G1BARRIERSETASSEMBLER_SW64_HPP ++#define CPU_SW64_GC_G1_G1BARRIERSETASSEMBLER_SW64_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++#include "utilities/macros.hpp" ++ ++class LIR_Assembler; ++class StubAssembler; ++class G1PreBarrierStub; ++class G1PostBarrierStub; ++ ++class G1BarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count); ++ void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp); ++ ++ void g1_write_barrier_pre(MacroAssembler* masm, ++ Register obj, ++ Register pre_val, ++ Register thread, ++ Register tmp, ++ bool tosca_live, ++ bool expand_call); ++ ++ void g1_write_barrier_post(MacroAssembler* masm, ++ Register store_addr, ++ Register new_val, ++ Register thread, ++ Register tmp, ++ Register tmp2); ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++public: ++#ifdef COMPILER1 ++ void gen_pre_barrier_stub(LIR_Assembler* ce, G1PreBarrierStub* stub); ++ void gen_post_barrier_stub(LIR_Assembler* ce, G1PostBarrierStub* stub); ++ ++ void generate_c1_pre_barrier_runtime_stub(StubAssembler* sasm); ++ void generate_c1_post_barrier_runtime_stub(StubAssembler* sasm); ++#endif ++ ++ void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++}; ++ ++#endif // CPU_SW64_GC_G1_G1BARRIERSETASSEMBLER_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/gc/shared/barrierSetAssembler_sw64.cpp b/src/hotspot/cpu/sw64/gc/shared/barrierSetAssembler_sw64.cpp +new file mode 100755 +index 0000000000..23f3c39230 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/gc/shared/barrierSetAssembler_sw64.cpp +@@ -0,0 +1,272 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "runtime/jniHandles.hpp" ++#include "runtime/thread.hpp" ++ ++#define __ masm-> ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} ++#endif ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void BarrierSetAssembler::load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread) {SCOPEMARK_NAME(BarrierSetAssembler::load_at, masm) ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ bool atomic = (decorators & MO_RELAXED) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (UseCompressedOops) { ++ __ ldwu(dst, src); ++ if (is_not_null) { ++ __ decode_heap_oop_not_null(dst); ++ } else { ++ __ decode_heap_oop(dst); // ++ } ++ } else { ++ __ ldl(dst, src); ++ } ++ } else { ++ assert(in_native, "why else?"); ++ __ ldl(dst, src); ++ } ++ break; ++ } ++ case T_BOOLEAN: __ load_unsigned_byte(dst, src); break; ++ case T_BYTE: __ load_signed_byte64(dst, src); break; ++ case T_CHAR: __ load_unsigned_short(dst, src); break; ++ case T_SHORT: __ load_signed_short(dst, src); break; ++ case T_INT: __ ldws (dst, src); break; ++ case T_ADDRESS: __ ldl (dst, src); break; ++ case T_FLOAT: ++ assert(dst == noreg, "only to ftos"); ++ __ load_float(FSF, src); ++ break; ++ case T_DOUBLE: ++ assert(dst == noreg, "only to dtos"); ++ __ load_double(FSF, src); ++ break; ++ case T_LONG: ++ assert(dst == noreg, "only to ltos"); ++ __ ldl(FSR, src); ++ break; ++ default: Unimplemented(); ++ } ++} ++ ++void BarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ bool in_native = (decorators & IN_NATIVE) != 0; ++ bool is_not_null = (decorators & IS_NOT_NULL) != 0; ++ bool atomic = (decorators & MO_RELAXED) != 0; ++ ++ switch (type) { ++ case T_OBJECT: ++ case T_ARRAY: { ++ if (in_heap) { ++ if (val == noreg) { ++ assert(!is_not_null, "inconsistent access"); ++ ++ if (UseCompressedOops) { ++ __ stw(R0, dst); ++ } else { ++ __ stl(R0, dst); ++ } ++ ++ } else { ++ ++ if (UseCompressedOops) { ++ assert(!dst.uses(val), "not enough registers"); ++ if (is_not_null) { ++ __ encode_heap_oop_not_null(val); ++ } else { ++ __ encode_heap_oop(val); ++ } ++ __ stw(val, dst); ++ } else { ++ __ stl(val, dst); ++ } ++ } ++ } else { ++ assert(in_native, "why else?"); ++ assert(val != noreg, "not supported"); ++ __ stl(val, dst); ++ } ++ break; ++ } ++ case T_BOOLEAN: ++ __ andw(val, 0x1, val); // boolean is true if LSB is 1 ++ __ stb(val, dst); ++ break; ++ case T_BYTE: ++ __ stb(val, dst); ++ break; ++ case T_SHORT: ++ __ sth(val, dst); ++ break; ++ case T_CHAR: ++ __ sth(val, dst); ++ break; ++ case T_INT: ++ __ stw(val, dst); ++ break; ++ case T_LONG: ++ assert(val == noreg, "only tos"); ++ __ stl(FSR, dst); ++ break; ++ case T_FLOAT: ++ assert(val == noreg, "only tos"); ++ __ store_float(FSF, dst); ++ break; ++ case T_DOUBLE: ++ assert(val == noreg, "only tos"); ++ __ store_double(FSF, dst); ++ break; ++ case T_ADDRESS: ++ __ stptr(val, dst); ++ 
break; ++ default: Unimplemented(); ++ } ++} ++/* ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Address obj2) { ++ __ cmpptr(obj1, obj2); ++} ++*/ ++void BarrierSetAssembler::obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2, Register cc) { ++ __ cmpptr(obj1, obj2, cc); ++} ++ ++// yj todo: below ++void BarrierSetAssembler::try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath) { ++ const int32_t inverted_jweak_mask = ~static_cast(JNIHandles::weak_tag_mask); ++ STATIC_ASSERT(inverted_jweak_mask == -2); // otherwise check this code ++ __ andptr(obj, inverted_jweak_mask, obj); ++ __ ldptr(obj, Address(obj, 0)); // *obj ++} ++ ++void BarrierSetAssembler::tlab_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ assert_different_registers(obj, t1, t2); ++ assert_different_registers(obj, var_size_in_bytes, t1); ++ Register end = t2; ++ if (!thread->is_valid()) { ++ thread = rthread; ++ } ++ ++ __ verify_tlab(); ++ ++ __ ldptr(obj, Address(thread, JavaThread::tlab_top_offset())); ++ if (var_size_in_bytes == noreg) { ++ __ lea(end, Address(obj, con_size_in_bytes)); ++ } else { ++ __ lea(end, Address(obj, var_size_in_bytes, Address::times_1)); ++ } ++ __ cmpptr(end, Address(thread, JavaThread::tlab_end_offset())); ++ __ jcc(Assembler::above, slow_case); ++ ++ // update the tlab top pointer ++ __ stptr(end, Address(thread, JavaThread::tlab_top_offset())); ++ ++ // recover var_size_in_bytes if necessary ++ if (var_size_in_bytes == end) { ++ __ subptr(var_size_in_bytes, obj, var_size_in_bytes); ++ } ++ __ verify_tlab(); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void BarrierSetAssembler::eden_allocate(MacroAssembler* masm, ++ Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) { ++ Register rax = V0; ++ assert(obj == rax, "obj must be in rax, for cmpxchg"); ++ assert_different_registers(obj, var_size_in_bytes, t1); ++ if (!Universe::heap()->supports_inline_contig_alloc()) { ++ __ jmp(slow_case); ++ } else { ++ Register end = t1; ++ Label retry; ++ __ bind(retry); ++ ExternalAddress heap_top((address) Universe::heap()->top_addr()); ++ __ ldptr(obj, heap_top); ++ if (var_size_in_bytes == noreg) { ++ __ lea(end, Address(obj, con_size_in_bytes)); ++ } else { ++ __ lea(end, Address(obj, var_size_in_bytes, Address::times_1)); ++ } ++ // if end < obj then we wrapped around => object too long => slow case ++ __ cmpptr(end, obj); ++ __ jcc(Assembler::below, slow_case); ++ __ cmpptr(end, ExternalAddress((address) Universe::heap()->end_addr())); ++ __ jcc(Assembler::above, slow_case); ++ // Compare obj with the top addr, and if still equal, store the new top addr in ++ // end at the address of the top addr pointer. Sets ZF if was equal, and clears ++ // it otherwise. Use lock prefix for atomicity on MPs. 
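The retry loop described in the comment above is a classic compare-and-swap bump allocation. A small standalone sketch of the same idea using std::atomic in place of the cmpxchgptr on the heap top (the ToyEden type and field names are illustrative; the real code also handles the TLAB path and the allocated-bytes accounting shown nearby):

#include <atomic>
#include <cstddef>

// Illustrative shared "eden" with a bump pointer; not the real heap interface.
struct ToyEden {
  std::atomic<char*> top;
  char*              end;
};

// Mirrors the eden_allocate fast path above: read top, compute the new end,
// fail to the slow path on wraparound or exhaustion, otherwise CAS the top
// forward and retry if another thread won the race.
static char* eden_allocate(ToyEden& eden, size_t size_in_bytes) {
  for (;;) {
    char* obj     = eden.top.load(std::memory_order_relaxed);
    char* new_top = obj + size_in_bytes;
    if (new_top < obj || new_top > eden.end) {
      return nullptr;                       // wrapped or out of space: slow case
    }
    if (eden.top.compare_exchange_weak(obj, new_top,
                                       std::memory_order_relaxed)) {
      return obj;                           // we own [obj, obj + size)
    }
    // CAS failed: another thread moved top; loop and try again.
  }
}

int main() {
  static char space[1024];
  ToyEden eden;
  eden.top.store(space);
  eden.end = space + sizeof(space);
  char* a = eden_allocate(eden, 64);
  char* b = eden_allocate(eden, 64);
  return (a == space && b == space + 64) ? 0 : 1;
}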
++ __ cmpxchgptr(end, heap_top, obj, rscratch2);//AT==0 should retry, it's special TODO:Fixme jzy ++ __ jcc(Assembler::failed, retry); ++ incr_allocated_bytes(masm, thread, var_size_in_bytes, con_size_in_bytes); ++ } ++} ++ ++void BarrierSetAssembler::incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, ++ int con_size_in_bytes) { ++ if (!thread->is_valid()) { ++ thread = rthread; ++ } ++ ++ __ ldl(rscratch4, Address(thread, in_bytes(JavaThread::allocated_bytes_offset()))); ++ if (var_size_in_bytes->is_valid()) { ++ __ addl(rscratch4, var_size_in_bytes, rscratch4); ++ } else { ++ __ addl(rscratch4, con_size_in_bytes, rscratch4); ++ } ++ __ stl(rscratch4, Address(thread, in_bytes(JavaThread::allocated_bytes_offset()))); ++} ++ ++ +diff --git a/src/hotspot/cpu/sw64/gc/shared/barrierSetAssembler_sw64.hpp b/src/hotspot/cpu/sw64/gc/shared/barrierSetAssembler_sw64.hpp +new file mode 100644 +index 0000000000..d19238d626 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/gc/shared/barrierSetAssembler_sw64.hpp +@@ -0,0 +1,75 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_SW64_GC_SHARED_BARRIERSETASSEMBLER_SW64_HPP ++#define CPU_SW64_GC_SHARED_BARRIERSETASSEMBLER_SW64_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "memory/allocation.hpp" ++#include "oops/access.hpp" ++ ++class BarrierSetAssembler: public CHeapObj { ++private: ++ void incr_allocated_bytes(MacroAssembler* masm, Register thread, ++ Register var_size_in_bytes, int con_size_in_bytes); ++ ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register src, Register dst, Register count) {} ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register start, Register end, Register tmp) {} ++ virtual void load_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register dst, Address src, Register tmp1, Register tmp_thread); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++ virtual void try_resolve_jobject_in_native(MacroAssembler* masm, Register jni_env, ++ Register obj, Register tmp, Label& slowpath); ++ ++ virtual void tlab_allocate(MacroAssembler* masm, ++ Register thread, // Current thread ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ ++ void eden_allocate(MacroAssembler* masm, ++ Register thread, // Current thread ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ virtual void barrier_stubs_init() {} ++ ++ virtual void obj_equals(MacroAssembler* masm, ++ Register obj1, Register obj2, Register cc=rcc); ++ ++}; ++ ++#endif // CPU_SW64_GC_SHARED_BARRIERSETASSEMBLER_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/gc/shared/cardTableBarrierSetAssembler_sw64.cpp b/src/hotspot/cpu/sw64/gc/shared/cardTableBarrierSetAssembler_sw64.cpp +new file mode 100644 +index 0000000000..5e1d9387b4 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/gc/shared/cardTableBarrierSetAssembler_sw64.cpp +@@ -0,0 +1,149 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/cardTable.hpp" ++#include "gc/shared/cardTableBarrierSet.hpp" ++#include "gc/shared/cardTableBarrierSetAssembler.hpp" ++#include "interpreter/interp_masm.hpp" ++ ++#define __ masm-> ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) ++ ++void CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count, Register tmp) {SCOPEMARK_NAME(CardTableBarrierSetAssembler::gen_write_ref_array_post_barrier, masm) ++// ShouldNotReachHere(); ++ BarrierSet *bs = BarrierSet::barrier_set(); ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ intptr_t disp = (intptr_t) ct->byte_map_base(); ++ ++ Label L_loop, L_done; ++ const Register end = count; ++ assert_different_registers(addr, end); ++ ++ __ testw(count, count); ++ __ jcc(Assembler::zero, L_done); // zero count - nothing to do ++ ++ ++ __ lea(end, Address(addr, count, TIMES_OOP, 0)); // end == addr+count*oop_size ++ __ subptr(end, BytesPerHeapOop, end); // end - 1 to make inclusive ++ __ srll(addr, CardTable::card_shift, addr); ++ __ srll(end, CardTable::card_shift, end); ++ __ subptr(end, addr, end); // end --> cards count ++ ++ __ mov_immediate64(tmp, disp); ++ __ addptr(addr, tmp, addr); ++ __ bind(L_loop); ++ __ stb(R0, Address(addr, count, Address::times_1)); ++ __ decrement(count); ++ __ jcc(Assembler::greaterEqual, L_loop, count); ++ ++ __ bind(L_done); ++} ++ ++void CardTableBarrierSetAssembler::store_check(MacroAssembler* masm, Register obj, Address dst) {SCOPEMARK_NAME(CardTableBarrierSetAssembler::store_check, masm) ++ // Does a store check for the oop in register obj. The content of ++ // register obj is destroyed afterwards. ++ BarrierSet* bs = BarrierSet::barrier_set(); ++ ++ CardTableBarrierSet* ctbs = barrier_set_cast(bs); ++ CardTable* ct = ctbs->card_table(); ++ assert(sizeof(*ct->byte_map_base()) == sizeof(jbyte), "adjust this code"); ++ ++ __ srll(obj, CardTable::card_shift, obj); ++ ++ Address card_addr; ++ ++ // The calculation for byte_map_base is as follows: ++ // byte_map_base = _byte_map - (uintptr_t(low_bound) >> card_shift); ++ // So this essentially converts an address to a displacement and it will ++ // never need to be relocated. On 64bit however the value may be too ++ // large for a 32bit displacement. ++ intptr_t byte_map_base = (intptr_t)ct->byte_map_base(); ++ if (__ is_simm32(byte_map_base)) { ++ card_addr = Address(noreg, obj, Address::times_1, byte_map_base); ++ } else { ++ // By doing it as an ExternalAddress 'byte_map_base' could be converted to a rip-relative ++ // displacement and done in a single instruction given favorable mapping and a ++ // smarter version of as_Address. However, 'ExternalAddress' generates a relocation ++ // entry and that entry is not properly handled by the relocation code. 
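The displacement trick discussed in this comment is simply a biased base pointer precomputed so that one shift and one add yield the card address. A tiny standalone illustration of the arithmetic (the addresses and card size are made up; only the formula matches the code here):

#include <cstdint>
#include <cstdio>

int main() {
  // card_addr = byte_map_base + (addr >> card_shift), where the biased base is
  // byte_map_base = byte_map - (low_bound >> card_shift).
  const int       card_shift = 9;              // 512-byte cards (illustrative)
  const uintptr_t low_bound  = 0x40000000u;    // pretend heap base (illustrative)
  static uint8_t  byte_map[1 << 12];           // pretend card table storage

  // Do the biasing in integer space so the sketch stays well-defined.
  uintptr_t byte_map_base = (uintptr_t)byte_map - (low_bound >> card_shift);

  uintptr_t addr      = low_bound + 5 * (1u << card_shift);   // 6th card of the heap
  uint8_t*  card_addr = (uint8_t*)(byte_map_base + (addr >> card_shift));

  std::printf("card index = %td\n", card_addr - byte_map);    // prints 5
  return 0;
}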
++ AddressLiteral cardtable((address)byte_map_base, relocInfo::none); ++ Address index(noreg, obj, Address::times_1); ++ card_addr = __ as_Address(ArrayAddress(cardtable, index), rscratch4); ++ } ++ int dirty = CardTable::dirty_card_val(); ++ if (UseCondCardMark) { ++ Label L_already_dirty; ++ //if (ct->scanned_concurrently()) {//according to aarch64 ++ __ memb(); ++ //} ++ __ cmpb(card_addr, dirty); ++ __ jcc(Assembler::equal, L_already_dirty); ++// __ movw(temp, dirty); ++// __ stb(temp, card_addr); ++ __ stb(R0, card_addr); ++ __ bind(L_already_dirty); ++ } else { ++// __ movw(temp, dirty); ++// __ stb(temp, card_addr); ++ if (ct->scanned_concurrently()) { ++ __ memb(); ++ } ++ __ stb(R0, card_addr); ++ } ++} ++ ++void CardTableBarrierSetAssembler::oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) {SCOPEMARK_NAME(CardTableBarrierSetAssembler::oop_store_at, masm) ++ bool in_heap = (decorators & IN_HEAP) != 0; ++ ++ bool is_array = (decorators & IS_ARRAY) != 0; ++ bool on_anonymous = (decorators & ON_UNKNOWN_OOP_REF) != 0; ++ bool precise = is_array || on_anonymous; ++ ++ bool needs_post_barrier = val != noreg && in_heap; ++ ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, noreg, noreg); ++ if (needs_post_barrier) { ++ // flatten object address if needed ++ if (!precise || (dst.index() == noreg && dst.disp() == 0)) { ++ store_check(masm, dst.base(), dst); ++ } else { ++ __ lea(tmp1, dst); ++ store_check(masm, tmp1, dst); ++ } ++ } ++} +diff --git a/src/hotspot/cpu/sw64/gc/shared/cardTableBarrierSetAssembler_sw64.hpp b/src/hotspot/cpu/sw64/gc/shared/cardTableBarrierSetAssembler_sw64.hpp +new file mode 100644 +index 0000000000..55b8d224f1 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/gc/shared/cardTableBarrierSetAssembler_sw64.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_SW64_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_SW64_HPP ++#define CPU_SW64_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_SW64_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++class CardTableBarrierSetAssembler: public ModRefBarrierSetAssembler { ++protected: ++ void store_check(MacroAssembler* masm, Register obj, Address dst); ++ ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp); ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++ ++}; ++ ++#endif // #ifndef CPU_SW64_GC_SHARED_CARDTABLEBARRIERSETASSEMBLER_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/gc/shared/modRefBarrierSetAssembler_sw64.cpp b/src/hotspot/cpu/sw64/gc/shared/modRefBarrierSetAssembler_sw64.cpp +new file mode 100644 +index 0000000000..21969989a4 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/gc/shared/modRefBarrierSetAssembler_sw64.cpp +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/modRefBarrierSetAssembler.hpp" ++ ++#define __ masm-> ++ ++void ModRefBarrierSetAssembler::arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register src, Register dst, Register count) {SCOPEMARK_NAME(ModRefBarrierSetAssembler::arraycopy_prologue, masm) ++ bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0; ++ bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0; ++ bool obj_int = type == T_OBJECT && UseCompressedOops; ++// ShouldNotReachHere(); ++ //use T11 is very special, related to stubGenerator_sw64::array_copy* TODO:refactor jzy ++ if (type == T_OBJECT || type == T_ARRAY) { ++ if (!checkcast) { ++ if (!obj_int) { ++ // Save count for barrier ++ __ movl(T11, count); ++ } else if (disjoint) { ++ // Save dst in r11 in the disjoint case ++ __ movl(T11, dst); ++ } ++ } ++ gen_write_ref_array_pre_barrier(masm, decorators, dst, count); ++ } ++} ++ ++void ModRefBarrierSetAssembler::arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register src, Register dst, Register count) {SCOPEMARK_NAME(ModRefBarrierSetAssembler::arraycopy_epilogue, masm) ++ bool checkcast = (decorators & ARRAYCOPY_CHECKCAST) != 0; ++ bool disjoint = (decorators & ARRAYCOPY_DISJOINT) != 0; ++ bool obj_int = type == T_OBJECT && UseCompressedOops; ++ Register tmp = V0; ++// ShouldNotReachHere(); ++ if (type == T_OBJECT || type == T_ARRAY) { ++ if (!checkcast) { ++ if (!obj_int) { ++ // Save count for barrier ++ count = T11; ++ } else if (disjoint) { ++ // Use the saved dst in the disjoint case ++ dst = T11; ++ } ++ } else { ++ assert_different_registers(src, dst, count, rscratch1);//need this? jzy ++ tmp = rscratch1; ++ } ++ gen_write_ref_array_post_barrier(masm, decorators, dst, count, tmp); ++ } ++} ++ ++void ModRefBarrierSetAssembler::store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) { ++ if (type == T_OBJECT || type == T_ARRAY) { ++ oop_store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } else { ++ BarrierSetAssembler::store_at(masm, decorators, type, dst, val, tmp1, tmp2); ++ } ++} +diff --git a/src/hotspot/cpu/sw64/gc/shared/modRefBarrierSetAssembler_sw64.hpp b/src/hotspot/cpu/sw64/gc/shared/modRefBarrierSetAssembler_sw64.hpp +new file mode 100644 +index 0000000000..22bfed475d +--- /dev/null ++++ b/src/hotspot/cpu/sw64/gc/shared/modRefBarrierSetAssembler_sw64.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_GC_SHARED_MODREFBARRIERSETASSEMBLER_SW64_HPP ++#define CPU_SW64_GC_SHARED_MODREFBARRIERSETASSEMBLER_SW64_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++ ++// The ModRefBarrierSetAssembler filters away accesses on BasicTypes other ++// than T_OBJECT/T_ARRAY (oops). The oop accesses call one of the protected ++// accesses, which are overridden in the concrete BarrierSetAssembler. ++ ++class ModRefBarrierSetAssembler: public BarrierSetAssembler { ++protected: ++ virtual void gen_write_ref_array_pre_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register addr, Register count) {} ++ virtual void gen_write_ref_array_post_barrier(MacroAssembler* masm, DecoratorSet decorators, ++ Register start, Register count, Register tmp) {} ++ ++ virtual void oop_store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2) = 0; ++ ++public: ++ virtual void arraycopy_prologue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register src, Register dst, Register count); ++ virtual void arraycopy_epilogue(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Register start, Register count, Register tmp); ++ virtual void store_at(MacroAssembler* masm, DecoratorSet decorators, BasicType type, ++ Address dst, Register val, Register tmp1, Register tmp2); ++}; ++ ++#endif // CPU_SW64_GC_SHARED_MODREFBARRIERSETASSEMBLER_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/globalDefinitions_sw64.hpp b/src/hotspot/cpu/sw64/globalDefinitions_sw64.hpp +new file mode 100644 +index 0000000000..e36dc1be25 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/globalDefinitions_sw64.hpp +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_GLOBALDEFINITIONS_SW64_HPP ++#define CPU_SW64_VM_GLOBALDEFINITIONS_SW64_HPP ++// Size of SW Instructions ++const int BytesPerInstWord = 4; ++ ++const int StackAlignmentInBytes = (2*wordSize); ++ ++// Indicates whether the C calling conventions require that ++// 32-bit integer argument values are extended to 64 bits. 
++const bool CCallingConventionRequiresIntsAsLongs = false; ++ ++ ++// true if x is a power of 2, false otherwise ++inline bool is_power_of_2(intptr_t x) ; ++ ++inline intptr_t mask_bits (intptr_t x, intptr_t m); ++ ++// returns integer round-up to the nearest multiple of s (s must be a power of two) ++inline intptr_t round_to(intptr_t x, uintx s) { ++ #ifdef ASSERT ++ if (!is_power_of_2(s)) fatal("s must be a power of 2"); ++ #endif ++ const uintx m = s - 1; ++ return mask_bits(x + m, ~m); ++} ++ ++#define SUPPORTS_NATIVE_CX8 ++ ++#define SUPPORT_RESERVED_STACK_AREA ++ ++#endif // CPU_SW64_VM_GLOBALDEFINITIONS_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/globals_sw64.hpp b/src/hotspot/cpu/sw64/globals_sw64.hpp +new file mode 100644 +index 0000000000..6e32e4d5a5 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/globals_sw64.hpp +@@ -0,0 +1,193 @@ ++/* ++ * Copyright (c) 2000, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_GLOBALS_SW64_HPP ++#define CPU_SW64_VM_GLOBALS_SW64_HPP ++ ++#include "utilities/globalDefinitions.hpp" ++#include "utilities/macros.hpp" ++ ++// Sets the default values for platform dependent flags used by the runtime system. ++// (see globals.hpp) ++ ++define_pd_global(bool, ConvertSleepToYield, true); ++define_pd_global(bool, ShareVtableStubs, true); ++define_pd_global(bool, CountInterpCalls, true); ++define_pd_global(bool, NeedsDeoptSuspend, false); // only register window machines need this ++ ++define_pd_global(bool, ImplicitNullChecks, true); // Generate code for implicit null checks ++define_pd_global(bool, TrapBasedNullChecks, false); ++define_pd_global(bool, UncommonNullCast, true); // Uncommon-trap NULLs past to check cast ++ ++define_pd_global(uintx, CodeCacheSegmentSize, 64 TIERED_ONLY(+64)); // Tiered compilation has large code-entry alignment. ++define_pd_global(intx, CodeEntryAlignment, 16); ++define_pd_global(intx, OptoLoopAlignment, 16); ++define_pd_global(intx, InlineFrequencyCount, 100); ++ ++#define DEFAULT_STACK_YELLOW_PAGES (2) ++#define DEFAULT_STACK_RED_PAGES (1) ++// Java_java_net_SocketOutputStream_socketWrite0() uses a 64k buffer on the ++// stack if compiled for unix and LP64. To pass stack overflow tests we need ++// 20 shadow pages. 
++#define DEFAULT_STACK_SHADOW_PAGES (20 DEBUG_ONLY(+5)) ++#define DEFAULT_STACK_RESERVED_PAGES (1) ++ ++#define MIN_STACK_YELLOW_PAGES DEFAULT_STACK_YELLOW_PAGES ++#define MIN_STACK_RED_PAGES DEFAULT_STACK_RED_PAGES ++#define MIN_STACK_SHADOW_PAGES DEFAULT_STACK_SHADOW_PAGES ++#define MIN_STACK_RESERVED_PAGES (0) ++ ++define_pd_global(intx, StackYellowPages, DEFAULT_STACK_YELLOW_PAGES); ++define_pd_global(intx, StackRedPages, DEFAULT_STACK_RED_PAGES); ++define_pd_global(intx, StackShadowPages, DEFAULT_STACK_SHADOW_PAGES); ++////define_pd_global(intx, StackShadowPages, 3 DEBUG_ONLY(+1)); ++define_pd_global(intx, StackReservedPages, DEFAULT_STACK_RESERVED_PAGES); ++ ++define_pd_global(uintx, TLABSize, 0); ++define_pd_global(uintx, NewSize, 1024 * K); ++define_pd_global(intx, PreInflateSpin, 10); ++ ++define_pd_global(intx, PrefetchCopyIntervalInBytes, -1); ++define_pd_global(intx, PrefetchScanIntervalInBytes, -1); ++define_pd_global(intx, PrefetchFieldsAhead, -1); ++ ++define_pd_global(bool, RewriteBytecodes, true); ++define_pd_global(bool, RewriteFrequentPairs, true); ++define_pd_global(bool, UseMembar, true); ++ ++////define_pd_global(bool, PreserveFramePointer, false); ++ ++// GC Ergo Flags ++define_pd_global(uintx, CMSYoungGenPerWorker, 64*M); // default max size of CMS young gen, per GC worker thread ++ ++define_pd_global(uintx, TypeProfileLevel, 111); ++ ++define_pd_global(bool, PreserveFramePointer, false); ++// Only c2 cares about this at the moment ++define_pd_global(intx, AllocatePrefetchStyle, 2); ++define_pd_global(intx, AllocatePrefetchDistance, -1); ++define_pd_global(bool, CompactStrings, true); ++ ++// Clear short arrays bigger than one word in an arch-specific way ++define_pd_global(intx, InitArrayShortSize, 4096 * BytesPerLong); ++define_pd_global(intx, InlineSmallCode, 4000); ++ ++define_pd_global(bool, ThreadLocalHandshakes, false); ++ ++////#if defined(COMPILER1) || defined(COMPILER2) ++////define_pd_global(intx, InlineSmallCode, 1000); ++////#endif ++ ++#define ARCH_FLAGS(develop, \ ++ product, \ ++ diagnostic, \ ++ experimental, \ ++ notproduct, \ ++ range, \ ++ constraint, \ ++ writeable) \ ++ product(bool, UseSW8A, false, \ ++ "Use SW8A on Shenwei CPUs") \ ++ product(bool, UseAddpi, false, \ ++ "Use addpi of SW8A's instructions") \ ++ product(bool, UseCAS, false, \ ++ "Use CASx of SW8A's instructions") \ ++ product(bool, UseWmemb, false, \ ++ "Use wmemb on SW8A CPU") \ ++ product(bool, NearCpool, true, \ ++ "constant pool is close to instructions") \ ++ product(bool, TraceSignalHandling, false, \ ++ "Trace signal handling") \ ++ product(bool, UseBarriersForVolatile, false, \ ++ "Use memory barriers to implement volatile accesses") \ ++ product(bool, UseNeon, false, \ ++ "Use Neon for CRC32 computation") \ ++ product(bool, UseCRC32, false, \ ++ "Use CRC32 instructions for CRC32 computation") \ ++ product(bool, UseSIMDForMemoryOps, false, \ ++ "Use SIMD instructions in generated memory move code") \ ++ product(bool, UseSIMDForArrayEquals, true, \ ++ "Use SIMD instructions in generated array equals code") \ ++ product(bool, UseSimpleArrayEquals, false, \ ++ "Use simpliest and shortest implementation for array equals") \ ++ product(bool, AvoidUnalignedAccesses, false, \ ++ "Avoid generating unaligned memory accesses") \ ++ product(bool, UseLSE, false, \ ++ "Use LSE instructions") \ ++ product(bool, UseBlockZeroing, true, \ ++ "Use DC ZVA for block zeroing") \ ++ product(intx, BlockZeroingLowLimit, 256, \ ++ "Minimum size in bytes when block zeroing will be used") \ ++ 
range(1, max_jint) \ ++ product(bool, TraceTraps, false, "Trace all traps the signal handler")\ ++ product(int, SoftwarePrefetchHintDistance, -1, \ ++ "Use prfm hint with specified distance in compiled code." \ ++ "Value -1 means off.") \ ++ range(-1, 4096) \ ++ product(bool, UseSW6B, false, \ ++ "Use SW6B on Shenwei CPUs") \ ++ \ ++ product(bool, UseSimdForward, false, \ ++ "arraycopy disjoint stubs with SIMD instructions") \ ++ product(bool, UseSimdBackward, false, \ ++ "arraycopy conjoint stubs with SIMD instructions") \ ++ product(bool, UseSimdLongOop, false, \ ++ "conjoint oop copy with SIMD instructions") \ ++ /* product(bool, UseCodeCacheAllocOpt, true, */ \ ++ /* "Allocate code cache within 32-bit memory address space") */ \ ++ \ ++ product(bool, UseCountLeadingZerosInstruction, true, \ ++ "Use count leading zeros instruction") \ ++ \ ++ product(bool, UseCountTrailingZerosInstruction, false, \ ++ "Use count trailing zeros instruction") \ ++ \ ++ product(bool, FastIntDiv, false, \ ++ "make Integer division faster") \ ++ \ ++ product(bool, FastLongDiv, false, \ ++ "make Long division faster") \ ++ \ ++ product(bool, FastIntRem, false, \ ++ "make Integer remainder faster") \ ++ \ ++ product(bool, FastLongRem, false, \ ++ "make Long remainder faster") \ ++ \ ++ product(bool, SafePatch, true, \ ++ "use double li48 to make patch operations safer") \ ++ \ ++ product(bool, UseNecessaryMembar, true, \ ++ "It is necessary to add memb instruction on sw platform") \ ++ \ ++ product(bool, FRegisterConflict, true, \ ++ "When FRegisterConflict is true, prevent source and destination FloatRegisters from being the same. " \ ++ "When FRegisterConflict is false, ignore the conflict") \ ++ product(bool, UseSetfpec, false, \ ++ "true for 9906, false for 9916") \ ++ product(bool, UseGetLongIntrinsic, false, \ ++ "Use Unsafe.getLong intrinsic") ++ ++#endif // CPU_SW64_VM_GLOBALS_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/icBuffer_sw64.cpp b/src/hotspot/cpu/sw64/icBuffer_sw64.cpp +new file mode 100644 +index 0000000000..6a3866eef0 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/icBuffer_sw64.cpp +@@ -0,0 +1,91 @@ ++/* ++ * Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/icBuffer.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/bytecodes.hpp" ++#include "memory/resourceArea.hpp" ++#include "nativeInst_sw64.hpp" ++#include "oops/oop.inline.hpp" ++ ++int InlineCacheBuffer::ic_stub_code_size() { ++ return NativeMovConstReg::instruction_size + NativeJump::instruction_size; ++} ++ ++ ++ ++void InlineCacheBuffer::assemble_ic_buffer_code(address code_begin, void* cached_value, address entry_point) {//Unimplemented(); ++ ResourceMark rm; ++ CodeBuffer code(code_begin, ic_stub_code_size()); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ // note: even though the code contains an embedded value, we do not need reloc info ++ // because ++ // (1) the value is old (i.e., doesn't matter for scavenges) ++ // (2) these ICStubs are removed *before* a GC happens, so the roots disappear ++ // assert(cached_value == NULL || cached_oop->is_perm(), "must be perm oop"); ++ ++ // TODO:confirm jzy ++ // 1. need flush? ++ // 2. need relocate? ++ #define __ masm-> ++ __ prepare_patch_li48(V0, (long)cached_value); //sw64.ad #frame ++ ++ __ patchable_jump(entry_point); ++ __ flush(); ++#undef __ ++} ++ ++ ++address InlineCacheBuffer::ic_buffer_entry_point(address code_begin) { ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); // creation also verifies the object ++ address jmp = move->next_instruction_address(); ++ NativeInstruction* ni = nativeInstruction_at(jmp); ++ if (ni->is_jump()) { ++ NativeJump* jump = nativeJump_at(jmp); ++ return jump->jump_destination(); ++ } else { ++ fatal("no a ic buffer entry"); ++ return NULL; ++ } ++} ++ ++ ++void* InlineCacheBuffer::ic_buffer_cached_value(address code_begin) { ++ // creation also verifies the object ++ NativeMovConstReg* move = nativeMovConstReg_at(code_begin); ++ // Verifies the jump ++ address jmp = move->next_instruction_address(); ++ NativeInstruction* ni = nativeInstruction_at(jmp); ++ if (ni->is_jump()) { ++ NativeJump* jump = nativeJump_at(jmp); ++ } else { ++ fatal("no a ic buffer entry"); ++ } ++ void* o = (void*)move->data(); ++ return o; ++} +diff --git a/src/hotspot/cpu/sw64/icache_sw64.cpp b/src/hotspot/cpu/sw64/icache_sw64.cpp +new file mode 100644 +index 0000000000..5fb940774f +--- /dev/null ++++ b/src/hotspot/cpu/sw64/icache_sw64.cpp +@@ -0,0 +1,96 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/icache.hpp" ++ ++#define __ _masm-> ++extern void sw64TestHook(); ++ ++void ICache::initialize() { ++ sw64TestHook(); ++} ++ ++void ICacheStubGenerator::generate_icache_flush(ICache::flush_icache_stub_t* flush_icache_stub) { ++ StubCodeMark mark(this, "ICache", "flush_icache_stub"); ++ ++ Register rax = V0; ++ address start = __ pc(); ++ ++ const Register addr = c_rarg0; ++ const Register lines = c_rarg1; ++ const Register magic = c_rarg2; ++ ++ Label flush_line, done; ++ ++ __ jcc(Assembler::zero, done, lines); ++ ++ // Force ordering wrt cflush. ++ // Other fence and sync instructions won't do the job. ++ __ memb(); ++ ++ __ bind(flush_line); ++ __ clflush(Address(addr, 0)); ++ __ addptr(addr, ICache::line_size, addr); ++ __ decrementw(lines); ++ __ jcc(Assembler::notZero, flush_line, lines); ++ ++ __ memb(); ++ ++ __ bind(done); ++ ++ __ movl(rax, magic); // Handshake with caller to make sure it happened! ++ __ ret_sw(); ++ ++ // Must be set here so StubCodeMark destructor can call the flush stub. ++ *flush_icache_stub = (ICache::flush_icache_stub_t)start; ++} ++ ++void ICache::call_flush_stub(address start, int lines) { ++ //in fact, the current os implementation simply flush all ICACHE&DCACHE ++// sysmips(3, 0, 0, 0); ++// __asm__ __volatile__ ("ldi $0,266"); ++// __asm__ __volatile__ ("sys_call 0x83"); ++} ++ ++void ICache::invalidate_word(address addr) { ++ //cacheflush(addr, 4, ICACHE); ++ ++// sysmips(3, 0, 0, 0); ++// __asm__ __volatile__ ("ldi $0,266"); ++// __asm__ __volatile__ ("sys_call 0x83"); ++} ++ ++void ICache::invalidate_range(address start, int nbytes) { ++// sysmips(3, 0, 0, 0); ++// __asm__ __volatile__ ("ldi $0,266"); ++// __asm__ __volatile__ ("sys_call 0x83"); ++} ++ ++void ICache::invalidate_all() { ++// __asm__ __volatile__ ("ldi $0,266"); ++// __asm__ __volatile__ ("sys_call 0x83"); ++} +diff --git a/src/hotspot/cpu/sw64/icache_sw64.hpp b/src/hotspot/cpu/sw64/icache_sw64.hpp +new file mode 100644 +index 0000000000..22bfdb90a5 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/icache_sw64.hpp +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 1997, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_SW64_VM_ICACHE_SW64_HPP ++#define CPU_SW64_VM_ICACHE_SW64_HPP ++ ++// Interface for updating the instruction cache. Whenever the VM ++// modifies code, part of the processor instruction cache potentially ++// has to be flushed. ++ ++class ICache : public AbstractICache { ++ public: ++ enum { ++ stub_size = 0, // Size of the icache flush stub in bytes ++ line_size = 32, // flush instruction affects a dword ++ log2_line_size = 5 // log2(line_size) ++ }; ++ static void initialize(); ++ ++ static void call_flush_stub(address start, int lines); ++ ++ static void invalidate_word(address addr); ++ ++ static void invalidate_range(address start, int nbytes); ++ ++ static void invalidate_all(); ++ ++}; ++ ++#endif // CPU_SW64_VM_ICACHE_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/interp_masm_sw64.cpp b/src/hotspot/cpu/sw64/interp_masm_sw64.cpp +new file mode 100755 +index 0000000000..b4cbf17536 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/interp_masm_sw64.cpp +@@ -0,0 +1,1909 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "interp_masm_sw64.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "logging/log.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/markOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/basicLock.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/thread.inline.hpp" ++#include "assembler_sw64.hpp" ++ ++// Implementation of InterpreterMacroAssembler ++ ++void InterpreterMacroAssembler::jump_to_entry(address entry) { ++ assert(entry, "Entry must have been generated by now"); ++ jump(RuntimeAddress(entry)); ++} ++ ++void InterpreterMacroAssembler::profile_obj_type(Register obj, const Address& mdo_addr) { ++ Label update, next, none; ++ ++ verify_oop(obj); ++ ++ jcc(Assembler::notZero, update, obj); ++ ldptr(AT, mdo_addr); ++ orptr(AT, TypeEntries::null_seen, AT); ++ stptr(AT, mdo_addr); ++ jmp(next); ++ ++ bind(update); ++ load_klass(obj, obj); ++ ++ ldptr(AT, mdo_addr); ++ xorptr(obj, AT, obj); ++ testptr(obj, TypeEntries::type_klass_mask); ++ jcc(Assembler::zero, next); // klass seen before, nothing to ++ // do. 
The unknown bit may have been ++ // set already but no need to check. ++ ++ testptr(obj, TypeEntries::type_unknown); ++ jcc(Assembler::notZero, next); // already unknown. Nothing to do anymore. ++ ++ ldptr(AT, mdo_addr); ++ jcc(Assembler::zero, none, AT); ++ cmpptr(AT, TypeEntries::null_seen); ++ jcc(Assembler::equal, none); ++ // There is a chance that the checks above (re-reading profiling ++ // data from memory) fail if another thread has just set the ++ // profiling to this obj's klass ++ xorptr(obj, AT, obj); ++ testptr(obj, TypeEntries::type_klass_mask); ++ jcc(Assembler::zero, next); ++ ++ // different than before. Cannot keep accurate profile. ++ orptr(AT, TypeEntries::type_unknown, AT); ++ stptr(AT, mdo_addr); ++ jmp(next); ++ ++ bind(none); ++ // first time here. Set profile type. ++ stptr(obj, mdo_addr); ++ ++ bind(next); ++} ++ ++void InterpreterMacroAssembler::profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual) { ++ if (!ProfileInterpreter) { ++ return; ++ } ++ ++ if (MethodData::profile_arguments() || MethodData::profile_return()) { ++ Label profile_continue; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int off_to_start = is_virtual ? in_bytes(VirtualCallData::virtual_call_data_size()) : in_bytes(CounterData::counter_data_size()); ++ ++ cmpb(Address(mdp, in_bytes(DataLayout::tag_offset()) - off_to_start), is_virtual ? DataLayout::virtual_call_type_data_tag : DataLayout::call_type_data_tag); ++ jcc(Assembler::notEqual, profile_continue); ++ ++ if (MethodData::profile_arguments()) { ++ Label done; ++ int off_to_args = in_bytes(TypeEntriesAtCall::args_data_offset()); ++ addptr(mdp, off_to_args, mdp); ++ ++ for (int i = 0; i < TypeProfileArgsLimit; i++) { ++ if (i > 0 || MethodData::profile_return()) { ++ // If return value type is profiled we may have no argument to profile ++ ldptr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args)); ++ subw(tmp, i*TypeStackSlotEntries::per_arg_count(), tmp); ++ cmpw(tmp, TypeStackSlotEntries::per_arg_count()); ++ jcc(Assembler::less, done); ++ } ++ ldptr(tmp, Address(callee, Method::const_offset())); ++ ldhu(tmp, Address(tmp, ConstMethod::size_of_parameters_offset())); ++ // stack offset o (zero based) from the start of the argument ++ // list, for n arguments translates into offset n - o - 1 from ++ // the end of the argument list ++ ldptr(AT, Address(mdp, in_bytes(TypeEntriesAtCall::stack_slot_offset(i))-off_to_args)); ++ subw(tmp, AT, tmp); ++ subw(tmp, 1, tmp); ++ Address arg_addr = argument_address(tmp); ++ ldptr(tmp, arg_addr); ++ ++ Address mdo_arg_addr(mdp, in_bytes(TypeEntriesAtCall::argument_type_offset(i))-off_to_args); ++ profile_obj_type(tmp, mdo_arg_addr); ++ ++ int to_add = in_bytes(TypeStackSlotEntries::per_arg_size()); ++ addptr(mdp, to_add, mdp); ++ off_to_args += to_add; ++ } ++ ++ if (MethodData::profile_return()) { ++ ldptr(tmp, Address(mdp, in_bytes(TypeEntriesAtCall::cell_count_offset())-off_to_args)); ++ subw(tmp, TypeProfileArgsLimit*TypeStackSlotEntries::per_arg_count(), tmp); ++ } ++ ++ bind(done); ++ ++ if (MethodData::profile_return()) { ++ // We're right after the type profile for the last ++ // argument. tmp is the number of cells left in the ++ // CallTypeData/VirtualCallTypeData to reach its end. Non null ++ // if there's a return to profile. 
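A sketch (not patch code) of the arithmetic the next two instructions perform, assuming DataLayout::cell_size is one 8-byte word as on other 64-bit ports:

    // tmp holds the number of profiling cells still ahead of mdp;
    // scale it to bytes and advance mdp past them,
    // e.g. tmp = 2 cells  ->  2 << exact_log2(8) = 16 bytes
    mdp += tmp << exact_log2(DataLayout::cell_size);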
++ assert(ReturnTypeEntry::static_cell_count() < TypeStackSlotEntries::per_arg_count(), "can't move past ret type"); ++ slll(tmp, exact_log2(DataLayout::cell_size), tmp); ++ //addw(tmp, 0, tmp); ++ addptr(mdp, tmp, mdp); ++ } ++ stptr(mdp, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize)); ++ } else { ++ assert(MethodData::profile_return(), "either profile call args or call ret"); ++ update_mdp_by_constant(mdp, in_bytes(TypeEntriesAtCall::return_only_size())); ++ } ++ ++ // mdp points right after the end of the ++ // CallTypeData/VirtualCallTypeData, right after the cells for the ++ // return value type if there's one ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_return_type(Register mdp, Register ret, Register tmp) { ++ assert_different_registers(mdp, ret, tmp, _bcp_register); ++ if (ProfileInterpreter && MethodData::profile_return()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ if (MethodData::profile_return_jsr292_only()) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ // If we don't profile all invoke bytecodes we must make sure ++ // it's a bytecode we indeed profile. We can't go back to the ++ // begining of the ProfileData we intend to update to check its ++ // type because we're right after it and we don't known its ++ // length ++ Label do_profile; ++ cmpb(Address(_bcp_register, 0), Bytecodes::_invokedynamic); ++ jcc(Assembler::equal, do_profile); ++ cmpb(Address(_bcp_register, 0), Bytecodes::_invokehandle); ++ jcc(Assembler::equal, do_profile); ++ get_method(tmp); ++ cmph(Address(tmp, Method::intrinsic_id_offset_in_bytes()), vmIntrinsics::_compiledLambdaForm); ++ jcc(Assembler::notEqual, profile_continue); ++ ++ bind(do_profile); ++ } ++ ++ Address mdo_ret_addr(mdp, -in_bytes(ReturnTypeEntry::size())); ++ movl(tmp, ret); ++ profile_obj_type(tmp, mdo_ret_addr); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_parameters_type(Register mdp, Register tmp1, Register tmp2) { ++ if (ProfileInterpreter && MethodData::profile_parameters()) { ++ Label profile_continue, done; ++ ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Load the offset of the area within the MDO used for ++ // parameters. If it's negative we're not profiling any parameters ++ ldws(tmp1, Address(mdp, in_bytes(MethodData::parameters_type_data_di_offset()) - in_bytes(MethodData::data_offset()))); ++ jcc(Assembler::negative, profile_continue, tmp1); ++ ++ // Compute a pointer to the area for parameters from the offset ++ // and move the pointer to the slot for the last ++ // parameters. Collect profiling from last parameter down. 
++ // mdo start + parameters offset + array length - 1 ++ addptr(mdp, tmp1, mdp); ++ ldptr(tmp1, Address(mdp, ArrayData::array_len_offset())); ++ decrementl(tmp1, TypeStackSlotEntries::per_arg_count()); ++ ++ Label loop; ++ bind(loop); ++ ++ int off_base = in_bytes(ParametersTypeData::stack_slot_offset(0)); ++ int type_base = in_bytes(ParametersTypeData::type_offset(0)); ++ Address::ScaleFactor per_arg_scale = Address::times(DataLayout::cell_size); ++ Address arg_off(mdp, tmp1, per_arg_scale, off_base); ++ Address arg_type(mdp, tmp1, per_arg_scale, type_base); ++ ++ // load offset on the stack from the slot for this parameter ++ ldptr(tmp2, arg_off); ++ negptr(tmp2); ++ // read the parameter from the local area ++ ldptr(tmp2, Address(rlocals, tmp2, Interpreter::stackElementScale())); ++ ++ // profile the parameter ++ profile_obj_type(tmp2, arg_type); ++ ++ // go to next parameter ++ decrementl(tmp1, TypeStackSlotEntries::per_arg_count()); ++ jcc(Assembler::positive, loop, tmp1); ++ ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) {SCOPEMARK_NAME(InterpreterMacroAssembler::call_VM_leaf_base, this) ++ // interpreter specific ++ // ++ // Note: No need to save/restore bcp & locals (r13 & r14) pointer ++ // since these are callee saved registers and no blocking/ ++ // GC can happen in leaf calls. ++ // Further Note: DO NOT save/restore bcp/locals. If a caller has ++ // already saved them so that it can use BCP/LVP as temporaries ++ // then a save/restore here will DESTROY the copy the caller ++ // saved! There used to be a save_bcp() that only happened in ++ // the ASSERT path (no restore_bcp). Which caused bizarre failures ++ // when jvm built with ASSERTs. ++#ifdef ASSERT ++ { ++ Label L; ++ cmpptr(Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD); ++ jcc(Assembler::equal, L); ++ stop("InterpreterMacroAssembler::call_VM_leaf_base:" ++ " last_sp != NULL"); ++ bind(L); ++ } ++#endif ++ // super call ++ MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments); ++ // interpreter specific ++ // LP64: Used to ASSERT that BCP/LVP were equal to frame's bcp/locals ++ // but since they may not have been saved (and we don't want to ++ // save them here (see note above) the assert is invalid. ++} ++ ++void InterpreterMacroAssembler::call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions) {SCOPEMARK_NAME(InterpreterMacroAssembler::call_VM_base, this) ++ // interpreter specific ++ // ++ // Note: Could avoid restoring locals ptr (callee saved) - however doesn't ++ // really make a difference for these runtime calls, since they are ++ // slow anyway. Btw., bcp must be saved/restored since it may change ++ // due to GC. 
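For orientation, the body below follows the usual interpreter shape around a VM call; roughly (a sketch of the control flow, not the generated code, using the helpers defined in this file):

    save_bcp();                          // spill bcp into the frame; a GC during the call may move the Method
    MacroAssembler::call_VM_base(...);   // the actual VM transition
    restore_bcp();                       // reload bcp and locals from the frame afterwards
    restore_locals();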
++ save_bcp(); ++#ifdef ASSERT ++ { ++ Label L; ++ cmpptr(Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize), (int32_t)NULL_WORD); ++ jcc(Assembler::equal, L); ++ stop("InterpreterMacroAssembler::call_VM_base:" ++ " last_sp != NULL"); ++ bind(L); ++ } ++#endif /* ASSERT */ ++ // super call ++ MacroAssembler::call_VM_base(oop_result, noreg, last_java_sp, ++ entry_point, number_of_arguments, ++ check_exceptions); ++ // interpreter specific ++ restore_bcp(); ++ restore_locals(); ++} ++ ++void InterpreterMacroAssembler::check_and_handle_popframe(Register java_thread) { ++ if (JvmtiExport::can_pop_frame()) { ++ Label L; ++ // Initiate popframe handling only if it is not already being ++ // processed. If the flag has the popframe_processing bit set, it ++ // means that this code is called *during* popframe handling - we ++ // don't want to reenter. ++ // This method is only called just after the call into the vm in ++ // call_VM_base, so the arg registers are available. ++ Register pop_cond = c_rarg0; ++ ldw(pop_cond, Address(java_thread, JavaThread::popframe_condition_offset())); ++ testw(pop_cond, JavaThread::popframe_pending_bit); ++ jcc(Assembler::zero, L); ++ testw(pop_cond, JavaThread::popframe_processing_bit); ++ jcc(Assembler::notZero, L); ++ // Call Interpreter::remove_activation_preserving_args_entry() to get the ++ // address of the same-named entrypoint in the generated interpreter code. ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_preserving_args_entry)); ++ jmp(V0); ++ bind(L); ++ } ++} ++ ++void InterpreterMacroAssembler::load_earlyret_value(TosState state) { ++ const Register rcx = T11; ++ const Register rax = V0; ++ Register thread = rthread; ++ ldptr(rcx, Address(thread, JavaThread::jvmti_thread_state_offset())); ++ const Address tos_addr(rcx, JvmtiThreadState::earlyret_tos_offset()); ++ const Address oop_addr(rcx, JvmtiThreadState::earlyret_oop_offset()); ++ const Address val_addr(rcx, JvmtiThreadState::earlyret_value_offset()); ++ switch (state) { ++ case atos: ldptr(rax, oop_addr); ++ stptr(R0, oop_addr); ++ verify_oop(rax, state); break; ++ case ltos: ldptr(rax, val_addr); break; ++ case btos: // fall through ++ case ztos: // fall through ++ case ctos: // fall through ++ case stos: // fall through ++ case itos: ldws(rax, val_addr); break; ++ case ftos: load_float(FSF, val_addr); break; ++ case dtos: load_double(FSF, val_addr); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++ // Clean up tos value in the thread object ++ movw(AT, (int) ilgl); ++ stw(AT, tos_addr); ++ stptr(R0, val_addr); ++} ++ ++ ++void InterpreterMacroAssembler::check_and_handle_earlyret(Register java_thread) { ++ if (JvmtiExport::can_force_early_return()) { ++ Label L; ++ Register tmp = c_rarg0; ++ ++ ldptr(tmp, Address(rthread, JavaThread::jvmti_thread_state_offset())); ++ testptr(tmp, tmp); ++ jcc(Assembler::zero, L); // if (thread->jvmti_thread_state() == NULL) exit; ++ ++ // Initiate earlyret handling only if it is not already being processed. ++ // If the flag has the earlyret_processing bit set, it means that this code ++ // is called *during* earlyret handling - we don't want to reenter. ++ ldwu(tmp, Address(tmp, JvmtiThreadState::earlyret_state_offset())); ++ cmpw(tmp, JvmtiThreadState::earlyret_pending); ++ jcc(Assembler::notEqual, L); ++ ++ // Call Interpreter::remove_activation_early_entry() to get the address of the ++ // same-named entrypoint in the generated interpreter code. 
++ ldptr(tmp, Address(rthread, JavaThread::jvmti_thread_state_offset())); ++ ldws(tmp, Address(tmp, JvmtiThreadState::earlyret_tos_offset())); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, Interpreter::remove_activation_early_entry), tmp); ++ jmp(V0); ++ bind(L); ++ } ++} ++ ++void InterpreterMacroAssembler::get_unsigned_2_byte_index_at_bcp(Register reg, ++ int bcp_offset) { ++ assert(bcp_offset >= 0, "bcp is still pointing to start of bytecode"); ++ ldbu(AT, bcp_offset, rbcp); ++ ldbu(reg, bcp_offset + 1, rbcp); ++ slll(AT, 8, AT); ++ bis(reg, AT, reg); ++} ++ ++void InterpreterMacroAssembler::get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size) {SCOPEMARK_NAME(get_cache_index_at_bcp, this) ++ assert(bcp_offset > 0, "bcp is still pointing to start of bytecode"); ++ if (index_size == sizeof(u2)) { ++ ldhu_unaligned(index, Address(rbcp, bcp_offset)); ++ } else if (index_size == sizeof(u4)) { ++ ldw(index, Address(rbcp, bcp_offset)); ++ // Check if the secondary index definition is still ~x, otherwise ++ // we have to change the following assembler code to calculate the ++ // plain index. ++ assert(ConstantPool::decode_invokedynamic_index(~123) == 123, "else change next line"); ++ notw(index, index); ++ } else if (index_size == sizeof(u1)) { ++ ldbu(index, Address(rbcp, bcp_offset)); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void InterpreterMacroAssembler::get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size) {SCOPEMARK_NAME(get_cache_and_index_at_bcp, this) ++ assert_different_registers(cache, index); ++ get_cache_index_at_bcp(index, bcp_offset, index_size); ++ ldptr(cache, Address(rfp, frame::interpreter_frame_cache_offset * wordSize)); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // convert from field index to ConstantPoolCacheEntry index ++ assert(exact_log2(in_words(ConstantPoolCacheEntry::size())) == 2, "else change next line"); ++ slll(index, 2, index); ++} ++ ++void InterpreterMacroAssembler::get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size) {SCOPEMARK_NAME(get_cache_and_index_and_bytecode_at_bcp, this) ++ get_cache_and_index_at_bcp(cache, index, bcp_offset, index_size); ++ // We use a 32-bit load here since the layout of 64-bit words on ++ // little-endian machines allow us that. 
++ ldwu(bytecode, Address(cache, index, Address::times_ptr, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::indices_offset())); ++ const int shift_count = (1 + byte_no) * BitsPerByte; ++ assert((byte_no == TemplateTable::f1_byte && shift_count == ConstantPoolCacheEntry::bytecode_1_shift) || ++ (byte_no == TemplateTable::f2_byte && shift_count == ConstantPoolCacheEntry::bytecode_2_shift), ++ "correct shift count"); ++ srll(bytecode, shift_count, bytecode); ++ assert(ConstantPoolCacheEntry::bytecode_1_mask == ConstantPoolCacheEntry::bytecode_2_mask, "common mask"); ++ andw(bytecode, ConstantPoolCacheEntry::bytecode_1_mask, bytecode); ++} ++ ++void InterpreterMacroAssembler::get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size) { ++ assert(cache != tmp, "must use different register"); ++ get_cache_index_at_bcp(tmp, bcp_offset, index_size); ++ assert(sizeof(ConstantPoolCacheEntry) == 4 * wordSize, "adjust code below"); ++ // convert from field index to ConstantPoolCacheEntry index ++ // and from word offset to byte offset ++ assert(exact_log2(in_bytes(ConstantPoolCacheEntry::size_in_bytes())) == 2 + LogBytesPerWord, "else change next line"); ++ slll(tmp, 2 + LogBytesPerWord, tmp); ++ ldptr(cache, Address(rfp, frame::interpreter_frame_cache_offset * wordSize)); ++ // skip past the header ++ addptr(cache, in_bytes(ConstantPoolCache::base_offset()), cache); ++ addptr(cache, tmp, cache); ++} ++ ++// Load object from cpool->resolved_references(index) ++void InterpreterMacroAssembler::load_resolved_reference_at_index( ++ Register result, Register index, Register tmp) { ++ assert_different_registers(result, index); ++ ++ get_constant_pool(result); ++ // load pointer for resolved_references[] objArray ++ ldptr(result, Address(result, ConstantPool::cache_offset_in_bytes())); ++ ldptr(result, Address(result, ConstantPoolCache::resolved_references_offset_in_bytes())); ++ resolve_oop_handle(result, tmp); ++ load_heap_oop(result, Address(result, index, ++ UseCompressedOops ? Address::times_4 : Address::times_ptr, ++ arrayOopDesc::base_offset_in_bytes(T_OBJECT)), tmp); ++} ++ ++// load cpool->resolved_klass_at(index) ++void InterpreterMacroAssembler::load_resolved_klass_at_index(Register cpool, ++ Register index, Register klass) { ++ ldhu(index, Address(cpool, index, Address::times_ptr, sizeof(ConstantPool))); ++ memb(); ++ Register resolved_klasses = cpool; ++ ldptr(resolved_klasses, Address(cpool, ConstantPool::resolved_klasses_offset_in_bytes())); ++ memb(); ++ ldptr(klass, Address(resolved_klasses, index, Address::times_ptr, Array::base_offset_in_bytes())); ++} ++ ++// Generate a subtype check: branch to ok_is_subtype if sub_klass is a ++// subtype of super_klass. ++// ++// Args: ++// FSR: superklass ++// Rsub_klass: subklass ++// ++// Kills: ++// T0, T1 ++void InterpreterMacroAssembler::gen_subtype_check(Register Rsub_klass, ++ Label& ok_is_subtype) {SCOPEMARK_NAME(gen_subtype_check, this) ++ Register rcx = c_rarg4; ++ Register rdi = T0; ++ Register rax = FSR; ++ assert(Rsub_klass != FSR, "FSR holds superklass"); ++ assert(Rsub_klass != rlocals, "s1 holds locals"); ++ assert(Rsub_klass != rbcp, "s0 holds bcp"); ++ assert(Rsub_klass != rcx, "T1 holds 2ndary super array length"); ++ assert(Rsub_klass != rdi, "T0 holds 2ndary super array scan ptr"); ++ ++ // Profile the not-null value's klass. ++ profile_typecheck(rcx, Rsub_klass, rdi); // blows rcx, reloads rdi ++ ++// Do the check. 
++ check_klass_subtype(Rsub_klass, rax, rcx, ok_is_subtype); // blows rcx ++ ++// Profile the failure of the check. ++ profile_typecheck_failed(rcx); // blows rcx ++} ++ ++// Java Expression Stack ++ ++void InterpreterMacroAssembler::pop_ptr(Register r) { ++ assert(r != esp, "current not consider esp"); ++ if(UseSW8A) { ++ ldl_a(r, Interpreter::stackElementSize, esp); ++ } else { ++ ldptr(r, Address(esp, 0)); ++ addl(esp, Interpreter::stackElementSize, esp); ++ } ++} ++ ++void InterpreterMacroAssembler::push_ptr(Register r) { ++ assert(r != esp, "current not consider esp"); ++ subl(esp, Interpreter::stackElementSize, esp); ++ stptr(r, Address(esp, 0)); ++} ++ ++void InterpreterMacroAssembler::push_i(Register r) { ++ assert(r != esp, "current not consider esp"); ++ // For compatibility reason, don't change to sw. ++ movw(r, r);//clear high 32-bits zero ++ subl(esp, Interpreter::stackElementSize, esp); ++ stl(r, Address(esp, 0)); ++} ++ ++void InterpreterMacroAssembler::push_f(FloatRegister r) { ++ subl(esp, Interpreter::stackElementSize, esp); ++ fsts(r, Address(esp, 0)); ++} ++ ++void InterpreterMacroAssembler::pop_f(FloatRegister r) { ++ if(UseSW8A) { ++ flds_a(r, Interpreter::stackElementSize, esp); ++ } else { ++ flds(r, Address(esp, 0)); ++ addl(esp, Interpreter::stackElementSize, esp); ++ } ++} ++ ++void InterpreterMacroAssembler::push_d(FloatRegister r) { ++ subl(esp, 2 * Interpreter::stackElementSize, esp); ++ fstd(r, Address(esp, 0)); ++} ++ ++void InterpreterMacroAssembler::pop_d(FloatRegister r) { ++ if(UseSW8A) { ++ fldd_a(r, 2 * Interpreter::stackElementSize, esp); ++ } else { ++ fldd(r, Address(esp, 0)); ++ addl(esp, 2 * Interpreter::stackElementSize, esp); ++ } ++} ++ ++void InterpreterMacroAssembler::pop_i(Register r) { ++ assert(r != esp, "current not consider esp"); ++ if(UseSW8A) { ++ ldw_a(r, Interpreter::stackElementSize, esp); ++ } else { ++ ldws(r, Address(esp, 0)); ++ addl(esp, Interpreter::stackElementSize, esp); ++ } ++} ++ ++void InterpreterMacroAssembler::pop_l(Register r) { ++ assert(r != esp, "current not consider esp"); ++ if(UseSW8A) { ++ ldl_a(r, 2 * Interpreter::stackElementSize, esp); ++ } else { ++ ldptr(r, Address(esp, 0)); ++ addl(esp, 2 * Interpreter::stackElementSize, esp); ++ } ++} ++ ++void InterpreterMacroAssembler::push_l(Register r) { ++ assert(r != esp, "current not consider esp"); ++ subl(esp, 2 * Interpreter::stackElementSize, esp); ++ stptr(R0, Address(esp, Interpreter::stackElementSize)); ++ stptr(r, Address(esp, 0)); ++} ++ ++void InterpreterMacroAssembler::pop(TosState state) { ++ switch (state) { ++ case atos: pop_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: pop_i(); break; ++ case ltos: pop_l(); break; ++ case ftos: pop_f(); break; ++ case dtos: pop_d(); break; ++ case vtos: /* nothing to do */ break; ++ default: ShouldNotReachHere(); ++ } ++ verify_oop(FSR, state); ++} ++ ++//FSR=V0,SSR=T4 ++void InterpreterMacroAssembler::push(TosState state) { ++ verify_oop(FSR, state); ++ switch (state) { ++ case atos: push_ptr(); break; ++ case btos: ++ case ztos: ++ case ctos: ++ case stos: ++ case itos: push_i(); break; ++ case ltos: push_l(); break; ++ case ftos: push_f(); break; ++ case dtos: push_d(); break; ++ case vtos: /* nothing to do */ break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++// Helpers for swap and dup ++void InterpreterMacroAssembler::load_ptr(int n, Register val) { ++ ldptr(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); ++} ++ ++void 
InterpreterMacroAssembler::store_ptr(int n, Register val) { ++ stptr(val, Address(esp, Interpreter::expr_offset_in_bytes(n))); ++} ++ ++void InterpreterMacroAssembler::prepare_to_jump_from_interpreted() {SCOPEMARK_NAME(prepare_to_jump_from_interpreted, this) ++ // set sender sp ++ movl(rsender, esp); ++ // record last_sp ++ stptr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); ++} ++ ++ ++// Jump to from_interpreted entry of a call unless single stepping is possible ++// in this thread in which case we must call the i2i entry ++void InterpreterMacroAssembler::jump_from_interpreted(Register method, Register temp) {SCOPEMARK_NAME(jump_from_interpreted, this) ++ prepare_to_jump_from_interpreted(); ++ ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ ldbu(AT, Address(rthread, JavaThread::interp_only_mode_offset())); ++ jcc(Assembler::zero, run_compiled_code, AT); ++ jmp(Address(method, Method::interpreter_entry_offset())); ++ bind(run_compiled_code); ++ } ++ ++ jmp(Address(method, Method::from_interpreted_offset())); ++} ++ ++// The following two routines provide a hook so that an implementation ++// can schedule the dispatch in two parts. sw64 does not do this. ++void InterpreterMacroAssembler::dispatch_prolog(TosState state, int step) { ++ // Nothing sw64 specific to be done here ++} ++ ++void InterpreterMacroAssembler::dispatch_epilog(TosState state, int step) { ++ dispatch_next(state, step); ++} ++ ++void InterpreterMacroAssembler::dispatch_base(TosState state, ++ address* table, ++ bool verifyoop, ++ bool generate_poll) { ++ // it's a convention that the bytecode to dispatch to in rnext ++ if (VerifyActivationFrameSize) { ++ Label L; ++ subptr(rfp, esp, rscratch1); ++ int32_t min_frame_size = ++ (frame::link_offset - frame::interpreter_frame_initial_sp_offset) * ++ wordSize; ++ cmpptr(rscratch1, (int32_t)min_frame_size); ++ jcc(Assembler::greaterEqual, L); ++ stop("broken stack frame"); ++ bind(L); ++ } ++ if (verifyoop) { ++ verify_oop(FSR, state); ++ } ++ ++ address* const safepoint_table = Interpreter::safept_table(state); ++ Label no_safepoint, dispatch; ++ if (SafepointMechanism::uses_thread_local_poll() && table != safepoint_table && generate_poll) { ++ NOT_PRODUCT(block_comment("Thread-local Safepoint poll")); ++ testb(Address(rthread, Thread::polling_page_offset()), SafepointMechanism::poll_bit()); ++ ++ jcc(Assembler::zero, no_safepoint); ++ lea(rscratch1, ExternalAddress((address)safepoint_table)); ++ jmp(dispatch); ++ } ++ ++ bind(no_safepoint); ++ lea(rscratch1, ExternalAddress((address)table)); ++ bind(dispatch); ++ jmp(Address(rscratch1, rnext, Address::times_8)); //set rnext like dispatch_next ++} ++ ++void InterpreterMacroAssembler::dispatch_only(TosState state, bool generate_poll) { ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_normal(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state)); ++} ++ ++void InterpreterMacroAssembler::dispatch_only_noverify(TosState state) { ++ dispatch_base(state, Interpreter::normal_table(state), false); ++} ++ ++ ++void 
InterpreterMacroAssembler::dispatch_next(TosState state, int step, bool generate_poll) {SCOPEMARK_NAME(InterpreterMacroAssembler::dispatch_next, this) ++ // load next bytecode (load before advancing rbcp to prevent AGI) ++ load_unsigned_byte(rnext, Address(rbcp, step));//use rnext in dispatch_base ++ // advance rbcp ++ incrementl(rbcp, step); ++ dispatch_base(state, Interpreter::dispatch_table(state), true, generate_poll); ++} ++ ++void InterpreterMacroAssembler::dispatch_via(TosState state, address* table) { ++ // load current bytecode ++ load_unsigned_byte(rnext, Address(rbcp, 0)); ++ dispatch_base(state, table); ++} ++ ++void InterpreterMacroAssembler::narrow(Register result) { ++ ++ const Register rcx = T9; ++ // Get method->_constMethod->_result_type ++ ldptr(rcx, Address(rfp, frame::interpreter_frame_method_offset * wordSize)); ++ ldptr(rcx, Address(rcx, Method::const_offset())); ++ load_unsigned_byte(rcx, Address(rcx, ConstMethod::result_type_offset())); ++ ++ Label done, notBool, notByte, notChar; ++ ++ // common case first ++ cmpw(rcx, T_INT); ++ jcc(Assembler::equal, done); ++ ++ // mask integer result to narrower return type. ++ cmpw(rcx, T_BOOLEAN); ++ jcc(Assembler::notEqual, notBool); ++ andw(result, 0x1, result); ++ jmp(done); ++ ++ bind(notBool); ++ cmpw(rcx, T_BYTE); ++ jcc(Assembler::notEqual, notByte); ++ sextb(result, result); //TODO jzy 64-bits? 32-bits? ++ jmp(done); ++ ++ bind(notByte); ++ cmpw(rcx, T_CHAR); ++ jcc(Assembler::notEqual, notChar); ++ zapnot(result, 0x3, result); ++ jmp(done); ++ ++ bind(notChar); ++ // cmpw(rcx, T_SHORT); // all that's left ++ // jcc(Assembler::notEqual, done); ++ sexth(result, result); ++ ++ // Nothing to do for T_INT ++ bind(done); ++} ++ ++// remove activation ++// ++// Unlock the receiver if this is a synchronized method. ++// Unlock any Java monitors from syncronized blocks. ++// Remove the activation from the stack. ++// ++// If there are locked Java monitors ++// If throw_monitor_exception ++// throws IllegalMonitorStateException ++// Else if install_monitor_exception ++// installs IllegalMonitorStateException ++// Else ++// no error processing ++void InterpreterMacroAssembler::remove_activation( ++ TosState state, ++ Register ret_addr, ++ bool throw_monitor_exception, ++ bool install_monitor_exception, ++ bool notify_jvmdi) {SCOPEMARK_NAME(remove_activation, this) ++ // Note: Registers V0, T4 and f0, f1 may be in use for the ++ // result check if synchronized method ++ Label unlocked, unlock, no_unlock; ++ ++ const Register rbx = T2; ++ const Register rcx = T3; ++ const Register robj = c_rarg1; ++ const Register rmon = c_rarg1; ++ ++ // get the value of _do_not_unlock_if_synchronized into rdx ++ const Address do_not_unlock_if_synchronized(rthread, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ ldbu(rbx, do_not_unlock_if_synchronized); ++ stb(R0, do_not_unlock_if_synchronized); // reset the flag ++ ++ // get method access flags ++ ldptr(rcx, Address(rfp, frame::interpreter_frame_method_offset * wordSize)); ++ ldw(rcx, Address(rcx, Method::access_flags_offset())); ++ testw(rcx, JVM_ACC_SYNCHRONIZED); ++ jcc(Assembler::zero, unlocked); ++ ++ // Don't unlock anything if the _do_not_unlock_if_synchronized flag ++ // is set. ++ jcc(Assembler::notZero, no_unlock, rbx); ++ ++ // unlock monitor ++ push(state); // save result ++ ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. 
However, need to check that the object has ++ // not been unlocked by an explicit monitorexit bytecode. ++ const Address monitor(rfp, frame::interpreter_frame_initial_sp_offset * ++ wordSize - (int) sizeof(BasicObjectLock)); ++ // We use c_rarg1 so that if we go slow path it will be the correct ++ // register for unlock_object to pass to VM directly ++ lea(robj, monitor); // address of first monitor ++ ++ ldptr(FSR, Address(robj, BasicObjectLock::obj_offset_in_bytes())); ++ jcc(Assembler::notZero, unlock, FSR); ++ ++ pop(state); ++ if (throw_monitor_exception) { ++ // Entry already unlocked, need to throw exception ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ should_not_reach_here("throw_illegal_monitor_state_exception"); ++ } else { ++ // Monitor already unlocked during a stack unroll. If requested, ++ // install an illegal_monitor_state_exception. Continue with ++ // stack unrolling. ++ if (install_monitor_exception) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::new_illegal_monitor_state_exception)); ++ } ++ jmp(unlocked); ++ } ++ ++ bind(unlock); ++ unlock_object(robj); ++ pop(state); ++ ++ // Check that for block-structured locking (i.e., that all locked ++ // objects has been unlocked) ++ bind(unlocked); ++ ++ // FSR, rdx: Might contain return value ++ ++ // Check that all monitors are unlocked ++ { ++ Label loop, exception, entry, restart; ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ const Address monitor_block_top( ++ rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ rfp, frame::interpreter_frame_initial_sp_offset * wordSize); ++ ++ bind(restart); ++ // We use c_rarg1 so that if we go slow path it will be the correct ++ // register for unlock_object to pass to VM directly ++ ldptr(rmon, monitor_block_top); // points to current entry, starting ++ // with top-most entry ++ lea(rbx, monitor_block_bot); // points to word before bottom of ++ // monitor block ++ jmp(entry); ++ ++ // Entry already locked, need to throw exception ++ bind(exception); ++ ++ if (throw_monitor_exception) { ++ // Throw exception ++ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime:: ++ throw_illegal_monitor_state_exception)); ++ should_not_reach_here("892 throw_illegal_monitor_state_exception"); ++ } else { ++ // Stack unrolling. Unlock object and install illegal_monitor_exception. ++ // Unlock does not block, so don't have to worry about the frame. ++ // We don't have to preserve c_rarg1 since we are going to throw an exception. 
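Taken together, this unlock path and the loop below amount to the following scan (a rough C-level sketch; the pointer and helper names are illustrative, not part of the patch):

    // walk every monitor slot between monitor_block_top and monitor_block_bot;
    // a slot whose obj is still non-NULL is still locked
    for (BasicObjectLock* slot = monitor_block_top; slot != monitor_block_bot; slot++) {
      if (slot->obj() != NULL) {
        if (throw_monitor_exception) {
          throw_illegal_monitor_state_exception();   // does not return
        } else {
          unlock_object(slot);                       // stack-unrolling path
          if (install_monitor_exception) new_illegal_monitor_state_exception();
          // ...and restart the scan from the top
        }
      }
    }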
++ ++ push(state); ++ movl(robj, rmon); // nop if robj and rmon are the same ++ unlock_object(robj); ++ pop(state); ++ ++ if (install_monitor_exception) { ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ new_illegal_monitor_state_exception)); ++ } ++ ++ jmp(restart); ++ } ++ ++ bind(loop); ++ // check if current entry is used ++ ldptr(rcc, Address(rmon, BasicObjectLock::obj_offset_in_bytes())); ++ jcc(Assembler::notZero, exception); ++ ++ addptr(rmon, entry_size, rmon); // otherwise advance to next entry ++ bind(entry); ++ cmpptr(rmon, rbx); // check if bottom reached ++ jcc(Assembler::notEqual, loop); // if not at bottom then check this entry ++ } ++ ++ bind(no_unlock); ++ ++ // jvmti support ++ if (notify_jvmdi) { ++ notify_method_exit(state, NotifyJVMTI); // preserve TOSCA ++ } else { ++ notify_method_exit(state, SkipNotifyJVMTI); // preserve TOSCA ++ } ++ ++ // remove activation ++ // get sender sp ++ ldptr(rbx, ++ Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); ++ if (StackReservedPages > 0) { ++ // testing if reserved zone needs to be re-enabled ++ Label no_reserved_zone_enabling; ++ ++ ldwu(AT, Address(rthread, JavaThread::stack_guard_state_offset())); ++ cmpw(AT, JavaThread::stack_guard_enabled); ++ jcc(Assembler::equal, no_reserved_zone_enabling); ++ ++ ldptr(AT, Address(rthread, JavaThread::reserved_stack_activation_offset())); ++ cmpptr(rbx, AT); ++ jcc(Assembler::lessEqual, no_reserved_zone_enabling); ++ ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), rthread); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_delayed_StackOverflowError)); ++ should_not_reach_here("throw_delayed_StackOverflowError"); ++ ++ bind(no_reserved_zone_enabling); ++ } ++ leave(); // remove frame anchor ++ movl(ret_addr, RA); // get return address TODO:jzy ++ movl(esp, rbx); // set sp to sender sp ++} ++ ++void InterpreterMacroAssembler::get_method_counters(Register method, ++ Register mcs, Label& skip) { ++ Label has_counters; ++ ldptr(mcs, Address(method, Method::method_counters_offset())); ++ testptr(mcs, mcs); ++ jcc(Assembler::notZero, has_counters); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::build_method_counters), method); ++ ldptr(mcs, Address(method,Method::method_counters_offset())); ++ testptr(mcs, mcs); ++ jcc(Assembler::zero, skip); // No MethodCounters allocated, OutOfMemory ++ bind(has_counters); ++} ++ ++// Lock object ++// ++// Args: ++// rdx, c_rarg1: BasicObjectLock to be used for locking ++// ++// Kills: ++// rax, rbx ++void InterpreterMacroAssembler::lock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg1, ++ "The argument is only for looks. It must be c_rarg1"); ++ ++ const Register rax = T2; ++ const Register rbx = T1; ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), ++ lock_reg); ++ } else { ++ Label done; ++ ++ const Register swap_reg = rax; // Must use rax for cmpxchg instruction ++ const Register tmp_reg = rbx; // Will be passed to biased_locking_enter to avoid a ++ // problematic case where tmp_reg = no_reg. 
++ const Register obj_reg = c_rarg3; // Will contain the oop ++ ++ const int obj_offset = BasicObjectLock::obj_offset_in_bytes(); ++ const int lock_offset = BasicObjectLock::lock_offset_in_bytes (); ++ const int mark_offset = lock_offset + ++ BasicLock::displaced_header_offset_in_bytes(); ++ ++ Label slow_case; ++ ++ // Load object pointer into obj_reg ++ ldptr(obj_reg, Address(lock_reg, obj_offset)); ++ ++ if (UseBiasedLocking) { ++ biased_locking_enter(lock_reg, obj_reg, swap_reg, tmp_reg, false, done, &slow_case); ++ } ++ ++ // Load immediate 1 into swap_reg %rax ++ ldi(swap_reg, (int32_t)1, R0); ++ ++ // Load (object->mark() | 1) into swap_reg %rax ++ ldptr(AT, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ orptr(swap_reg, AT, swap_reg); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ stptr(swap_reg, Address(lock_reg, mark_offset)); ++ ++ assert(lock_offset == 0, ++ "displaced header must be first word in BasicObjectLock"); ++ ++ cmpxchg(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()), swap_reg);// ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ jcc(Assembler::failed, L, AT); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, tmp_reg, rscratch1); ++ jmp(done); ++ bind(L); ++ } else { ++ jcc(Assembler::success, done, AT); ++ } ++ ++ const int zero_bits = 7;// yj todo: 7 or 3?? 7 means lsb 3 bits must be same, while 3 mean 2 bits ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & zero_bits) == 0, and ++ // 2) esp <= mark < mark + os::pagesize() ++ // ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - esp) & (zero_bits - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant bits clear. ++ // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg ++ subptr(swap_reg, esp, swap_reg); ++ andptr(swap_reg, zero_bits - os::vm_page_size(), swap_reg); ++ ++ // Save the test result, for recursive case, the result is zero ++ stptr(swap_reg, Address(lock_reg, mark_offset)); ++ ++ if (PrintBiasedLockingStatistics) { ++ Label L; ++ jcc(Assembler::notZero, L, swap_reg); ++ atomic_inc32((address)BiasedLocking::fast_path_entry_count_addr(), 1, tmp_reg, rscratch1); ++ jmp(done); ++ bind(L); ++ } else { ++ jcc(Assembler::zero, done, swap_reg); ++ } ++ ++ bind(slow_case); ++ ++ // Call the runtime routine for slow case ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorenter), ++ lock_reg); ++ ++ bind(done); ++ } ++} ++ ++ ++// Unlocks an object. Used in monitorexit bytecode and ++// remove_activation. Throws an IllegalMonitorException if object is ++// not locked by current thread. ++// ++// Args: ++// c_rarg1: BasicObjectLock for lock ++// ++// Kills: ++// rax ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ... (param regs) ++// rscratch1, rscratch2 (scratch regs) ++void InterpreterMacroAssembler::unlock_object(Register lock_reg) { ++ assert(lock_reg == c_rarg1, ++ "The argument is only for looks. 
It must be c_rarg1"); ++ ++ if (UseHeavyMonitors) { ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ } else { ++ Label done; ++ ++ const Register swap_reg = T2; ++ const Register header_reg = c_rarg2; // Will contain the old oopMark ++ const Register obj_reg = c_rarg3; // Will contain the oop ++ ++ save_bcp(); // Save in case of exception ++ ++ // Convert from BasicObjectLock structure to object and BasicLock structure ++ // Store the BasicLock address into %T2 ++ lea(swap_reg, Address(lock_reg, BasicObjectLock::lock_offset_in_bytes())); ++ ++ // Load oop into obj_reg(%c_rarg3) ++ ldptr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); ++ ++ // Free entry ++ stptr(R0, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); ++ ++ if (UseBiasedLocking) { ++ biased_locking_exit(obj_reg, header_reg, done); ++ } ++ ++ // Load the old header from BasicLock structure ++ ldptr(header_reg, Address(swap_reg, ++ BasicLock::displaced_header_offset_in_bytes())); ++ ++ // Test for recursion ++ testptr(header_reg, header_reg); ++ ++ // zero for recursive case ++ jcc(Assembler::zero, done); ++ ++ // Atomic swap back the old header ++ cmpxchg(header_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()), swap_reg); ++ ++ // zero for recursive case ++ jcc(Assembler::success, done); ++ ++ // Call the runtime routine for slow case. ++ stptr(obj_reg, Address(lock_reg, BasicObjectLock::obj_offset_in_bytes())); // restore obj ++ ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::monitorexit), ++ lock_reg); ++ ++ bind(done); ++ ++ restore_bcp(); ++ } ++} ++ ++void InterpreterMacroAssembler::test_method_data_pointer(Register mdp, ++ Label& zero_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ ldptr(mdp, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize)); ++ jcc(Assembler::zero, zero_continue, mdp); ++} ++ ++// Set the method data pointer for the current bcp. ++void InterpreterMacroAssembler::set_method_data_pointer_for_bcp() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Label set_mdp; ++ const Register rax = V0;//TODO:why not save? jzy ++ const Register rbx = T9; ++// V0 and T0 will be used as two temporary registers. ++ stl(rax, (-1) * wordSize, esp); ++ stl(rbx, (-2) * wordSize, esp); ++ subl(esp, 2 * wordSize, esp); ++ ++ get_method(rbx); ++ // Test MDO to avoid the call if it is NULL. ++ ldptr(rax, Address(rbx, in_bytes(Method::method_data_offset()))); ++ testptr(rax, rax); ++ jcc(Assembler::zero, set_mdp); ++ // rbx: method ++ // _bcp_register: bcp ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::bcp_to_di), rbx, _bcp_register); ++ // rax: mdi ++ // mdo is guaranteed to be non-zero here, we checked for it before the call. ++ get_method(rbx); ++ ldptr(rbx, Address(rbx, in_bytes(Method::method_data_offset()))); ++ addptr(rbx, in_bytes(MethodData::data_offset()), rbx); ++ addptr(rax, rbx, rax); ++ bind(set_mdp); ++ stptr(rax, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize)); //TODO check? 
lsp ++ addl(esp, 2 * wordSize, esp); ++ ldl(rax, (-1) * wordSize, esp); ++ ldl(rbx, (-2) * wordSize, esp); ++} ++//TODO:why not save c_rarg0 c_rarg1 ++void InterpreterMacroAssembler::verify_method_data_pointer() { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++#ifdef ASSERT ++ Label verify_continue; ++ push(rax); ++ push(rbx); ++ Register arg3_reg = c_rarg3; ++ Register arg2_reg = c_rarg2; ++ push(arg3_reg); ++ push(arg2_reg); ++ test_method_data_pointer(arg3_reg, verify_continue); // If mdp is zero, continue ++ get_method(rbx); ++ ++ // If the mdp is valid, it will point to a DataLayout header which is ++ // consistent with the bcp. The converse is highly probable also. ++ load_unsigned_short(arg2_reg, ++ Address(arg3_reg, in_bytes(DataLayout::bci_offset()))); ++ addptr(arg2_reg, Address(rbx, Method::const_offset())); ++ lea(arg2_reg, Address(arg2_reg, ConstMethod::codes_offset())); ++ cmpptr(arg2_reg, _bcp_register); ++ jcc(Assembler::equal, verify_continue); ++ // rbx: method ++ // _bcp_register: bcp ++ // c_rarg3: mdp ++ call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::verify_mdp), ++ rbx, _bcp_register, arg3_reg); ++ bind(verify_continue); ++ pop(arg2_reg); ++ pop(arg3_reg); ++ pop(rbx); ++ pop(rax); ++#endif // ASSERT ++} ++ ++ ++void InterpreterMacroAssembler::set_mdp_data_at(Register mdp_in, ++ int constant, ++ Register value) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address data(mdp_in, constant); ++ stptr(value, data); ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ int constant, ++ bool decrement) { ++ // Counter address ++ Address data(mdp_in, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Address data, ++ bool decrement) { ++ //TODO check lsp??? ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ // %%% this does 64bit counters at best it is wasting space ++ // at worst it is a rare bug when counters overflow ++ ++ if (decrement) { ++ // Decrement the register. ++ ldptr(rscratch4, data); ++ ldi(rscratch4, (int32_t) -DataLayout::counter_increment, rscratch4); ++ // If the decrement causes the counter to overflow, stay negative ++ Label L; ++ jcc(Assembler::greaterEqual, L, rscratch4); ++ stptr(rscratch4, data); ++ bind(L); ++ } else { ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ ldptr(rscratch4, data); ++ // Increment the register. ++ ldi(rscratch1, DataLayout::counter_increment, rscratch4); ++ // If the increment causes the counter to overflow, pull back by 1. 
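++ // Illustrative sketch (plain C, not generated code) of the saturating
++ // increment emitted below:
++ //   uintptr_t old_val = *counter;
++ //   uintptr_t new_val = old_val + DataLayout::counter_increment;
++ //   if (new_val >= old_val) *counter = new_val;  // on overflow, keep old value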
++ Label L; ++ cmpult(rscratch1, rscratch4, rcc); ++ bne_l(rcc, L); ++ stptr(rscratch1, data); ++ bind(L); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::increment_mdp_data_at(Register mdp_in, ++ Register reg, ++ int constant, ++ bool decrement) {SCOPEMARK_NAME(InterpreterMacroAssembler::increment_mdp_data_at, this) ++ assert_different_registers(mdp_in, reg, rscratch1, rscratch4); ++ Address data(mdp_in, reg, Address::times_1, constant); ++ ++ increment_mdp_data_at(data, decrement); ++} ++ ++void InterpreterMacroAssembler::set_mdp_flag_at(Register mdp_in, ++ int flag_byte_constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ int header_offset = in_bytes(DataLayout::flags_offset()); ++ int header_bits = flag_byte_constant; ++ // Set the flag ++ ldbu(rscratch4, Address(mdp_in, header_offset)); ++ bis(rscratch4, header_bits, rscratch4); ++ stb(rscratch4, Address(mdp_in, header_offset)); ++} ++ ++ ++ ++void InterpreterMacroAssembler::test_mdp_data_at(Register mdp_in, ++ int offset, ++ Register value, ++ Register test_value_out, ++ Label& not_equal_continue) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ if (test_value_out == noreg) { ++ cmpptr(value, Address(mdp_in, offset)); ++ } else { ++ // Put the test value into a register, so caller can use it: ++ ldptr(test_value_out, Address(mdp_in, offset)); ++ cmpptr(test_value_out, value); ++ } ++ jcc(Assembler::notEqual, not_equal_continue); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address disp_address(mdp_in, offset_of_disp); ++ addptr(mdp_in, disp_address); ++ stptr(mdp_in, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_offset(Register mdp_in, ++ Register reg, ++ int offset_of_disp) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ Address disp_address(mdp_in, reg, Address::times_1, offset_of_disp); ++ addptr(mdp_in, disp_address); ++ stptr(mdp_in, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_by_constant(Register mdp_in, ++ int constant) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ addptr(mdp_in, constant, mdp_in); ++ stptr(mdp_in, Address(rfp, frame::interpreter_frame_mdp_offset * wordSize)); ++} ++ ++ ++void InterpreterMacroAssembler::update_mdp_for_ret(Register return_bci) { ++ assert(ProfileInterpreter, "must be profiling interpreter"); ++ push(return_bci); // save/restore across call_VM ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::update_mdp_for_ret), ++ return_bci); ++ pop(return_bci); ++} ++ ++ ++void InterpreterMacroAssembler::profile_taken_branch(Register mdp, ++ Register bumped_count) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ // Otherwise, assign to mdp ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the taken count. 
++ // We inline increment_mdp_data_at to return bumped_count in a register ++ //increment_mdp_data_at(mdp, in_bytes(JumpData::taken_offset())); ++ Address data(mdp, in_bytes(JumpData::taken_offset())); ++ ldptr(rscratch4, data); ++ assert(DataLayout::counter_increment == 1, ++ "flow-free idiom only works with 1"); ++ // yj: we learn aarch64 here to test overflow ++ Label L; ++ ldi(bumped_count, DataLayout::counter_increment, rscratch4); ++ cmpult(bumped_count, rscratch4, rcc); ++ bne_l(rcc, L); ++ stptr(bumped_count, data); ++ bind(L); ++// stptr(bumped_count, in_bytes(JumpData::taken_offset()), mdp); // Store back out ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, in_bytes(JumpData::displacement_offset())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_not_taken_branch(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are taking a branch. Increment the not taken count. ++ increment_mdp_data_at(mdp, in_bytes(BranchData::not_taken_offset())); ++ ++ // The method data pointer needs to be updated to correspond to ++ // the next bytecode ++ update_mdp_by_constant(mdp, in_bytes(BranchData::branch_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++void InterpreterMacroAssembler::profile_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, in_bytes(CounterData::counter_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_final_call(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // We are making a call. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_virtual_call(Register receiver, ++ Register mdp, ++ Register reg2, ++ bool receiver_can_be_null) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label skip_receiver_profile; ++ if (receiver_can_be_null) { ++ Label not_null; ++ jcc(Assembler::notZero, not_null, receiver); ++ // We are making a call. Increment the count for null receiver. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ jmp(skip_receiver_profile); ++ bind(not_null); ++ } ++ ++ // Record the receiver type. ++ record_klass_in_profile(receiver, mdp, reg2, true); ++ bind(skip_receiver_profile); ++ ++ // The method data pointer needs to be updated to reflect the new target. 
++#if INCLUDE_JVMCI ++ if (MethodProfileWidth == 0) { ++ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); ++ } ++#else // INCLUDE_JVMCI ++ update_mdp_by_constant(mdp, ++ in_bytes(VirtualCallData:: ++ virtual_call_data_size())); ++#endif // INCLUDE_JVMCI ++ bind(profile_continue); ++ } ++} ++ ++#if INCLUDE_JVMCI ++void InterpreterMacroAssembler::profile_called_method(Register method, Register mdp, Register reg2) { ++ assert_different_registers(method, mdp, reg2); ++ if (ProfileInterpreter && MethodProfileWidth > 0) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ Label done; ++ record_item_in_profile_helper(method, mdp, reg2, 0, done, MethodProfileWidth, ++ &VirtualCallData::method_offset, &VirtualCallData::method_count_offset, in_bytes(VirtualCallData::nonprofiled_receiver_count_offset())); ++ bind(done); ++ ++ update_mdp_by_constant(mdp, in_bytes(VirtualCallData::virtual_call_data_size())); ++ bind(profile_continue); ++ } ++} ++#endif // INCLUDE_JVMCI ++ ++// This routine creates a state machine for updating the multi-row ++// type profile at a virtual call site (or other type-sensitive bytecode). ++// The machine visits each row (of receiver/count) until the receiver type ++// is found, or until it runs out of rows. At the same time, it remembers ++// the location of the first empty row. (An empty row records null for its ++// receiver, and can be allocated for a newly-observed receiver type.) ++// Because there are two degrees of freedom in the state, a simple linear ++// search will not work; it must be a decision tree. Hence this helper ++// function is recursive, to generate the required tree structured code. ++// It's the interpreter, so we are trading off code space for speed. ++// See below for example code. ++void InterpreterMacroAssembler::record_klass_in_profile_helper( ++ Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call) { ++ if (TypeProfileWidth == 0) { ++ if (is_virtual_call) { ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ } ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ increment_mdp_data_at(mdp, in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset())); ++ } ++#endif // INCLUDE_JVMCI ++ } else { ++ int non_profiled_offset = -1; ++ if (is_virtual_call) { ++ non_profiled_offset = in_bytes(CounterData::count_offset()); ++ } ++#if INCLUDE_JVMCI ++ else if (EnableJVMCI) { ++ non_profiled_offset = in_bytes(ReceiverTypeData::nonprofiled_receiver_count_offset()); ++ } ++#endif // INCLUDE_JVMCI ++ ++ record_item_in_profile_helper(receiver, mdp, reg2, 0, done, TypeProfileWidth, ++ &VirtualCallData::receiver_offset, &VirtualCallData::receiver_count_offset, non_profiled_offset); ++ } ++} ++ ++void InterpreterMacroAssembler::record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset) { ++ int last_row = total_rows - 1; ++ assert(start_row <= last_row, "must be work left to do"); ++ // Test this row for both the item and for null. ++ // Take any of three different outcomes: ++ // 1. found item => increment count and goto done ++ // 2. found null => keep looking for case 1, maybe allocate this cell ++ // 3. found something else => keep looking for cases 1 and 2 ++ // Case 3 is handled by a recursive call. 
++ for (int row = start_row; row <= last_row; row++) { ++ Label next_test; ++ bool test_for_null_also = (row == start_row); ++ ++ // See if the item is item[n]. ++ int item_offset = in_bytes(item_offset_fn(row)); ++ test_mdp_data_at(mdp, item_offset, item, ++ (test_for_null_also ? reg2 : noreg), ++ next_test); ++ // (Reg2 now contains the item from the CallData.) ++ ++ // The item is item[n]. Increment count[n]. ++ int count_offset = in_bytes(item_count_offset_fn(row)); ++ increment_mdp_data_at(mdp, count_offset); ++ jmp(done); ++ bind(next_test); ++ ++ if (test_for_null_also) { ++ Label found_null; ++ // Failed the equality check on item[n]... Test for null. ++ testptr(reg2, reg2); ++ if (start_row == last_row) { ++ // The only thing left to do is handle the null case. ++ if (non_profiled_offset >= 0) { ++ jcc(Assembler::zero, found_null); ++ // Item did not match any saved item and there is no empty row for it. ++ // Increment total counter to indicate polymorphic case. ++ increment_mdp_data_at(mdp, non_profiled_offset); ++ jmp(done); ++ bind(found_null); ++ } else { ++ jcc(Assembler::notZero, done); ++ } ++ break; ++ } ++ // Since null is rare, make it be the branch-taken case. ++ jcc(Assembler::zero, found_null); ++ ++ // Put all the "Case 3" tests here. ++ record_item_in_profile_helper(item, mdp, reg2, start_row + 1, done, total_rows, ++ item_offset_fn, item_count_offset_fn, non_profiled_offset); ++ ++ // Found a null. Keep searching for a matching item, ++ // but remember that this is an empty (unused) slot. ++ bind(found_null); ++ } ++ } ++ ++ // In the fall-through case, we found no matching item, but we ++ // observed the item[start_row] is NULL. ++ ++ // Fill in the item field and increment the count. ++ int item_offset = in_bytes(item_offset_fn(start_row)); ++ set_mdp_data_at(mdp, item_offset, item); ++ int count_offset = in_bytes(item_count_offset_fn(start_row)); ++ movw(reg2, DataLayout::counter_increment); ++ set_mdp_data_at(mdp, count_offset, reg2); ++ if (start_row > 0) { ++ jmp(done); ++ } ++} ++ ++// Example state machine code for three profile rows: ++// // main copy of decision tree, rooted at row[1] ++// if (row[0].rec == rec) { row[0].incr(); goto done; } ++// if (row[0].rec != NULL) { ++// // inner copy of decision tree, rooted at row[1] ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[1].rec != NULL) { ++// // degenerate decision tree, rooted at row[2] ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// if (row[2].rec != NULL) { count.incr(); goto done; } // overflow ++// row[2].init(rec); goto done; ++// } else { ++// // remember row[1] is empty ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[1].init(rec); goto done; ++// } ++// } else { ++// // remember row[0] is empty ++// if (row[1].rec == rec) { row[1].incr(); goto done; } ++// if (row[2].rec == rec) { row[2].incr(); goto done; } ++// row[0].init(rec); goto done; ++// } ++// done: ++ ++void InterpreterMacroAssembler::record_klass_in_profile(Register receiver, ++ Register mdp, Register reg2, ++ bool is_virtual_call) { ++ assert(ProfileInterpreter, "must be profiling"); ++ Label done; ++ ++ record_klass_in_profile_helper(receiver, mdp, reg2, 0, done, is_virtual_call); ++ ++ bind (done); ++} ++ ++void InterpreterMacroAssembler::profile_ret(Register return_bci, ++ Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ uint row; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the total ret count. ++ increment_mdp_data_at(mdp, in_bytes(CounterData::count_offset())); ++ ++ for (row = 0; row < RetData::row_limit(); row++) { ++ Label next_test; ++ ++ // See if return_bci is equal to bci[n]: ++ test_mdp_data_at(mdp, ++ in_bytes(RetData::bci_offset(row)), ++ return_bci, noreg, ++ next_test); ++ ++ // return_bci is equal to bci[n]. Increment the count. ++ increment_mdp_data_at(mdp, in_bytes(RetData::bci_count_offset(row))); ++ ++ // The method data pointer needs to be updated to reflect the new target. ++ update_mdp_by_offset(mdp, ++ in_bytes(RetData::bci_displacement_offset(row))); ++ jmp(profile_continue); ++ bind(next_test); ++ } ++ ++ update_mdp_for_ret(return_bci); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_null_seen(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ set_mdp_flag_at(mdp, BitData::null_seen_byte_constant()); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck_failed(Register mdp) { ++ if (ProfileInterpreter && TypeProfileCasts) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ int count_offset = in_bytes(CounterData::count_offset()); ++ // Back up the address, since we have already bumped the mdp. ++ count_offset -= in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // *Decrement* the counter. We expect to see zero or small negatives. ++ increment_mdp_data_at(mdp, count_offset, true); ++ ++ bind (profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_typecheck(Register mdp, Register klass, Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // The method data pointer needs to be updated. ++ int mdp_delta = in_bytes(BitData::bit_data_size()); ++ if (TypeProfileCasts) { ++ mdp_delta = in_bytes(VirtualCallData::virtual_call_data_size()); ++ ++ // Record the object type. ++ record_klass_in_profile(klass, mdp, reg2, false); ++ } ++ update_mdp_by_constant(mdp, mdp_delta); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_default(Register mdp) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. ++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Update the default case count ++ increment_mdp_data_at(mdp, ++ in_bytes(MultiBranchData::default_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ in_bytes(MultiBranchData:: ++ default_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::profile_switch_case(Register index, ++ Register mdp, ++ Register reg2) { ++ if (ProfileInterpreter) { ++ Label profile_continue; ++ ++ // If no method data exists, go to profile_continue. 
++ test_method_data_pointer(mdp, profile_continue); ++ ++ // Build the base (index * per_case_size_in_bytes()) + ++ // case_array_offset_in_bytes() ++ movw(reg2, in_bytes(MultiBranchData::per_case_size())); ++ mull(index, reg2, index); ++ addptr(index, in_bytes(MultiBranchData::case_array_offset()), index); ++ ++ // Update the case count ++ increment_mdp_data_at(mdp, ++ index, ++ in_bytes(MultiBranchData::relative_count_offset())); ++ ++ // The method data pointer needs to be updated. ++ update_mdp_by_offset(mdp, ++ index, ++ in_bytes(MultiBranchData:: ++ relative_displacement_offset())); ++ ++ bind(profile_continue); ++ } ++} ++ ++ ++ ++void InterpreterMacroAssembler::verify_oop(Register reg, TosState state) { ++ if (state == atos) { ++ MacroAssembler::verify_oop(reg); ++ } ++} ++ ++void InterpreterMacroAssembler::verify_FPU(int stack_depth, TosState state) { ; } ++ ++// Jump if ((*counter_addr += increment) & mask) satisfies the condition. ++void InterpreterMacroAssembler::increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where) { ++ assert_different_registers(scratch, rcc); ++ if (!preloaded) { ++ ldwu(scratch, counter_addr); ++ } ++ incrementw(scratch, increment); ++ stw(scratch, counter_addr); ++ ldwu(rcc, mask); ++ andw(scratch, rcc, scratch); ++ if (where != NULL) { ++ jcc(cond, *where, scratch); ++ } ++} ++ ++void InterpreterMacroAssembler::notify_method_entry() {SCOPEMARK_NAME(notify_method_entry, this) ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register rdx = rscratch4; ++ Register rarg = c_rarg1; ++ if (JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ get_thread(rthread); ++ ldw(rdx, Address(rthread, JavaThread::interp_only_mode_offset())); ++ jcc(Assembler::zero, L, rdx); ++ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_method_entry)); ++ bind(L); ++ } ++ ++ { ++ SkipIfEqual skip_if(this, &DTraceMethodProbes, 0); ++ get_method(rarg); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ rthread, rarg); ++ } ++ ++ // RedefineClasses() tracing support for obsolete method entry ++ if (log_is_enabled(Trace, redefine, class, obsolete)) { ++ NOT_LP64(get_thread(rthread);) ++ get_method(rarg); ++ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), ++ rthread, rarg); ++ } ++} ++ ++ ++void InterpreterMacroAssembler::notify_method_exit( ++ TosState state, NotifyMethodExitMode mode) {SCOPEMARK_NAME(notify_method_exit, this) ++ // Whenever JVMTI is interp_only_mode, method entry/exit events are sent to ++ // track stack depth. If it is possible to enter interp_only_mode we add ++ // the code to check if the event should be sent. ++ Register rdx = rscratch4; ++ Register rarg = c_rarg1; ++ if (mode == NotifyJVMTI && JvmtiExport::can_post_interpreter_events()) { ++ Label L; ++ // Note: frame::interpreter_frame_result has a dependency on how the ++ // method result is saved across the call to post_method_exit. If this ++ // is changed then the interpreter_frame_result implementation will ++ // need to be updated too. ++ ++ // template interpreter will leave the result on the top of the stack. 
++ push(state); ++ ldw(rdx, Address(rthread, JavaThread::interp_only_mode_offset())); ++ jcc(Assembler::zero, L, rdx); ++ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_method_exit)); ++ bind(L); ++ pop(state); ++ } ++ ++ { ++ SkipIfEqual skip(this, &DTraceMethodProbes, false); ++ push(state); ++ get_thread(rthread); ++ get_method(rarg); ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ rthread, rarg); ++ pop(state); ++ } ++} +diff --git a/src/hotspot/cpu/sw64/interp_masm_sw64.hpp b/src/hotspot/cpu/sw64/interp_masm_sw64.hpp +new file mode 100644 +index 0000000000..f6bdd0c071 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/interp_masm_sw64.hpp +@@ -0,0 +1,293 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2015, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_SW64_VM_INTERP_MASM_SW64_64_HPP ++#define CPU_SW64_VM_INTERP_MASM_SW64_64_HPP ++ ++#include "asm/macroAssembler.hpp" ++#include "interpreter/invocationCounter.hpp" ++#include "runtime/frame.hpp" ++ ++// This file specializes the assember with interpreter-specific macros ++ ++typedef ByteSize (*OffsetFunction)(uint); ++ ++class InterpreterMacroAssembler: public MacroAssembler { ++ protected: ++ ++ // Interpreter specific version of call_VM_base ++ using MacroAssembler::call_VM_leaf_base; ++ ++ // Interpreter specific version of call_VM_base ++ virtual void call_VM_leaf_base(address entry_point, ++ int number_of_arguments); ++ ++ virtual void call_VM_base(Register oop_result, ++ Register java_thread, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments, ++ bool check_exceptions); ++ ++ // base routine for all dispatches ++ void dispatch_base(TosState state, address* table, bool verifyoop = true, bool generate_poll = false); ++ ++ public: ++ InterpreterMacroAssembler(CodeBuffer* code) : MacroAssembler(code), ++ _locals_register(rlocals), ++ _bcp_register(rbcp) {} ++ ++ void jump_to_entry(address entry); ++ ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ void load_earlyret_value(TosState state); ++ ++ // Interpreter-specific registers ++ void save_bcp() {assert(_bcp_register == rbcp, "_bcp_register should rbcp"); ++ stptr(_bcp_register, Address(rfp, frame::interpreter_frame_bcp_offset * wordSize)); ++ } ++ ++ void restore_bcp() {assert(_bcp_register == rbcp, "_bcp_register should rbcp"); ++ ldptr(_bcp_register, Address(rfp, frame::interpreter_frame_bcp_offset * wordSize)); ++ } ++ ++ void restore_locals() {assert(_locals_register == rlocals, "_locals_register should rlocals"); ++ ldptr(_locals_register, Address(rfp, frame::interpreter_frame_locals_offset * wordSize)); ++ } ++ ++ // Helpers for runtime call arguments/results ++ void get_method(Register reg) { ++ ldptr(reg, Address(rfp, frame::interpreter_frame_method_offset * wordSize)); ++ } ++ ++ void get_const(Register reg) { ++ get_method(reg); ++ ldptr(reg, Address(reg, Method::const_offset())); ++ } ++ ++ void get_constant_pool(Register reg) { ++ get_const(reg); ++ ldptr(reg, Address(reg, ConstMethod::constants_offset())); ++ } ++ ++ void get_constant_pool_cache(Register reg) { ++ get_constant_pool(reg); ++ ldptr(reg, Address(reg, ConstantPool::cache_offset_in_bytes())); ++ } ++ ++ void get_cpool_and_tags(Register cpool, Register tags) { ++ get_constant_pool(cpool); ++ ldptr(tags, Address(cpool, ConstantPool::tags_offset_in_bytes())); ++ } ++ ++ void get_unsigned_2_byte_index_at_bcp(Register reg, int bcp_offset); ++ void get_cache_and_index_at_bcp(Register cache, ++ Register index, ++ int bcp_offset, ++ size_t index_size = sizeof(u2)); ++ void get_cache_and_index_and_bytecode_at_bcp(Register cache, ++ Register index, ++ Register bytecode, ++ int byte_no, ++ int bcp_offset, ++ size_t index_size = sizeof(u2)); ++ void get_cache_entry_pointer_at_bcp(Register cache, ++ Register tmp, ++ int bcp_offset, ++ size_t index_size = sizeof(u2)); ++ void get_cache_index_at_bcp(Register index, ++ int bcp_offset, ++ size_t index_size = sizeof(u2)); ++ ++ // load cpool->resolved_references(index); ++ void load_resolved_reference_at_index(Register result, Register index, Register tmp = rscratch2); ++ ++ // load cpool->resolved_klass_at(index) ++ void load_resolved_klass_at_index(Register cpool, // the constant pool 
(corrupted on return) ++ Register index, // the constant pool index (corrupted on return) ++ Register klass); // contains the Klass on return ++ ++ void pop_ptr(Register r = FSR); ++ void pop_i(Register r = FSR); ++ void push_ptr(Register r = FSR); ++ void push_i(Register r = FSR); ++ ++ ++ void push_f(FloatRegister r = FSF); ++ void pop_f(FloatRegister r = FSF); ++ void pop_d(FloatRegister r = FSF); ++ void push_d(FloatRegister r = FSF); ++ ++ void pop_l(Register r = FSR); ++ void push_l(Register r = FSR); ++ ++ void pop(Register r) { ((MacroAssembler*)this)->pop(r); } ++ void push(Register r) { ((MacroAssembler*)this)->push(r); } ++ void push(int32_t imm ) { ((MacroAssembler*)this)->push(imm); } ++ ++ void pop(TosState state); // transition vtos -> state ++ void push(TosState state); // transition state -> vtos ++ ++// void pop(RegSet regs, Register stack) { ((MacroAssembler*)this)->pop(regs, stack); } ++// void push(RegSet regs, Register stack) { ((MacroAssembler*)this)->push(regs, stack); } ++ ++ void empty_expression_stack() { ++ ldptr(esp, Address(rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize)); ++ // NULL last_sp until next java call ++ stptr(R0, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ } ++ ++ // Helpers for swap and dup ++ void load_ptr(int n, Register val); ++ void store_ptr(int n, Register val); ++ ++ // Generate a subtype check: branch to ok_is_subtype if sub_klass is ++ // a subtype of super_klass. ++ void gen_subtype_check( Register sub_klass, Label &ok_is_subtype ); ++ ++ // Dispatching ++ void dispatch_prolog(TosState state, int step = 0); ++ void dispatch_epilog(TosState state, int step = 0); ++ // dispatch via rbx (assume rbx is loaded already) ++ void dispatch_only(TosState state, bool generate_poll = false); ++ // dispatch normal table via rbx (assume rbx is loaded already) ++ void dispatch_only_normal(TosState state); ++ void dispatch_only_noverify(TosState state); ++ // load rbx from [_bcp_register + step] and dispatch via rbx ++ void dispatch_next(TosState state, int step = 0, bool generate_poll = false); ++ // load rbx from [_bcp_register] and dispatch via rbx and table ++ void dispatch_via (TosState state, address* table); ++ ++ // jump to an invoked target ++ void prepare_to_jump_from_interpreted(); ++ void jump_from_interpreted(Register method, Register temp); ++ ++ // narrow int return value ++ void narrow(Register result); ++ ++ // Returning from interpreted functions ++ // ++ // Removes the current activation (incl. unlocking of monitors) ++ // and sets up the return address. This code is also used for ++ // exception unwindwing. In that case, we do not want to throw ++ // IllegalMonitorStateExceptions, since that might get us into an ++ // infinite rethrow exception loop. ++ // Additionally this code is used for popFrame and earlyReturn. ++ // In popFrame case we want to skip throwing an exception, ++ // installing an exception, and notifying jvmdi. ++ // In earlyReturn case we only want to skip throwing an exception ++ // and installing an exception. 
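++ // Illustrative (hypothetical) parameterizations implied by the comment above:
++ //   remove_activation(state, ret);                      // normal return: defaults
++ //   remove_activation(state, ret, false, false, false); // popFrame
++ //   remove_activation(state, ret, false, false, true);  // earlyReturn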
++ void remove_activation(TosState state, Register ret_addr, ++ bool throw_monitor_exception = true, ++ bool install_monitor_exception = true, ++ bool notify_jvmdi = true); ++ void get_method_counters(Register method, Register mcs, Label& skip); ++ ++ // Object locking ++ void lock_object (Register lock_reg); ++ void unlock_object(Register lock_reg); ++ ++ // Interpreter profiling operations ++ void set_method_data_pointer_for_bcp(); ++ void test_method_data_pointer(Register mdp, Label& zero_continue); ++ void verify_method_data_pointer(); ++ ++ void set_mdp_data_at(Register mdp_in, int constant, Register value); ++ void increment_mdp_data_at(Address data, bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, int constant, ++ bool decrement = false); ++ void increment_mdp_data_at(Register mdp_in, Register reg, int constant, ++ bool decrement = false); ++ void increment_mask_and_jump(Address counter_addr, ++ int increment, Address mask, ++ Register scratch, bool preloaded, ++ Condition cond, Label* where); ++ void set_mdp_flag_at(Register mdp_in, int flag_constant); ++ void test_mdp_data_at(Register mdp_in, int offset, Register value, ++ Register test_value_out, ++ Label& not_equal_continue); ++ ++ void record_klass_in_profile(Register receiver, Register mdp, ++ Register reg2, bool is_virtual_call); ++ void record_klass_in_profile_helper(Register receiver, Register mdp, ++ Register reg2, int start_row, ++ Label& done, bool is_virtual_call); ++ void record_item_in_profile_helper(Register item, Register mdp, ++ Register reg2, int start_row, Label& done, int total_rows, ++ OffsetFunction item_offset_fn, OffsetFunction item_count_offset_fn, ++ int non_profiled_offset); ++ ++ void update_mdp_by_offset(Register mdp_in, int offset_of_offset); ++ void update_mdp_by_offset(Register mdp_in, Register reg, int offset_of_disp); ++ void update_mdp_by_constant(Register mdp_in, int constant); ++ void update_mdp_for_ret(Register return_bci); ++ ++ void profile_taken_branch(Register mdp, Register bumped_count); ++ void profile_not_taken_branch(Register mdp); ++ void profile_call(Register mdp); ++ void profile_final_call(Register mdp); ++ void profile_virtual_call(Register receiver, Register mdp, ++ Register scratch2, ++ bool receiver_can_be_null = false); ++ void profile_called_method(Register method, Register mdp, Register reg2) NOT_JVMCI_RETURN; ++ void profile_ret(Register return_bci, Register mdp); ++ void profile_null_seen(Register mdp); ++ void profile_typecheck(Register mdp, Register klass, Register scratch); ++ void profile_typecheck_failed(Register mdp); ++ void profile_switch_default(Register mdp); ++ void profile_switch_case(Register index_in_scratch, Register mdp, ++ Register scratch2); ++ ++ // Debugging ++ // only if +VerifyOops && state == atos ++ void verify_oop(Register reg, TosState state = atos); ++ // only if +VerifyFPU && (state == ftos || state == dtos) ++ void verify_FPU(int stack_depth, TosState state = ftos); ++ ++ typedef enum { NotifyJVMTI, SkipNotifyJVMTI } NotifyMethodExitMode; ++ ++ // support for jvmti/dtrace ++ void notify_method_entry(); ++ void notify_method_exit(TosState state, NotifyMethodExitMode mode); ++ ++ void get_2_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ void get_4_byte_integer_at_bcp(Register reg, Register tmp, int offset); ++ ++ private: ++ ++ Register _locals_register; // register that contains the pointer to the locals ++ Register _bcp_register; // register that contains the bcp ++ ++ public: ++ void 
profile_obj_type(Register obj, const Address& mdo_addr); ++ void profile_arguments_type(Register mdp, Register callee, Register tmp, bool is_virtual); ++ void profile_return_type(Register mdp, Register ret, Register tmp); ++ void profile_parameters_type(Register mdp, Register tmp1, Register tmp2); ++ ++}; ++ ++#endif // CPU_SW64_VM_INTERP_MASM_SW64_64_HPP +diff --git a/src/hotspot/cpu/sw64/interpreterRT_sw64.cpp b/src/hotspot/cpu/sw64/interpreterRT_sw64.cpp +new file mode 100644 +index 0000000000..9ecdde038f +--- /dev/null ++++ b/src/hotspot/cpu/sw64/interpreterRT_sw64.cpp +@@ -0,0 +1,324 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/universe.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/signature.hpp" ++ ++#define __ _masm-> ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++// Implementation of SignatureHandlerGenerator ++ ++InterpreterRuntime::SignatureHandlerGenerator::SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer) : ++ NativeSignatureIterator(method) { ++ _masm = new MacroAssembler(buffer); ++ _num_args = (method->is_static() ? 1 : 0); ++ _stack_offset = 0; // don't overwrite return address ++ _floatreg_start_index = FloatRegisterImpl::float_arg_base + 1; //because a0(16) must be env in JNI, so float parameter register should start 17. 
same reason in generator_slow_signature ++} ++ ++Register InterpreterRuntime::SignatureHandlerGenerator::from() { return rlocals; } ++Register InterpreterRuntime::SignatureHandlerGenerator::to() { return esp; } ++Register InterpreterRuntime::SignatureHandlerGenerator::temp() { return rscratch1; } ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_int() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); ++ switch (_num_args) { ++ case 0: ++ __ ldws(c_rarg1, src); ++ _num_args++; ++ break; ++ case 1: ++ __ ldws(c_rarg2, src); ++ _num_args++; ++ break; ++ case 2: ++ __ ldws(c_rarg3, src); ++ _num_args++; ++ break; ++ case 3: ++ __ ldws(c_rarg4, src); ++ _num_args++; ++ break; ++ case 4: ++ __ ldws(c_rarg5, src); ++ _num_args++; ++ break; ++ default: ++ __ ldws(V0, src); ++ __ stw(V0, Address(to(), _stack_offset)); ++ _stack_offset += wordSize; ++ break; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_long() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ ++ switch (_num_args) { ++ case 0: ++ __ ldptr(c_rarg1, src); ++ _num_args++; ++ break; ++ case 1: ++ __ ldptr(c_rarg2, src); ++ _num_args++; ++ break; ++ case 2: ++ __ ldptr(c_rarg3, src); ++ _num_args++; ++ break; ++ case 3: ++ __ ldptr(c_rarg4, src); ++ _num_args++; ++ break; ++ case 4: ++ __ ldptr(c_rarg5, src); ++ _num_args++; ++ break; ++ default: ++ __ ldptr(V0, src); ++ __ stptr(V0, Address(to(), _stack_offset)); ++ _stack_offset += wordSize; ++ break; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_float() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); ++ ++ if (_num_args < Argument::n_float_register_parameters_c-1) { ++ __ flds(as_FloatRegister(_floatreg_start_index + _num_args), src); ++ _num_args++; ++ } else { ++ __ ldws(V0, src); ++ __ stw(V0, Address(to(), _stack_offset)); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_double() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset() + 1)); ++ ++ if (_num_args < Argument::n_float_register_parameters_c-1) { ++ __ fldd(as_FloatRegister(_floatreg_start_index + _num_args), src); ++ _num_args++; ++ } else { ++ __ ldptr(V0, src); ++ __ stptr(V0, Address(to(), _stack_offset)); ++ _stack_offset += wordSize; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::pass_object() { ++ const Address src(from(), Interpreter::local_offset_in_bytes(offset())); ++ Register rax = V0; ++ ++ switch (_num_args) { ++ case 0: ++ assert(offset() == 0, "argument register 1 can only be (non-null) receiver"); ++ __ lea(c_rarg1, src); ++ _num_args++; ++ break; ++ case 1: ++ __ lea(rax, src); ++ __ movl(c_rarg2, R0); ++ __ cmpptr(src, R0); ++ __ cmove(Assembler::notEqual, c_rarg2, rax, c_rarg2); ++ _num_args++; ++ break; ++ case 2: ++ __ lea(rax, src); ++ __ movl(c_rarg3, R0); ++ __ cmpptr(src, R0); ++ __ cmove(Assembler::notEqual, c_rarg3, rax, c_rarg3); ++ _num_args++; ++ break; ++ case 3: ++ __ lea(rax, src); ++ __ movl(c_rarg4, R0); ++ __ cmpptr(src, R0); ++ __ cmove(Assembler::notEqual, c_rarg4, rax, c_rarg4); ++ _num_args++; ++ break; ++ case 4: ++ __ lea(rax, src); ++ __ movl(c_rarg5, R0); ++ __ cmpptr(src, R0); ++ __ cmove(Assembler::notEqual, c_rarg5, rax, c_rarg5); ++ _num_args++; ++ break; ++ default: ++ __ lea(rax, src); ++ __ movl(temp(), R0); ++ __ cmpptr(src, R0); ++ __ cmove(Assembler::notEqual, temp(), rax, temp()); ++ __ stptr(temp(), Address(to(), 
_stack_offset)); ++ _stack_offset += wordSize; ++ break; ++ } ++} ++ ++void InterpreterRuntime::SignatureHandlerGenerator::generate(uint64_t fingerprint) { ++ // generate code to handle arguments ++ iterate(fingerprint); ++ ++ // return result handler ++ __ lea(V0, ExternalAddress(Interpreter::result_handler(method()->result_type()))); ++ // return ++ __ ret_sw(); ++ ++ __ flush(); ++} ++ ++ ++// Implementation of SignatureHandlerLibrary ++ ++void SignatureHandlerLibrary::pd_set_handler(address handler) {} ++ ++ ++class SlowSignatureHandler ++ : public NativeSignatureIterator { ++ private: ++ address _from; ++ intptr_t* _to; ++ intptr_t* _reg_args; ++ intptr_t* _fp_identifiers; ++ unsigned int _num_args; ++ ++ virtual void pass_int() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_int_register_parameters_c-1) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_long() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_int_register_parameters_c-1) { ++ *_reg_args++ = from_obj; ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_object() ++ { ++ intptr_t *from_addr = (intptr_t*)(_from + Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ if (_num_args < Argument::n_int_register_parameters_c-1) { ++ *_reg_args++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ _num_args++; ++ } else { ++ *_to++ = (*from_addr == 0) ? NULL : (intptr_t) from_addr; ++ } ++ } ++ ++ virtual void pass_float() ++ { ++ jint from_obj = *(jint *)(_from+Interpreter::local_offset_in_bytes(0)); ++ _from -= Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters_c-1) { ++ assert((_num_args*2) < BitsPerWord, "_num_args*2 is out of range"); ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= ((intptr_t)0x01 << (_num_args*2)); // mark as float ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ virtual void pass_double() ++ { ++ intptr_t from_obj = *(intptr_t*)(_from+Interpreter::local_offset_in_bytes(1)); ++ _from -= 2*Interpreter::stackElementSize; ++ ++ if (_num_args < Argument::n_float_register_parameters_c-1) { ++ assert((_num_args*2) < BitsPerWord, "_num_args*2 is out of range"); ++ *_reg_args++ = from_obj; ++ *_fp_identifiers |= ((intptr_t)0x3 << (_num_args*2)); // mark as double ++ _num_args++; ++ } else { ++ *_to++ = from_obj; ++ } ++ } ++ ++ public: ++ SlowSignatureHandler(const methodHandle& method, address from, intptr_t* to) ++ : NativeSignatureIterator(method) ++ { ++ _from = from; ++ _to = to; ++ ++ _reg_args = to - (method->is_static() ? 6 : 7); ++ _fp_identifiers = to - 2; ++ *(int*) _fp_identifiers = 0; ++ _num_args = (method->is_static() ? 
1 : 0); ++ } ++}; ++ ++ ++IRT_ENTRY(address, ++ InterpreterRuntime::slow_signature_handler(JavaThread* thread, ++ Method* method, ++ intptr_t* from, ++ intptr_t* to)) ++ methodHandle m(thread, (Method*)method); ++ assert(m->is_native(), "sanity check"); ++ ++ // handle arguments ++ SlowSignatureHandler(m, (address)from, to).iterate((uint64_t)CONST64(-1));//sw doesn't need to modify 'to' position ++ ++ // return result handler ++ return Interpreter::result_handler(m->result_type()); ++IRT_END +diff --git a/src/hotspot/cpu/sw64/interpreterRT_sw64.hpp b/src/hotspot/cpu/sw64/interpreterRT_sw64.hpp +new file mode 100644 +index 0000000000..a9de9f6c5b +--- /dev/null ++++ b/src/hotspot/cpu/sw64/interpreterRT_sw64.hpp +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 1998, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_INTERPRETERRT_SW64_HPP ++#define CPU_SW64_VM_INTERPRETERRT_SW64_HPP ++ ++// This is included in the middle of class Interpreter. ++// Do not include files here. ++ ++// native method calls ++ ++class SignatureHandlerGenerator: public NativeSignatureIterator { ++ private: ++ MacroAssembler* _masm; ++ unsigned int _num_args; ++ int _stack_offset; ++ int _floatreg_start_index; //because a0(16) must be env in JNI, so float parameter register should start 17. same reason in generator_slow_signature ++ ++ void pass_int(); ++ void pass_long(); ++ void pass_float(); ++ void pass_double(); ++ void pass_object(); ++ ++ public: ++ // Creation ++ SignatureHandlerGenerator(const methodHandle& method, CodeBuffer* buffer); ++ ++ // Code generation ++ void generate(uint64_t fingerprint); ++ ++ // Code generation support ++ static Register from(); ++ static Register to(); ++ static Register temp(); ++}; ++ ++#endif // CPU_SW64_VM_INTERPRETERRT_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/javaFrameAnchor_sw64.hpp b/src/hotspot/cpu/sw64/javaFrameAnchor_sw64.hpp +new file mode 100644 +index 0000000000..0efc2d0094 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/javaFrameAnchor_sw64.hpp +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_JAVAFRAMEANCHOR_SW64_HPP ++#define CPU_SW64_VM_JAVAFRAMEANCHOR_SW64_HPP ++ ++private: ++ ++ // FP value associated with _last_Java_sp: ++ intptr_t* volatile _last_Java_fp; // pointer is volatile not what it points to ++ ++public: ++ // Each arch must define reset, save, restore ++ // These are used by objects that only care about: ++ // 1 - initializing a new state (thread creation, javaCalls) ++ // 2 - saving a current state (javaCalls) ++ // 3 - restoring an old state (javaCalls) ++ ++ void clear(void) { ++ // clearing _last_Java_sp must be first ++ _last_Java_sp = NULL; ++ OrderAccess::release(); ++ _last_Java_fp = NULL; ++ _last_Java_pc = NULL; ++ } ++ ++ void copy(JavaFrameAnchor* src) { ++ // In order to make sure the transition state is valid for "this" ++ // We must clear _last_Java_sp before copying the rest of the new data ++ // ++ // Hack Alert: Temporary bugfix for 4717480/4721647 ++ // To act like previous version (pd_cache_state) don't NULL _last_Java_sp ++ // unless the value is changing ++ // ++ if (_last_Java_sp != src->_last_Java_sp) { ++ _last_Java_sp = NULL; ++ OrderAccess::release(); ++ } ++ _last_Java_fp = src->_last_Java_fp; ++ _last_Java_pc = src->_last_Java_pc; ++ // Must be last so profiler will always see valid frame if has_last_frame() is true ++ _last_Java_sp = src->_last_Java_sp; ++ } ++ ++ bool walkable(void) { return _last_Java_sp != NULL && _last_Java_pc != NULL; } ++ void make_walkable(JavaThread* thread); ++ void capture_last_Java_pc(void); ++ ++ intptr_t* last_Java_sp(void) const { return _last_Java_sp; } ++ ++ address last_Java_pc(void) { return _last_Java_pc; } ++ ++private: ++ ++ static ByteSize last_Java_fp_offset() { return byte_offset_of(JavaFrameAnchor, _last_Java_fp); } ++ ++public: ++ ++ void set_last_Java_sp(intptr_t* sp) { _last_Java_sp = sp; OrderAccess::release(); } ++ ++ intptr_t* last_Java_fp(void) { return _last_Java_fp; } ++ // Assert (last_Java_sp == NULL || fp == NULL) ++ void set_last_Java_fp(intptr_t* fp) { OrderAccess::release(); _last_Java_fp = fp; } ++ ++#endif // CPU_SW64_VM_JAVAFRAMEANCHOR_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/jniFastGetField_sw64.cpp b/src/hotspot/cpu/sw64/jniFastGetField_sw64.cpp +new file mode 100644 +index 0000000000..40137edecf +--- /dev/null ++++ b/src/hotspot/cpu/sw64/jniFastGetField_sw64.cpp +@@ -0,0 +1,252 @@ ++/* ++ * Copyright (c) 2004, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/safepoint.hpp" ++ ++#define __ masm-> ++ ++#define BUFFER_SIZE 30*wordSize ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} ++#endif ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Instead of issuing a LoadLoad barrier we create an address ++// dependency between loads; this might be more efficient. ++ ++// Common register usage: ++// r0/v0: result ++// c_rarg0: jni env ++// c_rarg1: obj ++// c_rarg2: jfield id ++ ++#define BUFFER_SIZE 30*wordSize ++ ++static const Register rtmp = T1; ++static const Register robj = T2; ++static const Register rcounter = T3; ++static const Register roffset = T4; ++static const Register rcounter_addr = T5; ++ ++address JNI_FastGetField::generate_fast_get_int_field0(BasicType type) { ++ const char *name = NULL; ++ switch (type) { ++ case T_BOOLEAN: name = "jni_fast_GetBooleanField"; break; ++ case T_BYTE: name = "jni_fast_GetByteField"; break; ++ case T_CHAR: name = "jni_fast_GetCharField"; break; ++ case T_SHORT: name = "jni_fast_GetShortField"; break; ++ case T_INT: name = "jni_fast_GetIntField"; break; ++ case T_LONG: name = "jni_fast_GetLongField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ ++ Label slow; ++ ++ ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); ++ __ ldwu (rcounter, counter); ++ __ movl (robj, c_rarg1); ++ __ testb (rcounter, 1); ++ __ jcc (Assembler::notZero, slow); ++ if (os::is_MP()) { ++ __ xorptr(robj, rcounter, robj); ++ __ xorptr(robj, rcounter, robj); // obj, since ++ // robj ^ rcounter ^ rcounter == robj ++ // robj is data dependent on rcounter. ++ } ++ ++ __ movl (roffset, c_rarg2); ++ __ srll(roffset, 2, roffset); // offset ++ ++ // Both robj and rtmp are clobbered by try_resolve_jobject_in_native. 
++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->try_resolve_jobject_in_native(masm, /* jni_env */ c_rarg0, robj, rtmp, slow); ++ DEBUG_ONLY(__ movw(rtmp, 0xDEADC0DE);) ++ ++ Register rax = V0; ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ ldbu (rax, Address(robj, roffset, Address::times_1)); break; ++ case T_BYTE: __ ldbu (AT, Address(robj, roffset, Address::times_1));__ sextb(AT, rax); break; ++ case T_CHAR: __ ldhu (rax, Address(robj, roffset, Address::times_1)); break; ++ case T_SHORT: __ ldhu (AT, Address(robj, roffset, Address::times_1));__ sexth(AT, rax); break; ++ case T_INT: __ ldws (rax, Address(robj, roffset, Address::times_1)); break; ++ case T_LONG: __ ldl (rax, Address(robj, roffset, Address::times_1)); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ if (os::is_MP()) { ++ __ lea(rcounter_addr, counter); ++ // ca is data dependent on rax. ++ __ xorptr(rcounter_addr, rax, rcounter_addr); ++ __ xorptr(rcounter_addr, rax, rcounter_addr); ++ __ cmpw (rcounter, Address(rcounter_addr, 0)); ++ } else { ++ __ lea(rcounter_addr, counter); ++ __ cmpw (rcounter, Address(rcounter_addr, 0)); ++ } ++ __ jcc (Assembler::notEqual, slow); ++ ++ __ ret_sw(); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_BOOLEAN: slow_case_addr = jni_GetBooleanField_addr(); break; ++ case T_BYTE: slow_case_addr = jni_GetByteField_addr(); break; ++ case T_CHAR: slow_case_addr = jni_GetCharField_addr(); break; ++ case T_SHORT: slow_case_addr = jni_GetShortField_addr(); break; ++ case T_INT: slow_case_addr = jni_GetIntField_addr(); break; ++ case T_LONG: slow_case_addr = jni_GetLongField_addr(); break; ++ default: break; ++ } ++ // tail call ++ __ jump (ExternalAddress(slow_case_addr)); ++ ++ __ flush (); ++ ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_boolean_field() { ++ return generate_fast_get_int_field0(T_BOOLEAN); ++} ++ ++address JNI_FastGetField::generate_fast_get_byte_field() { ++ return generate_fast_get_int_field0(T_BYTE); ++} ++ ++address JNI_FastGetField::generate_fast_get_char_field() { ++ return generate_fast_get_int_field0(T_CHAR); ++} ++ ++address JNI_FastGetField::generate_fast_get_short_field() { ++ return generate_fast_get_int_field0(T_SHORT); ++} ++ ++address JNI_FastGetField::generate_fast_get_int_field() { ++ return generate_fast_get_int_field0(T_INT); ++} ++ ++address JNI_FastGetField::generate_fast_get_long_field() { ++ return generate_fast_get_int_field0(T_LONG); ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field0(BasicType type) { ++ const char *name =NULL; ++ switch (type) { ++ case T_FLOAT: name = "jni_fast_GetFloatField"; break; ++ case T_DOUBLE: name = "jni_fast_GetDoubleField"; break; ++ default: ShouldNotReachHere(); ++ } ++ ResourceMark rm; ++ BufferBlob* blob = BufferBlob::create(name, BUFFER_SIZE); ++ CodeBuffer cbuf(blob); ++ MacroAssembler* masm = new MacroAssembler(&cbuf); ++ address fast_entry = __ pc(); ++ ++ Label slow; ++ ++ ExternalAddress counter(SafepointSynchronize::safepoint_counter_addr()); ++ __ ldwu (rcounter, counter); ++ __ movl (robj, c_rarg1); ++ __ testb (rcounter, 1); ++ __ jcc (Assembler::notZero, slow); ++ if (os::is_MP()) { ++ __ xorptr(robj, rcounter, robj); ++ __ xorptr(robj, rcounter, robj); // obj, since ++ // robj ^ rcounter ^ rcounter == robj ++ // robj is data dependent on rcounter. 
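// Illustrative aside: the double XOR above (used in both the int and float fast
// paths) relies on the identity (x ^ y) ^ y == x, so robj keeps its value but
// becomes data-dependent on rcounter; the later field load therefore cannot be
// reordered ahead of the safepoint-counter load even without an explicit
// LoadLoad barrier. A minimal stand-alone sketch of the identity (plain C++,
// values are arbitrary):
//
//   #include <cassert>
//   #include <cstdint>
//   int main() {
//     uintptr_t robj = 0x12345678, rcounter = 42;
//     uintptr_t r = robj ^ rcounter;   // mix in the counter value
//     r ^= rcounter;                   // cancel it out again
//     assert(r == robj);               // same value, new data dependency
//     return 0;
//   }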
++ } ++ ++ // Both robj and rtmp are clobbered by try_resolve_jobject_in_native. ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->try_resolve_jobject_in_native(masm, /* jni_env */ c_rarg0, robj, rtmp, slow); ++ DEBUG_ONLY(__ movw(rtmp, 0xDEADC0DE);) ++ ++ __ movl (roffset, c_rarg2); ++ __ srll (roffset, 2, roffset); // offset ++ ++ assert(count < LIST_CAPACITY, "LIST_CAPACITY too small"); ++ speculative_load_pclist[count] = __ pc(); ++ switch (type) { ++ case T_FLOAT: __ load_float (FSF, Address(robj, roffset, Address::times_1)); break; ++ case T_DOUBLE: __ load_double(FSF, Address(robj, roffset, Address::times_1)); break; ++ default: ShouldNotReachHere(); ++ } ++ ++ if (os::is_MP()) { ++ __ lea(rcounter_addr, counter); ++ __ fimovd (FSF, V0); ++ // counter address is data dependent on xmm0. ++ __ xorptr(rcounter_addr, V0, rcounter_addr); ++ __ xorptr(rcounter_addr, V0, rcounter_addr); ++ __ cmpw (rcounter, Address(rcounter_addr, 0)); ++ } else { ++ __ lea(rcounter_addr, counter); ++ __ cmpw (rcounter, Address(rcounter_addr, 0)); ++ } ++ __ jcc (Assembler::notEqual, slow); ++ ++ __ ret_sw(); ++ ++ slowcase_entry_pclist[count++] = __ pc(); ++ __ bind (slow); ++ address slow_case_addr = NULL; ++ switch (type) { ++ case T_FLOAT: slow_case_addr = jni_GetFloatField_addr(); break; ++ case T_DOUBLE: slow_case_addr = jni_GetDoubleField_addr(); break; ++ default: break; ++ } ++ // tail call ++ __ jump (ExternalAddress(slow_case_addr)); ++ ++ __ flush (); ++ ++ return fast_entry; ++} ++ ++address JNI_FastGetField::generate_fast_get_float_field() { ++ return generate_fast_get_float_field0(T_FLOAT); ++} ++ ++address JNI_FastGetField::generate_fast_get_double_field() { ++ return generate_fast_get_float_field0(T_DOUBLE); ++} +diff --git a/src/hotspot/cpu/sw64/jniTypes_sw64.hpp b/src/hotspot/cpu/sw64/jniTypes_sw64.hpp +new file mode 100644 +index 0000000000..73bebb85fc +--- /dev/null ++++ b/src/hotspot/cpu/sw64/jniTypes_sw64.hpp +@@ -0,0 +1,124 @@ ++/* ++ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ *
++ */
++
++#ifndef CPU_SW64_VM_JNITYPES_SW64_HPP
++#define CPU_SW64_VM_JNITYPES_SW64_HPP
++
++#include "jni.h"
++#include "memory/allocation.hpp"
++#include "oops/oop.hpp"
++
++// This file holds platform-dependent routines used to write primitive jni
++// types to the array of arguments passed into JavaCalls::call
++
++class JNITypes : AllStatic {
++  // These functions write a java primitive type (in native format)
++  // to a java stack slot array to be passed as an argument to JavaCalls::call.
++  // I.e., they are functionally 'push' operations if they have a 'pos'
++  // formal parameter.  Note that jlong's and jdouble's are written
++  // _in reverse_ of the order in which they appear in the interpreter
++  // stack.  This is because call stubs (see stubGenerator_sparc.cpp)
++  // reverse the argument list constructed by JavaCallArguments (see
++  // javaCalls.hpp).
++
++private:
++
++  // 32bit Helper routines.
++  static inline void put_int2r(jint *from, intptr_t *to)           { *(jint *)(to++) = from[1];
++                                                                     *(jint *)(to  ) = from[0]; }
++  static inline void put_int2r(jint *from, intptr_t *to, int& pos) { put_int2r(from, to + pos); pos += 2; }
++
++public:
++  // Ints are stored in native format in one JavaCallArgument slot at *to.
++  static inline void put_int(jint  from, intptr_t *to)           { *(intptr_t *)(to +   0  ) =  from; }
++  static inline void put_int(jint  from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) =  from; }
++  static inline void put_int(jint *from, intptr_t *to, int& pos) { *(intptr_t *)(to + pos++) = *from; }
++
++  // Longs are stored in native format in one JavaCallArgument slot at
++  // *(to).
++  // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest),
++  // *(to + 1) must contain a copy of the long value. Otherwise it will be corrupted.
++  static inline void put_long(jlong  from, intptr_t *to) {
++    *(jlong*) (to + 1) = from;
++    *(jlong*) (to)     = from;
++  }
++
++  static inline void put_long(jlong  from, intptr_t *to, int& pos) {
++    *(jlong*) (to + 1 + pos) = from;
++    *(jlong*) (to + pos)     = from;
++    pos += 2;
++  }
++
++  static inline void put_long(jlong *from, intptr_t *to, int& pos) {
++    *(jlong*) (to + 1 + pos) = *from;
++    *(jlong*) (to + pos)     = *from;
++    pos += 2;
++  }
++
++  // Oops are stored in native format in one JavaCallArgument slot at *to.
++  static inline void put_obj(oop  from, intptr_t *to)           { *(oop *)(to +   0  ) =  from; }
++  static inline void put_obj(oop  from, intptr_t *to, int& pos) { *(oop *)(to + pos++) =  from; }
++  static inline void put_obj(oop *from, intptr_t *to, int& pos) { *(oop *)(to + pos++) = *from; }
++
++  // Floats are stored in native format in one JavaCallArgument slot at *to.
++  static inline void put_float(jfloat  from, intptr_t *to)           { *(jfloat *)(to +   0  ) =  from; }
++  static inline void put_float(jfloat  from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) =  from; }
++  static inline void put_float(jfloat *from, intptr_t *to, int& pos) { *(jfloat *)(to + pos++) = *from; }
++
++#undef _JNI_SLOT_OFFSET
++#define _JNI_SLOT_OFFSET 0
++  // Doubles are stored in native word format in one JavaCallArgument
++  // slot at *(to).
++  // In theory, *(to + 1) is an empty slot. But, for several Java2D testing programs (TestBorderLayout, SwingTest),
++  // *(to + 1) must contain a copy of the value. Otherwise it will be corrupted.
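// Illustrative aside: put_long above (and put_double below) deliberately writes
// the same 64-bit value into both argument slots, per the comment about the
// Java2D regressions. A minimal stand-alone sketch of the resulting slot layout
// (plain C++, 64-bit host assumed):
//
//   #include <cassert>
//   #include <cstdint>
//   int main() {
//     intptr_t slots[2] = {0, 0};
//     int64_t v = 0x1122334455667788LL;
//     *(int64_t*)(slots + 1) = v;   // the "spare" high slot gets a copy
//     *(int64_t*)(slots + 0) = v;   // the primary slot
//     assert(slots[0] == (intptr_t)v && slots[1] == (intptr_t)v);
//     return 0;
//   }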
++ static inline void put_double(jdouble from, intptr_t *to) { ++ *(jdouble*) (to + 1) = from; ++ *(jdouble*) (to) = from; ++ } ++ ++ static inline void put_double(jdouble from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = from; ++ *(jdouble*) (to + pos) = from; ++ pos += 2; ++ } ++ ++ static inline void put_double(jdouble *from, intptr_t *to, int& pos) { ++ *(jdouble*) (to + 1 + pos) = *from; ++ *(jdouble*) (to + pos) = *from; ++ pos += 2; ++ } ++ ++ // The get_xxx routines, on the other hand, actually _do_ fetch ++ // java primitive types from the interpreter stack. ++ // No need to worry about alignment on Intel. ++ static inline jint get_int (intptr_t *from) { return *(jint *) from; } ++ static inline jlong get_long (intptr_t *from) { return *(jlong *) (from + _JNI_SLOT_OFFSET); } ++ static inline oop get_obj (intptr_t *from) { return *(oop *) from; } ++ static inline jfloat get_float (intptr_t *from) { return *(jfloat *) from; } ++ static inline jdouble get_double(intptr_t *from) { return *(jdouble *)(from + _JNI_SLOT_OFFSET); } ++#undef _JNI_SLOT_OFFSET ++}; ++ ++#endif // CPU_SW64_VM_JNITYPES_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/jvmciCodeInstaller_sw64.cpp b/src/hotspot/cpu/sw64/jvmciCodeInstaller_sw64.cpp +new file mode 100644 +index 0000000000..37d558d4ab +--- /dev/null ++++ b/src/hotspot/cpu/sw64/jvmciCodeInstaller_sw64.cpp +@@ -0,0 +1,69 @@ ++/* ++ * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++#include "jvmci/jvmciCodeInstaller.hpp" ++#include "jvmci/jvmciRuntime.hpp" ++#include "jvmci/jvmciCompilerToVM.hpp" ++#include "jvmci/jvmciJavaClasses.hpp" ++#include "oops/oop.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_sw64.inline.hpp" ++ ++jint CodeInstaller::pd_next_offset(NativeInstruction* inst, jint pc_offset, Handle method, TRAPS) { ++ Unimplemented(); ++ return 0; ++} ++ ++void CodeInstaller::pd_patch_OopConstant(int pc_offset, Handle constant, TRAPS) { ++ Unimplemented(); ++} ++ ++void CodeInstaller::pd_patch_MetaspaceConstant(int pc_offset, Handle constant, TRAPS) { ++ Unimplemented(); ++} ++ ++void CodeInstaller::pd_patch_DataSectionReference(int pc_offset, int data_offset, TRAPS) { ++ Unimplemented(); ++} ++ ++void CodeInstaller::pd_relocate_ForeignCall(NativeInstruction* inst, jlong foreign_call_destination, TRAPS) { ++ Unimplemented(); ++} ++ ++void CodeInstaller::pd_relocate_JavaMethod(CodeBuffer &cbuf, Handle hotspot_method, jint pc_offset, TRAPS) { ++ Unimplemented(); ++} ++ ++void CodeInstaller::pd_relocate_poll(address pc, jint mark, TRAPS) { ++ Unimplemented(); ++} ++ ++// convert JVMCI register indices (as used in oop maps) to HotSpot registers ++VMReg CodeInstaller::get_hotspot_reg(jint jvmci_reg, TRAPS) { ++ return NULL; ++} ++ ++bool CodeInstaller::is_general_purpose_reg(VMReg hotspotRegister) { ++ return false; ++} +\ No newline at end of file +diff --git a/src/hotspot/cpu/sw64/macroAssembler_sw64.cpp b/src/hotspot/cpu/sw64/macroAssembler_sw64.cpp +new file mode 100755 +index 0000000000..2480539961 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/macroAssembler_sw64.cpp +@@ -0,0 +1,5176 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "jvm.h" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "compiler/disassembler.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "gc/shared/collectedHeap.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "memory/resourceArea.hpp" ++#include "memory/universe.hpp" ++#include "oops/accessDecorators.hpp" ++#include "oops/klass.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/biasedLocking.hpp" ++#include "runtime/flags/flagSetting.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/objectMonitor.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepoint.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.hpp" ++#include "utilities/macros.hpp" ++#include "utilities/globalDefinitions_gcc.hpp" ++//#include "crc32c.h" ++#ifdef COMPILER2 ++#include "opto/intrinsicnode.hpp" ++#endif ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) block_comment(str) ++#define STOP(error) block_comment(error); stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++static Assembler::Condition reverse[] = { ++ Assembler::noOverflow /* overflow = 0x0 */ , ++ Assembler::overflow /* noOverflow = 0x1 */ , ++ Assembler::aboveEqual /* carrySet = 0x2, below = 0x2 */ , ++ Assembler::below /* aboveEqual = 0x3, carryClear = 0x3 */ , ++ Assembler::notZero /* zero = 0x4, equal = 0x4 */ , ++ Assembler::zero /* notZero = 0x5, notEqual = 0x5 */ , ++ Assembler::above /* belowEqual = 0x6 */ , ++ Assembler::belowEqual /* above = 0x7 */ , ++ Assembler::positive /* negative = 0x8 */ , ++ Assembler::negative /* positive = 0x9 */ , ++ Assembler::failed /* success = 0xa */ , ++ Assembler::success /* failed = 0xb */ , ++ Assembler::greaterEqual /* less = 0xc */ , ++ Assembler::less /* greaterEqual = 0xd */ , ++ Assembler::greater /* lessEqual = 0xe */ , ++ Assembler::lessEqual /* greater = 0xf, */ ++}; ++ ++Address MacroAssembler::as_Address(ArrayAddress adr, Register base_reg) { ++ AddressLiteral base = adr.base(); ++ lea(base_reg, base); ++ Address index = adr.index(); ++ assert(index._disp == 0, "must not have disp"); // maybe it can? 
++ Address array(base_reg, index._index, index._scale, index._disp); ++ return array; ++} ++ ++void MacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments) {SCOPEMARK_NAME(MacroAssembler::call_VM_base, this) ++ Label E, L; ++ ++ testptr(esp, 0xf, rcc); ++ jcc(Assembler::zero, L, rcc); ++ ++ subptr(esp, 8, esp); ++ call(RuntimeAddress(entry_point)); ++ addptr(esp, 8, esp); ++ jmp(E); ++ ++ bind(L); ++ call(RuntimeAddress(entry_point)); ++ bind(E); ++} ++ ++void MacroAssembler::call_VM_leaf_base(address entry_point, ++ int number_of_arguments, ++ Label *retaddr, Register rscratch) {SCOPEMARK_NAME(MacroAssembler::call_VM_base-label, this) ++ /*Label E, L, exit; ++ ++ testptr(esp, 0xf, rcc); ++ jcc(Assembler::zero, L, rcc); ++ ++ mov_immediate64(rscratch1, 0xf7f7f7f7); ++ push(rscratch1); ++ jmp(E); ++ ++ bind(L); ++ //TODO:assert(esp[0] != 0xf7f7f7f7) jzy ++ ++ bind(E); ++ call(RuntimeAddress(entry_point)); ++ if (retaddr) ++ bind(*retaddr); ++ mov_immediate64(rscratch1, 0xf7f7f7f7); ++ ldl(rscratch2, esp, 0); ++ cmpl(rscratch1, rscratch2); ++ jcc(Assembler::notEqual, exit); ++ addptr(esp, 8, esp); ++ bind(exit);*/ ++ ++ //TODO:different from x86, stack not aligned is OK? jzy ++ call(RuntimeAddress(entry_point), retaddr, rscratch); ++} ++ ++int MacroAssembler::corrected_idivq(Register result, Register ra, Register rb, ++ bool want_remainder, Register scratch) ++{ ++ ShouldNotReachHere(); ++ int idivq_offset = offset(); ++ ++ return idivq_offset; ++} ++ ++void MacroAssembler::decrementw(ExternalAddress dst, int value, Register tmp1, Register tmp2){ ++ incrementw(dst, -value, tmp1, tmp2); ++} ++ ++void MacroAssembler::decrementw(Address dst, int value, Register tmp) ++{ ++ incrementw(dst, -value, tmp); ++} ++ ++void MacroAssembler::decrementw(Register reg, int value) ++{ ++ decrementl(reg, value); ++ zapnot(reg, 0xf, reg); ++} ++ ++void MacroAssembler::decrementl(ExternalAddress dst, int value , Register tmp1, Register tmp2){ ++ incrementl(dst, -value, tmp1, tmp2); ++} ++ ++void MacroAssembler::decrementl(Address dst, int value, Register tmp){ ++ incrementl(dst, -value, tmp); ++} ++ ++void MacroAssembler::decrementl(Register reg, int value) { ++ incrementl(reg, -value); ++} ++ ++/** ++ * x86 ++ * @param dst ++ * @param value ++ * @param tmp1 ++ * @param tmp2 ++ */ ++void MacroAssembler::incrementw(AddressLiteral dst, int value, Register tmp1, Register tmp2) { ++ assert_different_registers(tmp1, tmp2); ++ if (!value) return; ++ ++ lea(tmp1, dst); ++ ldws(tmp2, Address(tmp1, 0)); ++ if(is_simm16(value)) { ++ ldi(tmp2, value, tmp2); ++ } else { ++ ShouldNotReachHere(); ++ } ++ stw(tmp2, Address(tmp1, 0)); ++} ++ ++/** ++ * x86 ++ * @param dst ++ * @param value ++ * @param tmp_not_rcc ++ */ ++void MacroAssembler::incrementw(Address dst, int value, Register tmp_not_rcc) { ++ if (!value) return; ++ ldws(tmp_not_rcc, dst); ++ if(is_simm16(value)) { ++ ldi(tmp_not_rcc, value, tmp_not_rcc); ++ } else { ++ ShouldNotReachHere(); ++ } ++ stw(tmp_not_rcc, dst); ++} ++ ++/** ++ * x86 ++ * @param reg ++ * @param value ++ */ ++void MacroAssembler::incrementw(Register reg, int value) { ++ incrementl(reg, value); ++ zapnot(reg, 0xf, reg); ++} ++ ++void MacroAssembler::incrementl(ExternalAddress dst, int value, Register tmp1, Register tmp2){ ++ assert_different_registers(tmp1, tmp2); ++ if (!value) return; ++ mov_immediate64(tmp1, (intptr_t)dst.target(), dst.rspec()); ++ ldptr(tmp2, Address(tmp1, 0)); //ldwu ++ if (is_simm16(value)) { ++ ldi(tmp2, value, tmp2); ++ } else { ++ 
ShouldNotReachHere();
++  }
++  stptr(tmp2, Address(tmp1, 0));
++}
++
++void MacroAssembler::incrementl(Address dst, int value, Register tmp){
++  if (!value) return;
++  ldptr(tmp, dst);
++  if(is_simm16(value)) {
++    ldi(tmp, value, tmp);
++  } else {
++    ShouldNotReachHere();
++//    mov_immediate32(AT, value);
++//    addl(tmp, AT, tmp);
++  }
++  stptr(tmp, dst);
++}
++
++void MacroAssembler::incrementl(Register reg, int value) {
++  if (!value) return;
++  if (is_simm16(value)) {
++    ldi(reg, value, reg);
++  } else {
++    ShouldNotReachHere();
++//    mov_immediate32(AT, value);
++//    addl(reg, AT, reg);
++  }
++}
++
++// 32bit can do a case table jump in one instruction but we no longer allow the base
++// to be installed in the Address class
++void MacroAssembler::jump(ArrayAddress entry, Register tmp1, Register tmp2) {
++  assert_different_registers(tmp1, tmp2);
++  lea(tmp1, entry.base());
++  Address dispatch = entry.index();
++  assert(dispatch._base == noreg, "must be");
++  dispatch._base = tmp1;
++  jmp(dispatch, tmp2);
++}
++
++/**
++ * x86
++ *   lea(Register rd, Address addr)
++ * sw64
++ *   lea(Register rd, Address addr)
++ * note
++ *   No difference. No temp reg is needed and rd can be the same as addr._base or addr._index
++ */
++void MacroAssembler::lea(Register rd, Address addr) {
++  ldi(rd, addr);
++}
++
++void MacroAssembler::lea(Register rd, AddressLiteral addr) {
++  mov_immediate64(rd, (intptr_t)addr.target(), addr.rspec());
++}
++
++void MacroAssembler::lea(Address dst, AddressLiteral addr, Register tmp_not_rcc) {
++  assert_different_registers(tmp_not_rcc, rcc);
++  lea(tmp_not_rcc, addr);
++  stl(tmp_not_rcc, dst, rcc);
++}
++
++//todo scw
++void MacroAssembler::leave() {
++  addptr(rfp, 2 * wordSize, esp);
++  ldl(RA, - 1 * wordSize, esp);
++  ldl(rfp, - 2 * wordSize, esp);
++}
++
++// Move an oop into a register.  immediate is true if we want
++// immediate instructions, i.e. we are not going to patch this
++// instruction while the code is being executed by another thread.  In
++// that case we can use move immediates rather than the constant pool.
++void MacroAssembler::movoop(Register dst, jobject obj, bool immediate) {
++  ShouldNotReachHere();
++}
++
++// Move a metadata address into a register.
++void MacroAssembler::mov_metadata(Register dst, Metadata* obj) { ++ int oop_index; ++ if (obj) { ++ oop_index = oop_recorder()->find_index(obj); ++ } else { ++ oop_index = oop_recorder()->allocate_metadata_index(obj); ++ } ++ RelocationHolder rspec = metadata_Relocation::spec(oop_index); ++ relocate(rspec); ++ prepare_patch_li48(dst, (long)(obj)); ++} ++ ++void MacroAssembler::reset_last_Java_frame(bool clear_fp) { ++ Register thread = rthread; ++ // we must set sp to zero to clear frame ++ std(R0, Address(thread, JavaThread::last_Java_sp_offset())); ++ // must clear fp, so that compiled frames are not confused; it is ++ // possible that we need it only for debugging ++ if (clear_fp) { ++ std(R0, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ std(R0, Address(thread, JavaThread::last_Java_pc_offset())); ++} ++ ++/*void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) {ShouldNotReachHere(); ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; ++ } ++ ++ Register thread = rthread; ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ std(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++// movptr(AT, (long)last_java_pc); ++ sd(AT, thread, in_bytes(JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ ++ std(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset())); ++}*/ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc, Register scratch) { ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; ++ } ++ ++ Register thread = rthread; ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ stptr(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()), scratch); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc != NULL) { ++ relocate(relocInfo::internal_word_type); ++ prepare_patch_li48(scratch, (long)last_java_pc); ++ stptr(scratch, Address(rthread, ++ JavaThread::frame_anchor_offset() ++ + JavaFrameAnchor::last_Java_pc_offset())); ++ } ++ ++ stptr(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()), scratch); ++ ++} ++//TODO:delete we don't need this edition jzy ++/*void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Register last_java_pc, ++ Register scratch) { ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; ++ } ++ ++ Register thread = rthread; ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ stptr(last_java_fp, Address(thread, JavaThread::last_Java_fp_offset()), scratch); ++ } ++ ++ // last_java_pc is optional ++ if (last_java_pc->is_valid()) {Unimplemented(); ++ stptr(last_java_pc, Address(rthread, ++ JavaThread::frame_anchor_offset() ++ + JavaFrameAnchor::last_Java_pc_offset()), scratch); ++ } ++ ++ stptr(last_java_sp, Address(thread, JavaThread::last_Java_sp_offset()), scratch); ++}*/ ++ ++ ++void MacroAssembler::set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label &L, ++ Register scratch, Register scratch2) {BLOCK_COMMENT("MacroAssembler::set_last_Java_frame enter"); ++ //br scratch,0; ++ //add scratch,0,scratch; this 
instruction need patch TODO:check jzy ++ assert_different_registers(scratch, scratch2); ++ int offset = 0; ++ if(UseAddpi){ ++ if (L.is_bound()) { ++ assert(false, "TODO:should check jzy"); ++ offset = (target(L) - pc()/*add instruction*/) >> 2; ++ } else { ++ L.add_patch_at(code(), locator()); ++ } ++ addpi(offset, scratch);//immediate need special flag when patch? jzy ++ ++ }else { ++ br(scratch, 0); ++ if (L.is_bound()) { ++ assert(false, "TODO:should check jzy"); ++ offset = (target(L) - pc()/*add instruction*/) >> 2; ++ } else { ++ L.add_patch_at(code(), locator()); ++ } ++ ldi(scratch, offset, scratch);//immediate need special flag when patch? jzy ++ } ++ stptr(scratch, Address(rthread, ++ JavaThread::frame_anchor_offset() ++ + JavaFrameAnchor::last_Java_pc_offset()), scratch2); ++ ++ set_last_Java_frame(last_java_sp, last_java_fp, NULL, scratch); BLOCK_COMMENT("MacroAssembler::set_last_Java_frame leave"); ++} ++ ++static void pass_arg0(MacroAssembler* masm, Register arg) { ++ if (c_rarg0 != arg ) { ++ masm->movl(c_rarg0, arg); ++ } ++} ++ ++static void pass_arg1(MacroAssembler* masm, Register arg) { ++ if (c_rarg1 != arg ) { ++ masm->movl(c_rarg1, arg); ++ } ++} ++ ++static void pass_arg2(MacroAssembler* masm, Register arg) { ++ if (c_rarg2 != arg ) { ++ masm->movl(c_rarg2, arg); ++ } ++} ++ ++void MacroAssembler::stop(const char* msg) {SCOPEMARK_NAME(MacroAssembler::stop, this); ++ if (ShowMessageBoxOnError) { ++ address rip = pc(); ++ // pusha(); // get regs on stack ++ lea(c_rarg1, InternalAddress(rip)); ++ } ++ lea(c_rarg0, ExternalAddress((address) msg)); ++// movq(c_rarg2, rsp); // pass pointer to regs array ++// andq(rsp, -16); // align stack as required by ABI ++ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); ++ brk(17); ++} ++ ++void MacroAssembler::debug_stop(const char* msg) {block_comment("debug_stop { "); ++ ldi(rscratch1_GP, 0); ++ beq(rscratch1_GP, -1);block_comment("debug_stop } "); ++} ++ ++void MacroAssembler::warn(const char* msg) { ++ warning("warning: %s", msg); ++} ++ ++void MacroAssembler::align(int modulus) { ++ while (offset() % modulus != 0) nop(); ++} ++ ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). ++void MacroAssembler::atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2) { ++ Label again; ++ SizedScope sc(this, 64); ++ if(UseSW8A) { ++ mov(tmp_reg1, counter_addr); ++ bind(again); ++ lldw(tmp_reg2, 0, tmp_reg1); ++ addl(tmp_reg2, inc, tmp_reg2); ++ move(AT, tmp_reg2); ++ lstw(AT, 0, tmp_reg1); ++ beq_l(AT, again); ++ } else { ++ mov(tmp_reg1, counter_addr); ++ bind(again); ++ lldw(tmp_reg2, 0, tmp_reg1); ++ ldi(GP, 1, R0); ++ wr_f(GP); ++ addl(tmp_reg2, inc, tmp_reg2); ++ move(AT, tmp_reg2); ++ align(8); // must align ++ lstw(AT, 0, tmp_reg1); ++ rd_f(AT); ++ beq_l(AT, again); ++ } ++} ++ ++void MacroAssembler::atomic_incw(Register counter_addr, Register tmp, Register tmp2) { ++ ShouldNotReachHere(); ++} ++ ++// Writes to stack successive pages until offset reached to check for ++// stack overflow + shadow pages. This clobbers tmp. ++void MacroAssembler::bang_stack_size(Register size, Register tmp) {SCOPEMARK_NAME(bang_stack_size, this) ++ assert_different_registers(tmp, size, rscratch4); ++ movl(tmp, esp); ++ // Bang stack for total size given plus shadow page size. ++ // Bang one page at a time because large size can bang beyond yellow and ++ // red zones. 
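// Illustrative aside: the loop that follows is the usual page-at-a-time stack
// bang. Roughly the same shape in stand-alone C++ (the page and shadow sizes
// are assumptions; the real code uses os::vm_page_size() and the thread's
// shadow-zone size, and stores the size register rather than 0):
//
//   #include <cstddef>
//   void bang_stack(volatile char* sp, ptrdiff_t size, ptrdiff_t page, ptrdiff_t shadow) {
//     volatile char* p = sp;
//     for (ptrdiff_t remaining = size; remaining > 0; remaining -= page) {
//       p -= page;
//       *p = 0;                  // touch one word in every page of the requested size
//     }
//     for (ptrdiff_t i = 1; i * page < shadow; i++) {
//       *(p - i * page) = 0;     // then touch every page of the shadow zone
//     }
//   }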
++ Label loop; ++ bind(loop); ++ mov_immediate64(rscratch4, (-os::vm_page_size())); ++ stw(size, Address(tmp, rscratch4)); ++ //mov_immediate64(rscratch4, os::vm_page_size()); ++ addptr(tmp, rscratch4, tmp); ++ addptr(size, rscratch4, size); ++ jcc(Assembler::greater, loop, size); ++ ++ // Bang down shadow pages too. ++ // At this point, (tmp-0) is the last address touched, so don't ++ // touch it again. (It was touched as (tmp-pagesize) but then tmp ++ // was post-decremented.) Skip this address by starting at i=1, and ++ // touch a few more pages below. N.B. It is important to touch all ++ // the way down including all pages in the shadow zone. ++ for (int i = 1; i < ((int)JavaThread::stack_shadow_zone_size() / os::vm_page_size()); i++) { ++ // this could be any sized move but this is can be a debugging crumb ++ // so the bigger the better. ++ mov_immediate64(rscratch4, (-i*os::vm_page_size())); ++ stptr(size, Address(tmp, rscratch4)); ++ } ++} ++ ++void MacroAssembler::reserved_stack_check() { ++ // testing if reserved zone needs to be enabled ++ Label no_reserved_zone_enabling; ++ Register thread = rthread; ++ ++ cmpptr(esp, Address(thread, JavaThread::reserved_stack_activation_offset())); ++ jcc(Assembler::below, no_reserved_zone_enabling); ++ ++ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::enable_stack_reserved_zone), thread); ++ jump(RuntimeAddress(StubRoutines::throw_delayed_StackOverflowError_entry())); ++ should_not_reach_here("throw_delayed_StackOverflowError_entry"); ++ ++ bind(no_reserved_zone_enabling); ++} ++ ++int MacroAssembler::biased_locking_enter(Register lock_reg, ++ Register obj_reg, ++ Register swap_reg, ++ Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, ++ Label* slow_case, ++ BiasedLockingCounters* counters) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ assert(tmp_reg != noreg, "tmp_reg must be supplied"); ++ assert_different_registers(lock_reg, obj_reg, swap_reg, tmp_reg); ++ assert(markOopDesc::age_shift == markOopDesc::lock_bits + markOopDesc::biased_lock_bits, "biased locking makes assumptions about bit layout"); ++ Address mark_addr (obj_reg, oopDesc::mark_offset_in_bytes()); ++ ++ if (PrintBiasedLockingStatistics && counters == NULL) { ++ counters = BiasedLocking::counters(); ++ } ++ // Biased locking ++ // See whether the lock is currently biased toward our thread and ++ // whether the epoch is still valid ++ // Note that the runtime guarantees sufficient alignment of JavaThread ++ // pointers to allow age to be placed into low bits ++ // First check to see whether biasing is even enabled for this object ++ Label cas_label; ++ int null_check_offset = -1; ++ if (!swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ ldptr(swap_reg, mark_addr); ++ } ++ bis(R0, swap_reg, tmp_reg); ++ andptr(tmp_reg, markOopDesc::biased_lock_mask_in_place, tmp_reg); ++ cmpptr(tmp_reg, markOopDesc::biased_lock_pattern); ++ jcc(Assembler::notEqual, cas_label); ++ // The bias pattern is present in the object's header. Need to check ++ // whether the bias owner and the epoch are both still current. 
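// Illustrative aside: the mask/pattern test above looks only at the low lock
// bits of the mark word. For 64-bit HotSpot with biased locking the layout is
// roughly
//
//   [ JavaThread* owner : 54 | epoch : 2 | unused : 1 | age : 4 | biased_lock : 1 | lock : 2 ]
//
// so, assuming the constants from markOop.hpp (biased_lock_mask_in_place == 0b111,
// biased_lock_pattern == 0b101), the check above means "biased or biasable", and
// the XOR against (prototype_header | thread) below is zero only if the owner,
// the epoch and the bias bits all still match the current thread.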
++ if (swap_reg_contains_mark) { ++ null_check_offset = offset(); ++ } ++ load_prototype_header(tmp_reg, obj_reg); ++ orptr(tmp_reg, rthread, tmp_reg); ++ xorptr(swap_reg, tmp_reg, tmp_reg); ++ Register header_reg = tmp_reg; ++ andptr(header_reg, ~((int) markOopDesc::age_mask_in_place), header_reg); ++ move(rcc, header_reg); ++ if (counters != NULL) { ++ Label L; ++ jcc(Assembler::notZero, L, header_reg); ++ atomic_inc32((address)counters->biased_lock_entry_count_addr(), 1, rscratch1, rscratch2); ++ jmp(done); ++ bind(L); ++ } else { ++ jcc(Assembler::equal, done, header_reg); ++ } ++ ++ Label try_revoke_bias; ++ Label try_rebias; ++ ++ // At this point we know that the header has the bias pattern and ++ // that we are not the bias owner in the current epoch. We need to ++ // figure out more details about the state of the header in order to ++ // know what operations can be legally performed on the object's ++ // header. ++ ++ // If the low three bits in the xor result aren't clear, that means ++ // the prototype header is no longer biased and we have to revoke ++ // the bias on this object. ++ testptr(header_reg, markOopDesc::biased_lock_mask_in_place); ++ jcc(Assembler::notZero, try_revoke_bias); ++ ++ // Biasing is still enabled for this data type. See whether the ++ // epoch of the current bias is still valid, meaning that the epoch ++ // bits of the mark word are equal to the epoch bits of the ++ // prototype header. (Note that the prototype header's epoch bits ++ // only change at a safepoint.) If not, attempt to rebias the object ++ // toward the current thread. Note that we must be absolutely sure ++ // that the current epoch is invalid in order to do this because ++ // otherwise the manipulations it performs on the mark word are ++ // illegal. ++ testptr(header_reg, markOopDesc::epoch_mask_in_place); ++ jcc(Assembler::notZero, try_rebias); ++ ++ // The epoch of the current bias is still valid but we know nothing ++ // about the owner; it might be set or it might be clear. Try to ++ // acquire the bias of the object using an atomic operation. If this ++ // fails we will go in to the runtime to revoke the object's bias. ++ // Note that we first construct the presumed unbiased header so we ++ // don't accidentally blow away another thread's valid bias. ++ andptr(swap_reg, ++ markOopDesc::biased_lock_mask_in_place | markOopDesc::age_mask_in_place | markOopDesc::epoch_mask_in_place, ++ swap_reg); ++ bis(R0, swap_reg, tmp_reg); ++ orptr(tmp_reg, rthread, tmp_reg); ++ cmpxchg(tmp_reg, mark_addr, swap_reg); ++ // If the biasing toward our thread failed, this means that ++ // another thread succeeded in biasing it toward itself and we ++ // need to revoke that bias. The revocation will occur in the ++ // interpreter runtime in the slow case. ++ if (slow_case != NULL) { ++ jcc(Assembler::failed, *slow_case); ++ } ++ if (counters != NULL) { ++ Label L; ++ jcc(Assembler::success, L); ++ atomic_inc32((address)counters->anonymously_biased_lock_entry_count_addr(), 1, rscratch1, rscratch2); ++ BIND(L); ++ } ++ jmp(done); ++ ++ bind(try_rebias); ++ // At this point we know the epoch has expired, meaning that the ++ // current "bias owner", if any, is actually invalid. Under these ++ // circumstances _only_, we are allowed to use the current header's ++ // value as the comparison value when doing the cas to acquire the ++ // bias in the current epoch. In other words, we allow transfer of ++ // the bias from one thread to another directly in this situation. 
++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ load_prototype_header(tmp_reg, obj_reg); ++ orptr(tmp_reg, rthread, tmp_reg); ++ cmpxchg(tmp_reg, mark_addr, swap_reg); ++ // If the biasing toward our thread failed, then another thread ++ // succeeded in biasing it toward itself and we need to revoke that ++ // bias. The revocation will occur in the runtime in the slow case. ++ if (slow_case != NULL) { ++ jcc(Assembler::failed, *slow_case); ++ } ++ if (counters != NULL) { ++ Label L; ++ jcc(Assembler::success, L); ++ atomic_inc32((address) counters->rebiased_lock_entry_count_addr(), 1, rscratch1, rscratch2); ++ BIND(L); ++ } ++ jmp(done); ++ ++ bind(try_revoke_bias); ++ // The prototype mark in the klass doesn't have the bias bit set any ++ // more, indicating that objects of this data type are not supposed ++ // to be biased any more. We are going to try to reset the mark of ++ // this object to the prototype value and fall through to the ++ // CAS-based locking scheme. Note that if our CAS fails, it means ++ // that another thread raced us for the privilege of revoking the ++ // bias of this particular object, so it's okay to continue in the ++ // normal locking code. ++ // ++ // FIXME: due to a lack of registers we currently blow away the age ++ // bits in this situation. Should attempt to preserve them. ++ load_prototype_header(tmp_reg, obj_reg); ++ cmpxchg(tmp_reg, mark_addr, swap_reg); ++ // Fall through to the normal CAS-based lock, because no matter what ++ // the result of the above CAS, some thread must have succeeded in ++ // removing the bias bit from the object's header. ++ jcc(Assembler::failed, cas_label, AT); ++ if (counters != NULL) { ++ atomic_inc32((address) counters->revoked_lock_entry_count_addr(), 1, rscratch1, rscratch2); ++ } ++ ++ bind(cas_label); ++ ++ return null_check_offset; ++} ++ ++void MacroAssembler::biased_locking_exit(Register obj_reg, Register temp_reg, Label& done) { ++ assert(UseBiasedLocking, "why call this otherwise?"); ++ ++ // Check for biased locking unlock case, which is a no-op ++ // Note: we do not have to check the thread ID for two reasons. ++ // First, the interpreter checks for IllegalMonitorStateException at ++ // a higher level. Second, if the bias was revoked while we held the ++ // lock, the object could not be rebiased toward another thread, so ++ // the bias bit would be clear. ++ ldl(temp_reg, oopDesc::mark_offset_in_bytes(), obj_reg); ++ andi(temp_reg, markOopDesc::biased_lock_mask_in_place, temp_reg); ++ addiu(R0, markOopDesc::biased_lock_pattern, AT); ++ cmpptr(AT, temp_reg); ++ jcc(Assembler::equal, done); ++} ++#ifdef COMPILER2 ++// tmp_reg1 and tmp_reg2 should be saved outside of atomic_inc32 (caller saved). 
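// Illustrative aside: atomic_inc32 above and atomic_incw below are
// load-linked/store-conditional (lldw/lstw) retry loops. The same shape in
// portable C++, using std::atomic as a stand-in for the LL/SC pair:
//
//   #include <atomic>
//   inline void atomic_add32(std::atomic<int>* counter, int inc) {
//     int old = counter->load(std::memory_order_relaxed);
//     // If another CPU wrote the word between the load and the store, the
//     // "store-conditional" (compare_exchange) fails and we go around again,
//     // just like the beq back to the lldw in the assembly.
//     while (!counter->compare_exchange_weak(old, old + inc,
//                                            std::memory_order_relaxed)) {
//       // compare_exchange_weak reloads 'old' on failure
//     }
//   }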
++void MacroAssembler::atomic_incw(AddressLiteral counter_addr, int inc, Register tmp_reg1) { ++ Label again; ++ assert_different_registers(tmp_reg1, rscratch1_GP, rscratch2_AT); ++ assert(Assembler::operand_valid_for_simple_type_instruction_immediate(inc), "exceed limit"); ++ Register tmp_reg2 = rscratch2_AT;//TODO:check we donot need tmp_reg2 jzy ++ SizedScope sc(this, 64); ++ if(UseSW8A) { ++ mov_immediate64(tmp_reg1, (intptr_t) counter_addr.target(), counter_addr.rspec()); ++ bind(again); ++ lldw(tmp_reg2, 0, tmp_reg1); ++ addl(tmp_reg2, inc, tmp_reg2); ++ move(rscratch2_AT, tmp_reg2); ++ lstw(rscratch2_AT, 0, tmp_reg1); ++ beq_l(rscratch2_AT, again); ++ } else { ++ mov_immediate64(tmp_reg1, (intptr_t) counter_addr.target(), counter_addr.rspec()); ++ bind(again); ++ lldw(tmp_reg2, 0, tmp_reg1); ++ ldi(rscratch1_GP, 1, R0); ++ wr_f(rscratch1_GP); ++ addl(tmp_reg2, inc, tmp_reg2); ++ move(rscratch2_AT, tmp_reg2); ++ align(8); // must align ++ lstw(rscratch2_AT, 0, tmp_reg1); ++ rd_f(rscratch2_AT); ++ beq_l(rscratch2_AT, again); ++ } ++} ++// Fast_Lock and Fast_Unlock used by C2 ++ ++// Because the transitions from emitted code to the runtime ++// monitorenter/exit helper stubs are so slow it's critical that ++// we inline both the stack-locking fast-path and the inflated fast path. ++// ++// See also: cmpFastLock and cmpFastUnlock. ++// ++// What follows is a specialized inline transliteration of the code ++// in slow_enter() and slow_exit(). If we're concerned about I$ bloat ++// another option would be to emit TrySlowEnter and TrySlowExit methods ++// at startup-time. These methods would accept arguments as ++// (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure ++// indications in the icc.ZFlag. Fast_Lock and Fast_Unlock would simply ++// marshal the arguments and emit calls to TrySlowEnter and TrySlowExit. ++// In practice, however, the # of lock sites is bounded and is usually small. ++// Besides the call overhead, TrySlowEnter and TrySlowExit might suffer ++// if the processor uses simple bimodal branch predictors keyed by EIP ++// Since the helper routines would be called from multiple synchronization ++// sites. ++// ++// An even better approach would be write "MonitorEnter()" and "MonitorExit()" ++// in java - using j.u.c and unsafe - and just bind the lock and unlock sites ++// to those specialized methods. That'd give us a mostly platform-independent ++// implementation that the JITs could optimize and inline at their pleasure. ++// Done correctly, the only time we'd need to cross to native could would be ++// to park() or unpark() threads. We'd also need a few more unsafe operators ++// to (a) prevent compiler-JIT reordering of non-volatile accesses, and ++// (b) explicit barriers or fence operations. ++// ++// TODO: ++// ++// * Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr). ++// This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals. ++// Given TLAB allocation, Self is usually manifested in a register, so passing it into ++// the lock operators would typically be faster than reifying Self. ++// ++// * Ideally I'd define the primitives as: ++// fast_lock (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED. ++// fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED ++// Unfortunately ADLC bugs prevent us from expressing the ideal form. ++// Instead, we're stuck with a rather awkward and brittle register assignments below. 
++// Furthermore the register assignments are overconstrained, possibly resulting in ++// sub-optimal code near the synchronization site. ++// ++// * Eliminate the sp-proximity tests and just use "== Self" tests instead. ++// Alternately, use a better sp-proximity test. ++// ++// * Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value. ++// Either one is sufficient to uniquely identify a thread. ++// TODO: eliminate use of sp in _owner and use get_thread(tr) instead. ++// ++// * Intrinsify notify() and notifyAll() for the common cases where the ++// object is locked by the calling thread but the waitlist is empty. ++// avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll(). ++// ++// * use jccb and jmpb instead of jcc and jmp to improve code density. ++// But beware of excessive branch density on AMD Opterons. ++// ++// * Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success ++// or failure of the fast-path. If the fast-path fails then we pass ++// control to the slow-path, typically in C. In Fast_Lock and ++// Fast_Unlock we often branch to DONE_LABEL, just to find that C2 ++// will emit a conditional branch immediately after the node. ++// So we have branches to branches and lots of ICC.ZF games. ++// Instead, it might be better to have C2 pass a "FailureLabel" ++// into Fast_Lock and Fast_Unlock. In the case of success, control ++// will drop through the node. ICC.ZF is undefined at exit. ++// In the case of failure, the node will branch directly to the ++// FailureLabel ++ ++ ++// obj: object to lock ++// box: on-stack box address (displaced header location) - KILLED ++// rax,: tmp -- KILLED ++// scr: tmp -- KILLED ++void MacroAssembler::fast_lock(Register objReg, Register boxReg, Register tmpReg, ++ Register scrReg, Register cx1Reg, Register cx2Reg, ++ BiasedLockingCounters* counters, ++ Metadata* method_data, ++ bool use_rtm, bool profile_rtm) { ++ // Ensure the register assignments are disjoint ++ assert(tmpReg == V0, ""); ++ use_rtm = false; ++ profile_rtm = false; ++ ++ if (use_rtm) {//TODO:sw doesnot need this, we should delete this code jzy ++ assert_different_registers(objReg, boxReg, tmpReg, scrReg, cx1Reg, cx2Reg); ++ } else { ++ assert(cx1Reg == noreg, ""); ++ assert(cx2Reg == noreg, ""); ++ assert_different_registers(objReg, boxReg, tmpReg, scrReg); ++ } ++ ++ if (counters != NULL) { ++ atomic_incw(ExternalAddress((address)counters->total_entry_count_addr()), 1, rscratch3);//TODO:swjdk8 use OK? jzy ++ } ++ if (EmitSync & 1) { ++ // set box->dhw = markOopDesc::unused_mark() ++ // Force all sync thru slow-path: slow_enter() and slow_exit() ++ mov_immediate32s(rscratch3, (int32_t)intptr_t(markOopDesc::unused_mark())); ++ stl(rscratch3, Address(boxReg, 0)); ++ cmpptr (esp, R0); ++ } else { ++ // Possible cases that we'll encounter in fast_lock ++ // ------------------------------------------------ ++ // * Inflated ++ // -- unlocked ++ // -- Locked ++ // = by self ++ // = by other ++ // * biased ++ // -- by Self ++ // -- by other ++ // * neutral ++ // * stack-locked ++ // -- by self ++ // = sp-proximity test hits ++ // = sp-proximity test generates false-negative ++ // -- by other ++ // ++ ++ Label IsInflated, DONE_LABEL; ++ ++ // it's stack-locked, biased or neutral ++ // TODO: optimize away redundant LDs of obj->mark and improve the markword triage ++ // order to reduce the number of conditional branches in the most common cases. 
++ // Beware -- there's a subtle invariant that fetch of the markword ++ // at [FETCH], below, will never observe a biased encoding (*101b). ++ // If this invariant is not held we risk exclusion (safety) failure. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, counters); ++ } ++ ++ ldptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // [FETCH] ++ testptr(tmpReg, markOopDesc::monitor_value); // inflated vs stack-locked|neutral|biased ++ jcc(Assembler::notZero, IsInflated); ++ ++ // Attempt stack-locking ... ++ orptr (tmpReg, markOopDesc::unlocked_value, tmpReg); ++ stptr(tmpReg, Address(boxReg, 0)); // Anticipate successful CAS ++ if (os::is_MP()) { ++ memb(); ++ } ++ cmpxchg(boxReg, Address(objReg, oopDesc::mark_offset_in_bytes()), tmpReg); // Updates tmpReg ++ //mov_immediate32(rcc, 1); ++ //cmove(Assembler::success, rcc, R0, rcc, rscratch2_AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy ++ ++ if (counters != NULL) { ++ Label skip; ++ jcc(Assembler::notZero, skip);//failed ++ atomic_incw(ExternalAddress((address)counters->fast_path_entry_count_addr()), 1, rscratch3); ++ bind(skip); ++ } ++ jcc(Assembler::zero, DONE_LABEL); // Success ++ ++ // Recursive locking. ++ // The object is stack-locked: markword contains stack pointer to BasicLock. ++ // Locked by current thread if difference with current SP is less than one page. ++ subptr(tmpReg, esp, tmpReg); ++ // Next instruction set ZFlag == 1 (Success) if difference is less then one page. ++ mov_immediate32s(rscratch3, (int32_t)(7 - os::vm_page_size() )); ++ andptr(tmpReg, rscratch3, tmpReg);//TODO:which value? jzy ++ move(rcc, tmpReg); ++ stptr(tmpReg, Address(boxReg, 0)); ++ if (counters != NULL) { ++ Label skip; ++ jcc(Assembler::notEqual, skip); ++ atomic_incw(ExternalAddress((address)counters->fast_path_entry_count_addr()), 1, rscratch3); ++ bind(skip); ++ } ++ jmp(DONE_LABEL); ++ ++ bind(IsInflated); ++ // The object is inflated. tmpReg contains pointer to ObjectMonitor* + markOopDesc::monitor_value ++ ++ // It's inflated ++ movl(scrReg, tmpReg); ++ movl(tmpReg, R0); ++ ++ if (os::is_MP()) { ++ memb(); ++ } ++ cmpxchg(rthread, Address(scrReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), tmpReg); ++ //mov_immediate32(rcc, 1); ++ //cmove(Assembler::success, rcc, R0, rcc, AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy ++ // Unconditionally set box->_displaced_header = markOopDesc::unused_mark(). ++ // Without cast to int32_t movptr will destroy r10 which is typically obj. ++ mov_immediate32s(rscratch3, (int32_t)intptr_t(markOopDesc::unused_mark())); ++ stl(rscratch3, Address(boxReg, 0)); ++ // Intentional fall-through into DONE_LABEL ... ++ // Propagate ICC.ZF from CAS above into DONE_LABEL. ++ ++ // DONE_LABEL is a hot target - we'd really like to place it at the ++ // start of cache line by padding with NOPs. ++ // See the AMD and Intel software optimization manuals for the ++ // most efficient "long" NOP encodings. ++ // Unfortunately none of our alignment mechanisms suffice. ++ bind(DONE_LABEL); ++ ++ // At DONE_LABEL the icc ZFlag is set as follows ... ++ // Fast_Unlock uses the same protocol. ++ // ZFlag == 1 -> Success ++ // ZFlag == 0 -> Failure - force control through the slow-path ++ } ++} ++ ++// obj: object to unlock ++// box: box address (displaced header location), killed. Must be EAX. ++// tmp: killed, cannot be obj nor box. 
++// ++// Some commentary on balanced locking: ++// ++// Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites. ++// Methods that don't have provably balanced locking are forced to run in the ++// interpreter - such methods won't be compiled to use fast_lock and fast_unlock. ++// The interpreter provides two properties: ++// I1: At return-time the interpreter automatically and quietly unlocks any ++// objects acquired the current activation (frame). Recall that the ++// interpreter maintains an on-stack list of locks currently held by ++// a frame. ++// I2: If a method attempts to unlock an object that is not held by the ++// the frame the interpreter throws IMSX. ++// ++// Lets say A(), which has provably balanced locking, acquires O and then calls B(). ++// B() doesn't have provably balanced locking so it runs in the interpreter. ++// Control returns to A() and A() unlocks O. By I1 and I2, above, we know that O ++// is still locked by A(). ++// ++// The only other source of unbalanced locking would be JNI. The "Java Native Interface: ++// Programmer's Guide and Specification" claims that an object locked by jni_monitorenter ++// should not be unlocked by "normal" java-level locking and vice-versa. The specification ++// doesn't specify what will occur if a program engages in such mixed-mode locking, however. ++// Arguably given that the spec legislates the JNI case as undefined our implementation ++// could reasonably *avoid* checking owner in Fast_Unlock(). ++// In the interest of performance we elide m->Owner==Self check in unlock. ++// A perfectly viable alternative is to elide the owner check except when ++// Xcheck:jni is enabled. ++ ++void MacroAssembler::fast_unlock(Register objReg, Register boxReg, Register tmpReg, bool use_rtm) {SCOPEMARK_NAME(MacroAssembler::fast_unlock, this) ++ assert(boxReg == V0, ""); ++ assert_different_registers(objReg, boxReg, tmpReg); ++ ++ if (EmitSync & 4) { ++ // Disable - inhibit all inlining. Force control through the slow-path ++ cmpptr (esp, 0); ++ } else { ++ Label DONE_LABEL, Stacked, CheckSucc; ++ ++ // Critically, the biased locking test must have precedence over ++ // and appear before the (box->dhw == 0) recursive stack-lock test. ++ if (UseBiasedLocking && !UseOptoBiasInlining) { ++ biased_locking_exit(objReg, tmpReg, DONE_LABEL); ++ } ++ ++ cmpptr(Address(boxReg, 0), R0); // Examine the displaced header ++ jcc (Assembler::zero, DONE_LABEL); // 0 indicates recursive stack-lock ++ ldptr(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes())); // Examine the object's markword ++ testptr(tmpReg, markOopDesc::monitor_value); // Inflated? ++ jcc (Assembler::zero, Stacked); ++ ++ // Despite our balanced locking property we still check that m->_owner == Self ++ // as java routines or native JNI code called by this thread might ++ // have released the lock. ++ // Refer to the comments in synchronizer.cpp for how we might encode extra ++ // state in _succ so we can avoid fetching EntryList|cxq. ++ // ++ // I'd like to add more cases in fast_lock() and fast_unlock() -- ++ // such as recursive enter and exit -- but we have to be wary of ++ // I$ bloat, T$ effects and BP$ effects. ++ // ++ // If there's no contention try a 1-0 exit. That is, exit without ++ // a costly MEMBAR or CAS. See synchronizer.cpp for details on how ++ // we detect and recover from the race that the 1-0 exit admits. ++ // ++ // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier ++ // before it STs null into _owner, releasing the lock. 
Updates ++ // to data protected by the critical section must be visible before ++ // we drop the lock (and thus before any other thread could acquire ++ // the lock and observe the fields protected by the lock). ++ // IA32's memory-model is SPO, so STs are ordered with respect to ++ // each other and there's no need for an explicit barrier (fence). ++ // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html. ++#ifndef _LP64 ++ ++#else // _LP64 ++ // It's inflated ++ if (EmitSync & 1024) { ++ // Emit code to check that _owner == Self ++ // We could fold the _owner test into subsequent code more efficiently ++ // than using a stand-alone check, but since _owner checking is off by ++ // default we don't bother. We also might consider predicating the ++ // _owner==Self check on Xcheck:jni or running on a debug build. ++ ldptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); ++ xorptr(boxReg, rthread, boxReg); ++ } else { ++ movl(boxReg, R0); ++ } ++ ldptr(rscratch3, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(recursions))); ++ orptr(boxReg, rscratch3, rcc);//result should put in rcc ++ jcc (Assembler::notZero, DONE_LABEL); ++ ldptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(cxq))); ++ ldptr(rscratch3, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(EntryList))); ++ orptr(boxReg, rscratch3, rcc);//result should put in rcc ++ jcc (Assembler::notZero, CheckSucc); ++ stptr(R0, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); ++ jmp (DONE_LABEL); ++ ++ if ((EmitSync & 65536) == 0) {block_comment(";;EmitSync & 65536"); ++ // Try to avoid passing control into the slow_path ... ++ Label LSuccess, LGoSlowPath ; ++ bind (CheckSucc); ++ ++ // The following optional optimization can be elided if necessary ++ // Effectively: if (succ == null) goto SlowPath ++ // The code reduces the window for a race, however, ++ // and thus benefits performance. ++ cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), R0); ++ jcc (Assembler::zero, LGoSlowPath); ++ ++ movl(boxReg, R0); ++ if ((EmitSync & 16) && os::is_MP()) { ++ xchgptr(boxReg, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); ++ } else { ++ stptr(R0, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner))); ++ if (os::is_MP()) { ++ // Memory barrier/fence ++ // Dekker pivot point -- fulcrum : ST Owner; MEMBAR; LD Succ ++ // Instead of MFENCE we use a dummy locked add of 0 to the top-of-stack. ++ // This is faster on Nehalem and AMD Shanghai/Barcelona. ++ // See https://blogs.oracle.com/dave/entry/instruction_selection_for_volatile_fences ++ // We might also restructure (ST Owner=0;barrier;LD _Succ) to ++ // (mov box,0; xchgq box, &m->Owner; LD _succ) . ++ //lock(); addl(Address(rsp, 0), 0); ++ memb();//TODO:how to resolve this ? jzy ++ } ++ } ++ cmpptr(Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(succ)), R0); ++ jcc (Assembler::notZero, LSuccess); ++ ++ // Rare inopportune interleaving - race. ++ // The successor vanished in the small window above. ++ // The lock is contended -- (cxq|EntryList) != null -- and there's no apparent successor. ++ // We need to ensure progress and succession. ++ // Try to reacquire the lock. ++ // If that fails then the new owner is responsible for succession and this ++ // thread needs to take no further action and can exit via the fast path (success). ++ // If the re-acquire succeeds then pass control into the slow path. 
++ // As implemented, this latter mode is horrible because we generated more ++ // coherence traffic on the lock *and* artifically extended the critical section ++ // length while by virtue of passing control into the slow path. ++ ++ // box is really RAX -- the following CMPXCHG depends on that binding ++ // cmpxchg R,[M] is equivalent to rax = CAS(M,rax,R) ++ if (os::is_MP()) { memb(); } ++ cmpxchg(rthread, Address(tmpReg, OM_OFFSET_NO_MONITOR_VALUE_TAG(owner)), boxReg); ++ //mov_immediate32(rcc, 1); ++ //cmove(Assembler::success, rcc, R0, rcc, AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy ++ // There's no successor so we tried to regrab the lock. ++ // If that didn't work, then another thread grabbed the ++ // lock so we're done (and exit was a success). ++ jcc (Assembler::notEqual, LSuccess); ++ // Intentional fall-through into slow-path ++ ++ BIND (LGoSlowPath); ++ mov_immediate32u (rcc, 1); // set ICC.ZF=0 to indicate failure ++ jmp (DONE_LABEL); ++ ++ BIND (LSuccess); ++ mov_immediate32u (rcc, 0); // set ICC.ZF=1 to indicate success ++ jmp (DONE_LABEL); ++ } ++ ++ BIND (Stacked); ++ ldptr(tmpReg, Address (boxReg, 0)); // re-fetch ++ if (os::is_MP()) { memb(); } ++ cmpxchg(tmpReg, Address(objReg, oopDesc::mark_offset_in_bytes()), boxReg); // Uses RAX which is box ++ assert(oopDesc::mark_offset_in_bytes() == 0, "offset of _mark is not 0"); ++ //mov_immediate32(rcc, 1); ++ //cmove(Assembler::success, rcc, R0, rcc, rscratch2_AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy ++ ++ if (EmitSync & 65536) { ++ bind (CheckSucc); ++ } ++#endif ++ BIND(DONE_LABEL); ++ } ++} ++#endif // COMPILER2 ++void MacroAssembler::generate_fill(BasicType t, bool aligned, ++ Register to, Register value, Register count, ++ Register rtmp) { ++ //ShortBranchVerifier sbv(this); //sw need this? jzy ++ assert_different_registers(to, value, count, rtmp); ++ Label L_exit, L_skip_align1, L_skip_align2, L_fill_byte; ++ Label L_fill_2_bytes, L_fill_4_bytes; ++ ++ int shift = -1; ++ switch (t) { ++ case T_BYTE: ++ shift = 2; ++ break; ++ case T_SHORT: ++ shift = 1; ++ break; ++ case T_INT: ++ shift = 0; ++ break; ++ default: ShouldNotReachHere(); ++ } ++ ++ if (t == T_BYTE) { ++ andw(value, 0xff, value); ++ movl(rtmp, value); ++ slll(rtmp, 8, rtmp); ++ orw(value, rtmp, value); ++ } ++ if (t == T_SHORT) { ++ andw(value, 0xffff, value); ++ } ++ if (t == T_BYTE || t == T_SHORT) { ++ movw(rtmp, value); ++ slll(rtmp, 16, rtmp); ++ orw(value, rtmp, value); ++ } ++ ++ cmpw(count, 2<is_valid()) { ++ java_thread = rthread; ++ } ++ ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; ++ } ++ ++ // debugging support ++ assert(number_of_arguments >= 0 , "cannot have negative number of arguments"); ++ assert(number_of_arguments <= 4 , "6 - rthread - ?? "); ++ assert(java_thread == rthread , "unexpected register"); ++#ifdef ASSERT ++ // TraceBytecodes does not use r12 but saves it over the call, so don't verify ++ // r12 is the heapbase. 
++ if ((UseCompressedOops || UseCompressedClassPointers) && !TraceBytecodes) verify_heapbase("call_VM_base: heap base corrupted?"); ++#endif // ASSERT ++ ++ movl(c_rarg0, rthread); ++ ++ // set last Java frame before call ++ assert(last_java_sp != rfp, "can't use ebp/rbp"); ++ ++ // set last Java frame before call ++ address before_call_pc = (address)pc(); ++ set_last_Java_frame(last_java_sp, rfp, before_call_pc, rscratch1); ++// Label l; ++// set_last_Java_frame(last_java_sp, rfp, l, rscratch1); ++ // do the call ++// MacroAssembler::call_VM_leaf_base(entry_point, number_of_arguments, &l); ++// call(entry_point, relocInfo::runtime_call_type); ++ call(RuntimeAddress(entry_point)); ++ // reset last Java frame ++ reset_last_Java_frame(java_thread, true); // ++ ++ check_and_handle_popframe(java_thread); ++ check_and_handle_earlyret(java_thread); ++ ++ if (check_exceptions) { ++ // check for pending exceptions (java_thread is set upon return) ++ cmpptr(Address(java_thread, Thread::pending_exception_offset()), (int32_t) NULL_WORD); ++ ++ // This used to conditionally jump to forward_exception however it is ++ // possible if we relocate that the branch will not reach. So we must jump ++ // around so we can always reach ++ ++ Label ok; ++ jcc(Assembler::equal, ok); ++ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ bind(ok); ++ } ++ ++ // get oop result if there is one and reset the value in the thread ++ if (oop_result->is_valid()) { ++ get_vm_result(oop_result, java_thread); ++ } ++} ++ ++void MacroAssembler::call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions) {SCOPEMARK_NAME(MacroAssembler::call_VM_helper, this) ++ bis(R0, esp, V0); ++ call_VM_base(oop_result, noreg, V0, entry_point, number_of_arguments, check_exceptions); ++} ++ ++// Use this method when MacroAssembler version of call_VM_leaf_base() should be called from Interpreter. 
++void MacroAssembler::call_VM_leaf0(address entry_point) { ++ MacroAssembler::call_VM_leaf_base(entry_point, 0); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, int number_of_arguments) { ++ call_VM_leaf_base(entry_point, number_of_arguments); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ call_VM_leaf(entry_point, 1); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ call_VM_leaf(entry_point, 2); ++} ++ ++void MacroAssembler::call_VM_leaf(address entry_point, Register arg_0, ++ Register arg_1, Register arg_2) { ++ assert(arg_0 != c_rarg2, "smashed arg"); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ call_VM_leaf(entry_point, 3); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0) { ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 1); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1) { ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 2); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2) { ++ assert(arg_0 != c_rarg2, "smashed arg"); ++ assert(arg_1 != c_rarg2, "smashed arg"); ++ pass_arg2(this, arg_2); ++ assert(arg_0 != c_rarg1, "smashed arg"); ++ pass_arg1(this, arg_1); ++ pass_arg0(this, arg_0); ++ MacroAssembler::call_VM_leaf_base(entry_point, 3); ++} ++ ++void MacroAssembler::super_call_VM_leaf(address entry_point, Register arg_0, Register arg_1, Register arg_2, Register arg_3) { ++ ShouldNotReachHere(); ++} ++ ++void MacroAssembler::get_vm_result(Register oop_result, Register java_thread) { ++ ld(oop_result, Address(java_thread, JavaThread::vm_result_offset())); ++ std(R0, Address(java_thread, JavaThread::vm_result_offset())); ++ verify_oop(oop_result, "broken oop in call_VM_base"); ++} ++ ++void MacroAssembler::get_vm_result_2(Register metadata_result, Register java_thread) { ++ ld(metadata_result, Address(java_thread, JavaThread::vm_result_2_offset())); ++ std(R0, Address(java_thread, JavaThread::vm_result_2_offset())); ++} ++ ++// these are no-ops overridden by InterpreterMacroAssembler ++ ++void MacroAssembler::check_and_handle_earlyret(Register java_thread) { } ++ ++void MacroAssembler::check_and_handle_popframe(Register java_thread) { } ++ ++void MacroAssembler::cmpb(Register lh, int imm8, Register ccReg) { ++ if (is_uimm8(imm8)) { ++ ldi(ccReg, -imm8, lh); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmpb(Address addr, int imm8, Register ccReg) { ++ ldbu(ccReg, addr); ++ cmpb(ccReg, imm8, ccReg); ++} ++ ++void MacroAssembler::cmpb(AddressLiteral src1, int imm8, Register ccReg) { ++ mov_immediate64(ccReg, (intptr_t)src1.target(), src1.rspec()); ++ ldbu(ccReg, 0, ccReg); ++ cmpb(ccReg, imm8, ccReg); ++} ++ ++void MacroAssembler::cmph(Address addr, int imm16, Register ccReg) { ++ ldhu(ccReg, addr); ++ ldi(ccReg, -imm16, ccReg); ++} ++ ++/** ++ * x86 ++ * Assembler::cmpl(Register dst, int32_t imm32) ++ * sw64 ++ * MacroAssembler::cmpw(Register lh, int rh, Register ccReg=rcc) ++ * note ++ * lh is signed 64bit int in register. 
++ * rh is a sign extended 64bit int. ++ */ ++void MacroAssembler::cmpw(Register lh, int rh, Register ccReg) { ++ //if (-(1 << 16-1) <= rh && rh < ( 1 << 16-1)) { ++ if (Assembler::operand_valid_for_storage_type_instruction_immediate(rh)) { ++ ldi(ccReg, -rh, lh); ++ } else { ++ assert_different_registers(lh, ccReg); ++ mov_immediate64(ccReg, rh);// 考虑到rh如果是负数会占满64位, 因此要mov_imm64 ++ subl(lh, ccReg, ccReg); ++ } ++} ++ ++/** ++ * x86 ++ * Assembler::cmpl(Register dst, Register src) ++ * sw64 ++ * MacroAssembler::cmpw(Register lh, Register rh, Register ccReg=rcc) ++ * note ++ * it's a convention that lh and rh are signed extended int in 64bit reg, ++ */ ++void MacroAssembler::cmpw(Register lh, Register rh, Register ccReg) { ++ assert_different_registers(lh, ccReg); ++ assert_different_registers(rh, ccReg); ++ subl(lh, rh, ccReg);// 考虑到lh和rh都是符号扩展到64位的int, 这里用subl肯定不会有下溢或上溢的问题 ++} ++ ++/** ++ * x86 ++ * Assembler::cmpl(Register dst, Address src) ++ * sw64 ++ * MacroAssembler::cmpw(Register lh, Address rh, Register ccReg=rcc) ++ * note ++ * lh holds a signed extended 64bit int. ++ */ ++void MacroAssembler::cmpw(Register lh, Address rh, Register ccReg) { ++ assert_different_registers(lh, ccReg); ++ ldws(ccReg, rh); ++ subl(lh, ccReg, ccReg); ++} ++ ++/** ++ * x86 ++ * Assembler::cmpl(Address dst, int32_t imm32) ++ * sw64 ++ * MacroAssembler::cmpw(Address lh, int32_t imm, Register ccReg=rcc, Register tmp=rscratch1) ++ * note ++ * imm will be treated as a signed extened 64bit int. ++ */ ++void MacroAssembler::cmpw(Address lh, int32_t imm, Register ccReg, Register tmp) { ++ assert_different_registers(ccReg, tmp); ++ ldws(tmp, lh); ++ if (imm == 0) { ++ movl(ccReg, tmp); ++ } else { ++ mov_immediate64(ccReg, imm); ++ subl(tmp, ccReg, ccReg); ++ } ++} ++ ++/** ++ * x86 ++ * to del ++ * @param lh ++ * @param rh ++ * @param ccReg ++ */ ++void MacroAssembler::cmpw(Address lh, Register rh, Register ccReg) { ++ cmpw(rh, lh, ccReg); ++ subl(R0, ccReg, ccReg); ++} ++ ++/** ++ * x86 ++ * ++ * sw64 ++ * MacroAssembler::cmpw(AddressLiteral src1, int32_t imm, Register ccReg=rcc, Register tmp=rscratch1) ++ * note ++ * imm is a sign extended 64bit int. ++ * ccReg and tmp can't be the same reg. ++ */ ++void MacroAssembler::cmpw(AddressLiteral src1, int32_t imm, Register ccReg, Register tmp) { ++ ldws(tmp, src1); ++ cmpw(tmp, imm, ccReg); ++} ++ ++void MacroAssembler::cmpw(AddressLiteral src1, Register rh, Register ccReg) { ++ ldws(ccReg, src1); ++ subl(ccReg, rh, ccReg); ++} ++ ++/** ++ * x86 ++ * ++ * sw64 ++ * ++ * note ++ * compare lh and rh as unsigned word ++ */ ++void MacroAssembler::cmpwu(Register lh, Address rh, Register ccReg) { ++ ldwu(ccReg, rh); ++ movw(lh, lh); //as unsigned int ++ subl(lh, ccReg, ccReg); ++ addw(lh, R0, lh); ++} ++ ++void MacroAssembler::cmpl(Register lh, int rh, Register ccReg) { ++ // yj todo: is ldi ok here? ++ guarantee(-(1 << 16-1) <= rh && rh < ( 1 << 16-1), "rh value out of simm16"); ++ ldi(ccReg, -rh, lh); ++} ++ ++/** ++ * x86 ++ * cmpq(Register dst, Register src) ++ * sw64 ++ * cmpl_raw(Register lh, Register rh, Register ccReg=rcc) ++ * note ++ * 64bit compare and set result into ccReg. 
++ * just sub lh to rh, don't consider overflow and underflow of the result, use carefully ++ */ ++void MacroAssembler::cmpl_raw(Register lh, Register rh, Register ccReg) { ++ subl(lh, rh, ccReg); ++} ++ ++// use cmpl_raw ASAP ++void MacroAssembler::cmpq(Register lh, Register rh, Register ccReg) { ++ set_cmp_insn_mark(lh, rh, true); ++ subl(lh, rh, ccReg); ++} ++ ++//similar to cmpl ++void MacroAssembler::cmpUL(Register lh, Register rh, Register ccReg) { ++ assert_different_registers(lh, ccReg); ++ assert_different_registers(rh, ccReg); ++ xorptr(lh, rh, ccReg); //check sign ++ bge(ccReg, 2);// if same sign, just sub ++ selge(rh, 1, rh, ccReg); ++ Assembler::br(R0, 1); ++ subl(lh, rh, ccReg); ++} ++ ++void MacroAssembler::set_cmp_insn_mark(Register lh, Register rh, bool lcmp) { ++ cmp_insn_mark = pc(); ++ cmp_lh = lh; ++ cmp_rh = rh; ++ cmp_long = lcmp; ++} ++ ++void MacroAssembler::clear_cmp_insn_mark() { ++ cmp_insn_mark = NULL; ++ cmp_lh = noreg; ++ cmp_rh = noreg; ++ cmp_long = false; ++} ++ ++bool MacroAssembler::cmp_insn_marked() { ++ return cmp_insn_mark != NULL; ++} ++ ++// beside cmp, there can be test before jcc or nothing, and sub/add can set cc too ++void MacroAssembler::jccb(Condition cc, Label& L) { ++ switch(cc) { ++ case equal: ++// case zero: ++// case carryClear: ++ beq_l(rcc, L); ++ clear_cmp_insn_mark(); ++ break; ++ case notEqual: ++// case notZero: ++// case carrySet: ++ bne_l(rcc, L); ++ clear_cmp_insn_mark(); ++ break; ++ case greaterEqual: ++ if (cmp_insn_marked() && cmp_long) { ++ InstructionMark mark(this); ++ code_section()->set_end(cmp_insn_mark); ++ cmple(cmp_rh, cmp_lh, rcc); ++ code_section()->set_end(inst_mark()); ++ bne_l(rcc, L); ++ } else ++ bge_l(rcc, L); ++ clear_cmp_insn_mark(); ++ break; ++ case notNegative: ++ bge_l(rcc, L); ++ clear_cmp_insn_mark(); ++ break; ++ case aboveEqual:// unsigned >= ++ if (cmp_insn_marked()) { ++ InstructionMark mark(this); ++ code_section()->set_end(cmp_insn_mark); ++ cmpule(cmp_rh, cmp_lh, rcc); ++ code_section()->set_end(inst_mark()); ++ bne_l(rcc, L); ++ } else ++ bge_l(rcc, L); ++ clear_cmp_insn_mark(); ++ break; ++ case greater: ++ if (cmp_insn_marked() && cmp_long) { ++ InstructionMark mark(this); ++ code_section()->set_end(cmp_insn_mark); ++ cmplt(cmp_rh, cmp_lh, rcc); ++ code_section()->set_end(inst_mark()); ++ bne_l(rcc, L); ++ } else ++ bgt_l(rcc, L); ++ clear_cmp_insn_mark(); ++ break; ++ case positive: ++ bgt_l(rcc, L); ++ clear_cmp_insn_mark(); ++ break; ++ case above:// unsigned > ++ if (cmp_insn_marked()) { ++ InstructionMark mark(this); ++ code_section()->set_end(cmp_insn_mark); ++ cmpult(cmp_rh, cmp_lh, rcc); ++ code_section()->set_end(inst_mark()); ++ bne_l(rcc, L); ++ } else ++ bgt_l(rcc, L); ++ clear_cmp_insn_mark(); ++ break; ++ case lessEqual: ++ if (cmp_insn_marked() && cmp_long) { ++ InstructionMark mark(this); ++ code_section()->set_end(cmp_insn_mark); ++ cmple(cmp_lh, cmp_rh, rcc); ++ code_section()->set_end(inst_mark()); ++ bne_l(rcc, L); ++ } else ++ ble_l(rcc, L); ++ clear_cmp_insn_mark(); ++ break; ++ case belowEqual: //unsigned <= ++ if (cmp_insn_marked()) { ++ InstructionMark mark(this); ++ code_section()->set_end(cmp_insn_mark); ++ cmpule(cmp_lh, cmp_rh, rcc); ++ code_section()->set_end(inst_mark()); ++ bne_l(rcc, L); ++ } else ++ ble_l(rcc, L); ++ clear_cmp_insn_mark(); ++ break; ++ case less: ++ if (cmp_insn_marked() && cmp_long) { ++ InstructionMark mark(this); ++ code_section()->set_end(cmp_insn_mark); ++ cmplt(cmp_lh, cmp_rh, rcc); ++ code_section()->set_end(inst_mark()); ++ 
bne_l(rcc, L); ++ } else ++ blt_l(rcc, L); ++ clear_cmp_insn_mark(); ++ break; ++ case below: // unsigned < ++ if (cmp_insn_marked()) { ++ InstructionMark mark(this); ++ code_section()->set_end(cmp_insn_mark); ++ cmpult(cmp_lh, cmp_rh, rcc); ++ code_section()->set_end(inst_mark()); ++ bne_l(rcc, L); ++ } else ++ blt_l(rcc, L); ++ clear_cmp_insn_mark(); ++ break; ++ case negative: ++ blt_l(rcc, L); ++ clear_cmp_insn_mark(); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++/** ++ * x86 ++ * cmpq(Register dst, Register src) ++ * sw64 ++ * cmpl(Register lh, Register rh, Register ccReg=rcc) ++ * note ++ * 64bit compare and set result into ccReg. ++ * just sub lh to rh can cause overflow or underflow of the result, so this compare is more complex. ++ */ ++void MacroAssembler::cmpl(Register lh, Register rh, Register ccReg) { ++ assert_different_registers(lh, ccReg); ++ assert_different_registers(rh, ccReg); ++ xorptr(lh, rh, ccReg); //check sign ++ bge(ccReg, 2);// if same sign, just sub ++ selge(lh, 1, lh, ccReg); // if not && lh >= 0, ccReg=1, else ccReg = -1 ++ Assembler::br(R0, 1); ++ subl(lh, rh, ccReg); ++} ++ ++void MacroAssembler::cmpptr(Register lh, Address rh, Register ccReg) { ++ ldptr(ccReg, rh); ++ cmpptr(lh, ccReg, ccReg); ++} ++ ++void MacroAssembler::cmpptr(Address lh, Register rh, Register ccReg) { ++ ldptr(ccReg, lh); ++ cmpptr(ccReg, rh, ccReg); ++} ++ ++void MacroAssembler::cmpptr(Address lh, int32_t rh, Register ccReg) { ++ ldptr(ccReg, lh); ++ cmpl(ccReg, rh, ccReg);//TODO:refactor jzy ++} ++ ++void MacroAssembler::cmpptr(Register lh, int rh, Register ccReg) { ++ guarantee(-(1 << 16-1) <= rh && rh < ( 1 << 16-1), "rh value out of simm16"); ++ ldi(ccReg, -rh, lh); ++} ++ ++/** ++ * x86: ++ * cmpptr(Register src1, Register src2) ++ * sw64: ++ * cmpptr(Register src1, Register src2, Register ccReg=rcc) ++ * note: ++ * Sw64 use `rcc` as default compare result reg. ++ * The result should be consumed by instructions (e.g. `jcc`) ASAP with no interleaving ++ * instructions (e.g. `stx(reg, Address)`) that will clobber rcc by default. ++ * Note that ldx(reg, Address) don't need temp reg. ++ */ ++void MacroAssembler::cmpptr(Register src1, Register src2, Register ccReg) { ++ subl(src1, src2, ccReg); ++} ++ ++/** ++ * x86 ++ * ++ * sw64 ++ * ++ * note ++ * ++ */ ++void MacroAssembler::cmpptr(Register lh, AddressLiteral rh, Register ccReg) { ++ if (rh.is_lval()) { ++ lea(ccReg, rh); ++ cmpptr(lh, ccReg, ccReg); ++ } else { ++ ldptr(ccReg, rh); ++ cmpptr(lh, ccReg, ccReg); ++ } ++} ++ ++/** ++ * x86: ++ * movq/movl/movw(Register ra, Address addr) ++ * sw64: ++ * ldl/ldl_u/ldptr/ldw/ldhu/ldbu(Register ra, Address addr) ++ * note: ++ * SW64 don't need temp reg for the load, and ra can be same with addr._base or addr._index. 
++ * ++ * ldl_u will 8-byte align the addr then load 64bit ++ * ldl will load 64bit ++ * ldw will sign extend the 32bit ++ * ldhu will zero extend the 16bit ++ * ldbu will zero extend the 8bit ++ */ ++#define LDFROMADDR_DEF(LDX) \ ++ void MacroAssembler::LDX(Register ra, Address addr) {\ ++ if (addr.getMode() == Address::base_index_scale_disp) {\ ++ addr.setTmp(ra);\ ++ }\ ++ addr.emit(ra, this, op_##LDX);\ ++ } ++ ++ LDINSNLIST(LDFROMADDR_DEF) ++ ++#undef LDFROMADDR_DEF ++ ++/** ++ * x86 ++ * Assembler::movq/movl/movw/movb(Address, addr, Register ra) ++ * sw64 ++ * MacroAssembler::stl/stl_u/stw/sth/stb(Register ra, Address addr, Register _tmp=rcc) ++ * note ++ * ra can't same with rcc, but can be same with addr._base, or addr._index ++ */ ++#define ST2ADDR_DEF(STX) \ ++ void MacroAssembler::STX(Register ra, Address addr, Register _tmp) {\ ++ if (addr.getMode() == Address::base_index_scale_disp) {\ ++ assert_different_registers(ra, _tmp);\ ++ addr.setTmp(_tmp);\ ++ }\ ++ addr.emit(ra, this, op_##STX);\ ++ } ++ ++ STINSNLIST(ST2ADDR_DEF) ++ ++#undef ST2ADDR_DEF ++ ++#define ADDR_DEF(FLOATINSN) \ ++ void MacroAssembler::FLOATINSN(FloatRegister fa, Address addr, Register _tmp) {\ ++ if (addr.getMode() == Address::base_index_scale_disp) {\ ++ addr.setTmp(_tmp);\ ++ }\ ++ addr.emit(fa, this, op_##FLOATINSN);\ ++ } ++ ++ FLOATINSNLIST(ADDR_DEF) ++ ++#undef ADDR_DEF ++ ++void MacroAssembler::cmpoop(Register src1, Register src2, Register ccReg) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->obj_equals(this, src1, src2, ccReg); ++} ++ ++static void change_cmxchg_result(MacroAssembler* _masm) { ++ _masm->mov_immediate32(rcc, 1); ++ _masm->cmove(Assembler::success, rcc, R0, rcc, AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy ++} ++//todo scw ++// if c_reg == content(dest) { c_reg = dest ; store x_reg to dest;} else { c_reg = dest; } ++void MacroAssembler::cmpxchg(Register x_reg, Address dest, Register c_reg) {SCOPEMARK_NAME(MacroAssembler::cmpxchg, this); ++ assert_different_registers(AT, GP, rscratch3, c_reg, dest.base()); ++ assert_different_registers(AT, GP, rscratch3, x_reg, dest.base()); ++ SizedScope sc(this, 64); ++ Label done, again, nequal; ++ if (UseSW8A) { ++ if(UseCAS) { ++ move(GP, x_reg); ++ if(dest.disp() != 0) { ++ ldi(AT, dest.disp(), dest.base()); ++ casl(c_reg, AT, GP); ++ } else { ++ casl(c_reg, dest.base(), GP); ++ } ++ cmpeq(c_reg, GP, AT); ++ move(c_reg, GP); ++ } else { ++ BIND(again); ++ lldl(AT, dest.disp(), dest.base()); ++ bne_c(AT, c_reg, nequal); ++ ++ move(AT, x_reg); ++ lstl(AT, dest.disp(), dest.base()); ++ beq_l(AT, again); ++ beq_l(R0, done); ++ ++ // not xchged ++ BIND(nequal); ++ memb(); ++ move(c_reg, AT); ++ move(AT, R0); ++ ++ BIND(done); ++ } ++ } else { ++ //subl(esp, 16, esp); ++ //stl(rscratch3, 0, esp); ++ BIND(again); ++ lldl(rscratch3, dest.disp(), dest.base()); ++ cmpeq(rscratch3, c_reg, GP); ++ wr_f(GP); ++ move(AT, x_reg); ++ align(8); ++ lstl(AT, dest.disp(), dest.base()); ++ rd_f(AT); ++ beq_l(GP, nequal); ++ beq_l(AT, again); ++ // not xchged ++ BIND(nequal); ++ //if(rscratch3 != c_reg) bis(R0, rscratch3, c_reg); ++ bis(R0, rscratch3, c_reg); ++ ++ //ldl(rscratch3, 0, esp); ++ //addl(esp, 16, esp); ++ //ornot(R0, AT, rcc); ++ } ++ change_cmxchg_result(this); ++// mov_immediate32(rcc, 1); ++// cmove(Assembler::success, rcc, R0, rcc, AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy ++} ++ ++//todo scw ++// if c_reg == content(dest) { Address(dest) = 
x_reg ;} else { c_reg = content(dest); } ++void MacroAssembler::cmpxchg32(Register x_reg, Address dest, Register c_reg) { ++ assert_different_registers(AT, GP, rscratch3, c_reg, dest.base()); ++ assert_different_registers(AT, GP, rscratch3, x_reg, dest.base()); ++ SizedScope sc(this, 64); ++ Label done, again, nequal; ++ if (UseSW8A) { ++ if(UseCAS) { ++ move(GP, x_reg); ++ if (dest.disp() != 0) { ++ ldi(AT, dest.disp(), dest.base()); ++ casw(c_reg, AT, GP); ++ } else { ++ casw(c_reg, dest.base(), GP); ++ } ++ cmpeq(c_reg, GP , AT); ++ move(c_reg, GP); ++ } else { ++ BIND(again); ++ lldw(AT, dest.disp(), dest.base()); ++ bne_c(AT, c_reg, nequal); ++ ++ move(AT, x_reg); ++ lstw(AT, dest.disp(), dest.base()); ++ beq_l(AT, again); ++ beq_l(R0, done); ++ ++ // not xchged ++ BIND(nequal); ++ memb(); ++ move(c_reg, AT); ++ move(AT, R0); ++ ++ BIND(done); ++ } ++ } else { ++ //subl(esp, 16, esp); ++ //stl(rscratch3, 0, esp); ++ BIND(again); ++ lldw(rscratch3, dest.disp(), dest.base()); ++ cmpeq(rscratch3, c_reg, GP); ++ wr_f(GP); ++ move(AT, x_reg); ++ align(8); ++ lstw(AT, dest.disp(), dest.base()); ++ rd_f(AT); ++ beq_l(GP, nequal); ++ beq_l(AT, again); ++ // not xchged ++ BIND(nequal); ++ bis(R0, rscratch3, c_reg); ++ ++ //ldl(rscratch3, 0, esp); ++ //addl(esp, 16, esp); ++ //xor_ins(AT, R0, rcc); ++ //ornot(R0, AT, rcc); ++ } ++ change_cmxchg_result(this); ++ //mov_immediate32(rcc, 1); ++ //cmove(Assembler::success, rcc, R0, rcc, AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy ++} ++ ++void MacroAssembler::fill_to_size(address start, int size) { ++ if (pc() - start > size) should_not_reach_here("size expanded!"); ++ while (pc() - start < size) ++ nop(); ++} ++ ++//If oldval == *dest then store newval into dest ++void MacroAssembler::storeLcon(Register oldval, Address dest, Register newval) { ++ //guarantee((dest.index()->encoding() == sp->encoding()), "impossible encoding storeLCon 1"); ++ //guarantee((dest.disp() == 0), "impossible encoding storeLCon 2"); ++ //guarantee((dest.index()->encoding() == sp->encoding() && dest.disp() == 0), "impossible encoding storeLCon"); ++ SizedScope sc(this, 64); ++ assert_different_registers(AT, GP, oldval, newval, dest.base()); ++ Label again, failure; ++ if(UseSW8A) { ++ if(UseCAS) { ++ move(GP, newval); ++ if(dest.disp() != 0) { ++ ldi(AT, dest.disp(), dest.base()); ++ casl(oldval, AT, GP); ++ } else { ++ casl(oldval, dest.base(), GP); ++ } ++ cmpeq(oldval, GP, AT); ++ } else { ++ BIND(again); ++ lldl(GP, dest.disp(), dest.base()); ++ cmpeq(GP, oldval, AT); ++ beq_l(AT, failure); ++ move(AT, newval); ++ lstl(AT, dest.disp(), dest.base()); ++ beq_l(AT, again); ++ BIND(failure); ++ } ++ } else { ++ BIND(again); ++ lldl(AT, dest.disp(), dest.base()); ++ cmpeq(AT, oldval, GP); ++ wr_f(GP); ++ move(AT, newval); ++ align(8); ++ lstl(AT, dest.disp(), dest.base()); ++ rd_f(AT); ++ beq_l(GP, failure); ++ beq_l(AT, again); ++ BIND(failure); ++ } ++ //xor_ins(AT, R0, rcc);//need it ?? 
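// --- Editorial note: illustration only, not part of this patch ---
// Semantics of storeLcon on the LL/SC path above, sketched with C++ atomics
// (generic names, not the port's API; would need <atomic>):
//   bool storeLcon_model(std::atomic<intptr_t>& dest, intptr_t oldval, intptr_t newval) {
//     intptr_t expected = oldval;
//     return dest.compare_exchange_strong(expected, newval);  // true iff *dest was oldval
//   }
// lldl/lstl form the load-locked / store-conditional pair: the loop retries when the
// conditional store is interrupted, and exits to 'failure' when the loaded value
// differs from oldval; change_cmxchg_result below then normalizes the outcome into rcc.
// ------------------------------------------------------------------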
++ change_cmxchg_result(this); ++ //mov_immediate32(rcc, 1); ++ //cmove(Assembler::success, rcc, R0, rcc, AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy ++} ++ ++void MacroAssembler::storeIcon(Register oldval, Address dest, Register newval) { ++ //guarantee((dest.index() == sp && dest.disp() == 0), "impossible encoding storeICon"); ++ SizedScope sc(this, 64); ++ assert_different_registers(AT, GP, oldval, newval, dest.base()); ++ Label again, failure; ++ if(UseSW8A) { ++ if(UseCAS) { ++ move(GP, newval); ++ if(dest.disp() != 0) { ++ ldi(AT, dest.disp(), dest.base()); ++ casw(oldval, AT, GP); ++ } else { ++ casw(oldval, dest.base(), GP); ++ } ++ cmpeq(oldval, GP, AT); ++ } else { ++ BIND(again); ++ lldw(GP, dest.disp(), dest.base()); ++ cmpeq(GP, oldval, AT); ++ beq_l(AT, failure); ++ move(AT, newval); ++ lstw(AT, dest.disp(), dest.base()); ++ beq_l(AT, again); ++ BIND(failure); ++ } ++ } else { ++ BIND(again); ++ lldw(AT, dest.disp(), dest.base()); ++ cmpeq(AT, oldval, GP); ++ wr_f(GP); ++ move(AT, newval); ++ align(8); ++ lstw(AT, dest.disp(), dest.base()); ++ rd_f(AT); ++ beq_l(GP, failure); ++ beq_l(AT, again); ++ BIND(failure); ++ } ++ //xor_ins(AT, R0, rcc);// need it? ++ change_cmxchg_result(this); ++ //mov_immediate32(rcc, 1); ++ //cmove(Assembler::success, rcc, R0, rcc, AT); //TODO:refactor we should refactor cmpxchg, not add this instruction jzy ++} ++ ++void MacroAssembler::cmpxchgptr(Register xreg, AddressLiteral adr, Register creg, Register tmp) { ++ assert_different_registers(tmp, AT); ++ lea(tmp, adr); ++ cmpxchg(xreg, Address(tmp, 0), creg); ++} ++void MacroAssembler::xchgptr(Register src1, Register src2) { ++ movl(rscratch2, src1); ++ movl(src1, src2); ++ movl(src2, rscratch2); ++} ++ ++int MacroAssembler::corrected_idivl(Register result, Register ra, Register rb, ++ bool want_remainder, Register scratch) ++{ ++ ShouldNotReachHere(); ++ int idivl_offset = offset(); ++ return idivl_offset; ++} ++ ++void MacroAssembler::enter() { ++ subptr(esp, 16, esp); ++ stl(rfp, 0, esp); ++ stl(RA, 8, esp); ++ movl(rfp, esp); ++} ++ ++void MacroAssembler::load_float(FloatRegister rd, AddressLiteral addr, Register tmp) { ++ mov_immediate64(tmp, (intptr_t)addr.target(), addr.rspec()); ++ flds(rd, 0, tmp); ++} ++ ++void MacroAssembler::load_double(FloatRegister rd, AddressLiteral addr, Register tmp) { ++ mov_immediate64(tmp, (intptr_t)addr.target(), addr.rspec()); ++ fldd(rd, 0, tmp); ++} ++ ++void MacroAssembler::load_float(FloatRegister fa, Address src, Register tmp) { ++ flds(fa, src, tmp); ++} ++ ++void MacroAssembler::load_double(FloatRegister fa, Address src, Register tmp) { ++ fldd(fa, src, tmp); ++} ++ ++void MacroAssembler::store_float(FloatRegister fa, Address src, Register tmp) { ++ fsts(fa, src, tmp); ++} ++ ++void MacroAssembler::store_double(FloatRegister fa, Address src, Register tmp) { ++ fstd(fa, src, tmp); ++} ++ ++void MacroAssembler::jump(AddressLiteral addr, Register tmp) { ++ mov_address64(T12, (intptr_t)addr.target(), addr.rspec()); ++ Assembler::jmp(tmp, T12, 0);// set ra=AT for debug ++} ++ ++void MacroAssembler::jump(RuntimeAddress addr, Register tmp) { ++ mov_address64(T12, (intptr_t)addr.target(), addr.rspec()); ++ Assembler::jmp(tmp, T12, 0);// set ra=AT for debug ++} ++ ++//TODO:check right jzy ++void MacroAssembler::jump_cc(Condition cc, AddressLiteral dst, Register ccReg, Register tmp) { ++ Label skip; ++ jcc(reverse[cc], skip, ccReg); ++ jump(dst, tmp); ++ bind(skip); ++} ++ ++void MacroAssembler::jcc(Condition cc, Label& L, 
Register ccReg, ConditionLength cl) { ++ if (cl == bitw) { ++ subw(ccReg, 0, ccReg); ++ } ++ switch(cc) { ++// case equal: ++ case zero: ++ beq_l(ccReg, L); ++ break; ++// case notEqual: ++ case notZero: ++ bne_l(ccReg, L); ++ break; ++ case greaterEqual: ++ case aboveEqual: ++ case notNegative: ++ bge_l(ccReg, L); ++ break; ++ case greater: ++ case positive: ++ case above://unsigned> ++ bgt_l(ccReg, L); ++ break; ++ case lessEqual: ++ case belowEqual: //unsigned<= ++ ble_l(ccReg, L); ++ break; ++ case less: ++ case below: ++ case negative: ++ blt_l(ccReg, L); ++ break; ++ case success:// for cas success ++ bne_l(AT, L); ++ break; ++ case failed:// for cas failed ++ beq_l(AT, L); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmpws(int cc, Register op1, Register op2, Register ccReg) { ++ switch((Condition)cc) { ++ case equal: ++ cmpeq(op1, op2, ccReg); ++ break; ++ case notEqual: ++ subw(op1, op2, ccReg); ++ break; ++ case greater: ++ cmplt(op2, op1, ccReg); ++ break; ++ case greaterEqual: ++ cmple(op2, op1, ccReg); ++ break; ++ case less: ++ cmplt(op1, op2, ccReg); ++ break; ++ case lessEqual: ++ cmple(op1, op2, ccReg); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmpls(int cc, Register op1, Register op2, Register ccReg) { ++ switch((Condition)cc) { ++ case equal: ++ cmpeq(op1, op2, ccReg); ++ break; ++ case notEqual: ++ subl(op1, op2, ccReg); ++ break; ++ case greater: ++ cmplt(op2, op1, ccReg); ++ break; ++ case greaterEqual: ++ cmple(op2, op1, ccReg); ++ break; ++ case less: ++ cmplt(op1, op2, ccReg); ++ break; ++ case lessEqual: ++ cmple(op1, op2, ccReg); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmpwu(int cc, Register op1, Register op2, Register ccReg) { ++ switch((Condition)cc) { ++ case equal: ++ cmpeq(op1, op2, ccReg); ++ break; ++ case notEqual: ++ subw(op1, op2, ccReg);//TODO:refactor jzy use subl to replace? 
++ break; ++ case above://unsigned> ++ cmpult(op2, op1, ccReg); ++ break; ++ case aboveEqual: ++ cmpule(op2, op1, ccReg); ++ break; ++ case below: ++ cmpult(op1, op2, ccReg); ++ break; ++ case belowEqual: //unsigned<= ++ cmpule(op1, op2, ccReg); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmplu(int cc, Register op1, Register op2, Register ccReg) { ++ switch((Condition)cc) { ++ case equal: ++ cmpeq(op1, op2, ccReg); ++ break; ++ case notEqual: ++ subl(op1, op2, ccReg); ++ break; ++ case above://unsigned> ++ cmpult(op2, op1, ccReg); ++ break; ++ case aboveEqual: ++ cmpule(op2, op1, ccReg); ++ break; ++ case below: ++ cmpult(op1, op2, ccReg); ++ break; ++ case belowEqual: //unsigned<= ++ cmpule(op1, op2, ccReg); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::cmpfs(int cc, FloatRegister op1, FloatRegister op2, FloatRegister ccReg, bool is_order) {SCOPEMARK_NAME(MacroAssembler::cmpfs, this); ++ switch((Condition)cc) { ++ case equal: ++ fcmpeq(op1, op2, ccReg); ++ break; ++ case notEqual: ++ fcmpeq(op1, op2, ccReg); ++ fcmpeq(ccReg, fzero, ccReg); ++ break; ++ case greater: ++ c_olt_s(op2, op1); ++ break; ++ case greaterEqual: ++ c_ole_s(op2, op1); ++ break; ++ case less: ++ block_comment("less;;"); ++ if (is_order) { ++ c_olt_s(op1, op2); ++ } else { ++ c_ult_s(op1, op2); ++ } ++ break; ++ case lessEqual: ++ block_comment("lessEqual;;"); ++ if (is_order) { ++ c_ole_s(op1, op2); ++ } else { ++ c_ule_s(op1, op2); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::cmpfd(int cc, FloatRegister op1, FloatRegister op2, FloatRegister ccReg, bool is_order) {SCOPEMARK_NAME(MacroAssembler::cmpfd, this); ++ switch((Condition)cc) { ++ case equal: ++ fcmpeq(op1, op2, ccReg); ++ break; ++ case notEqual: ++ //TODO:performance jzy ++// mov_immediate64(rscratch3, 1); ++// ifmovd(rscratch3, fscratch1); ++// fcmpeq(op1, op2, ccReg); ++// fseleq(ccReg, fscratch1, fzero, ccReg); ++ fcmpeq(op1, op2, ccReg); ++ fcmpeq(ccReg, fzero, ccReg); ++ break; ++ case greater: ++ c_olt_d(op2, op1); ++ break; ++ case greaterEqual: ++ c_ole_d(op2, op1); ++ break; ++ case less: ++ block_comment("less;;"); ++ if (is_order) { ++ c_olt_d(op1, op2); ++ } else { ++ c_ult_d(op1, op2); ++ } ++ break; ++ case lessEqual: ++ if (is_order) { ++ c_ole_d(op1, op2); ++ } else { ++ c_ule_d(op1, op2); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::load_unsigned_short(Register dst, Address src) { ++ ldhu(dst, src); ++} ++ ++void MacroAssembler::load_unsigned_byte(Register dst, Address src) { ++ ldbu(dst, src); ++} ++ ++void MacroAssembler::load_signed_short(Register rd, Address addr) { ++ ldhu(rd, addr); ++ sexth(rd, rd); ++} ++ ++void MacroAssembler::load_signed_byte32(Register rd, Address addr, Register tmp) { ++ ldbu(rd, addr); ++ sextb(rd, rd); ++ movw(rd, rd); ++} ++ ++void MacroAssembler::load_signed_byte64(Register rd, Address addr, Register tmp) { ++ ldbu(rd, addr); ++ sextb(rd, rd); ++} ++ ++void MacroAssembler::load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2) { ++ switch (size_in_bytes) { ++ case 8: ld(dst, src); break; ++ case 4: lw(dst, src); break; ++ case 2: if (is_signed) load_signed_short(dst, src); else load_unsigned_short(dst, src); break; ++ case 1: if (is_signed) load_signed_byte32( dst, src); else load_unsigned_byte( dst, src); break; ++ default: ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2) { ++ ShouldNotReachHere(); ++} ++ 
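// --- Editorial sketch: illustration only, not part of this patch ---
// The cmp* / jcc helpers above replace x86's implicit EFLAGS with an explicit
// condition register (rcc): a compare leaves either a signed difference or a
// 0/1 value (from cmplt/cmpult/cmpeq) in rcc, and jcc branches on its sign or
// on zero/non-zero. A minimal C++ model of that contract (names are made up):
#include <cstdint>

static inline int64_t cmpw_model(int32_t lh, int32_t rh) {
  // subl on two sign-extended 32-bit values: the 64-bit difference cannot overflow
  return (int64_t)lh - (int64_t)rh;
}

static inline bool jcc_zero(int64_t rcc)         { return rcc == 0; }  // beq_l
static inline bool jcc_notZero(int64_t rcc)      { return rcc != 0; }  // bne_l
static inline bool jcc_greaterEqual(int64_t rcc) { return rcc >= 0; }  // bge_l
static inline bool jcc_less(int64_t rcc)         { return rcc <  0; }  // blt_l
// -------------------------------------------------------------------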
++void MacroAssembler::null_check(Register reg, int offset) { ++ if (needs_explicit_null_check(offset)) { ++ // provoke OS NULL exception if reg = NULL by ++ // accessing M[reg] w/o changing any registers ++ // NOTE: this is plenty to provoke a segv ++ cmpptr(V0, Address(reg, 0)); ++ } else { ++ // nothing to do, (later) access of M[reg + offset] ++ // will provoke OS NULL exception if reg = NULL ++ } ++} ++ ++void MacroAssembler::unimplemented(const char* what) { ++ const char* buf = NULL; ++ { ++ ResourceMark rm; ++ stringStream ss; ++ ss.print("unimplemented: %s", what); ++ buf = code_string(ss.as_string()); ++ } ++ stop(buf); ++} ++ ++void MacroAssembler::pop_CPU_state(bool restore_vectors) { ++ ShouldNotReachHere(); ++} ++ ++void MacroAssembler::push_CPU_state(bool save_vectors) { ++ ShouldNotReachHere(); ++} ++ ++void MacroAssembler::reset_last_Java_frame(Register java_thread, bool clear_fp) { ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++ java_thread = rthread; ++ } ++ // we must set sp to zero to clear frame ++ stl(R0, in_bytes(JavaThread::last_Java_sp_offset()), java_thread); ++ // must clear fp, so that compiled frames are not confused; it is possible ++ // that we need it only for debugging ++ if(clear_fp) { ++ stl(R0, in_bytes(JavaThread::last_Java_fp_offset()), java_thread); ++ } ++ ++ // Always clear the pc because it could have been set by make_walkable() ++ stl(R0, in_bytes(JavaThread::last_Java_pc_offset()), java_thread); ++} ++ ++void MacroAssembler::round_to(Register reg, int modulus) { ++ addptr(reg, modulus - 1, reg); ++ andptr(reg, -modulus, reg); ++} ++ ++void MacroAssembler::serialize_memory(Register thread, Register tmp) { ++ assert_different_registers(rscratch2_AT, tmp); ++ movwu(tmp, thread); ++ srll(tmp, os::get_serialize_page_shift_count(), tmp); ++ mov_immediate64(rscratch2_AT, (os::vm_page_size() - sizeof(int))); ++ andptr(tmp, rscratch2_AT, tmp); ++ ++ mov_immediate64(rscratch2_AT, (intptr_t)os::get_memory_serialize_page()); ++ addl(rscratch2_AT, tmp, rscratch2_AT); ++ stw(tmp, Address(rscratch2_AT, 0)); ++ //stw(tmp, Address(tmp, (intptr_t)os::get_memory_serialize_page()));//TODO:use intptr_t's transform is OK? 
jzy ++ ++} ++ ++/*void MacroAssembler::set_last_Java_frame(Register java_thread, ++ Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc) {ShouldNotReachHere(); ++ // determine java_thread register ++ if (!java_thread->is_valid()) { ++ java_thread = rthread; ++ } ++ // determine last_java_sp register ++ if (!last_java_sp->is_valid()) { ++ last_java_sp = esp; ++ } ++ ++ // last_java_fp is optional ++ if (last_java_fp->is_valid()) { ++ stptr(last_java_fp, Address(java_thread, JavaThread::last_Java_fp_offset())); ++ } ++ ++ if (last_java_pc != NULL) { ++ lea(Address(java_thread, ++ JavaThread::frame_anchor_offset() + JavaFrameAnchor::last_Java_pc_offset()), ++ InternalAddress(last_java_pc), AT); ++ } else { ++ ShouldNotReachHere(); ++ } ++ stptr(last_java_sp, Address(java_thread, JavaThread::last_Java_sp_offset())); ++}*/ ++ ++void MacroAssembler::testb(Register lh, int imm8, Register ccReg) { ++ if (is_uimm8(imm8)) { ++ and_ins(lh, imm8, ccReg); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++void MacroAssembler::testb(Register lh, Register rh, Register ccReg) { ++ ShouldNotReachHere(); ++} ++ ++void MacroAssembler::testb(Address addr, int imm8, Register ccReg) { ++ ldbu(ccReg, addr); ++ if (is_uimm8(imm8)) { ++ and_ins(ccReg, imm8, ccReg); ++ } else { ++ Unimplemented(); ++ } ++} ++ ++/** ++ * x86 ++ * Assembler::testl(Register dst, int32_t imm32) ++ * sw64 ++ * testw(Register lh, int rh, Register ccReg, Register scratch) ++ * note ++ * test 32bit of lh and rh. the msb32 of lh don't cares ++ */ ++void MacroAssembler::testw(Register lh, int rh, Register ccReg, Register scratch) { ++ andw(lh, rh, ccReg, scratch); ++} ++ ++void MacroAssembler::testw(Register lh, Register rh, Register ccReg) { ++ andw(lh, rh, ccReg); ++} ++ ++void MacroAssembler::testl(Register lh, long rh, Register res, Register scratch) { ++ testptr(lh, rh, res, scratch); ++} ++ ++void MacroAssembler::testl(Register lh, Register rh, Register ccReg) { ++ testptr(lh, rh, ccReg); ++} ++ ++void MacroAssembler::testptr(Register lh, long rh, Register res, Register scratch) { ++ andptr(lh, rh, res, scratch); ++} ++ ++void MacroAssembler::testptr(Register lh, Register rh, Register ccReg) { ++ and_ins(lh, rh, ccReg); ++} ++ ++void MacroAssembler::resolve_jobject(Register value, ++ Register thread, ++ Register tmp) {SCOPEMARK_NAME(MacroAssembler::resolve_jobject, this); ++ assert_different_registers(value, thread, tmp); ++ Label done, not_weak; ++ testptr(value, value); ++ jcc(Assembler::zero, done); // Use NULL as-is. ++ testptr(value, JNIHandles::weak_tag_mask); // Test for jweak tag. ++ jcc(Assembler::zero, not_weak); ++ // Resolve jweak. ++ access_load_at(T_OBJECT, IN_NATIVE | ON_PHANTOM_OOP_REF, ++ value, Address(value, -JNIHandles::weak_tag_value), tmp, thread); ++ verify_oop(value); ++ jmp(done); ++ bind(not_weak); ++ // Resolve (untagged) jobject. ++ access_load_at(T_OBJECT, IN_NATIVE, value, Address(value, 0), tmp, thread); ++ verify_oop(value); ++ bind(done); ++} ++ ++// Defines obj, preserves var_size_in_bytes, okay for t2 == var_size_in_bytes. 
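// --- Editorial sketch: illustration only, not part of this patch ---
// tlab_allocate below simply delegates to the BarrierSetAssembler; the bump-pointer
// sequence it ends up emitting is roughly the following model (field and function
// names here are illustrative, not the exact VM API):
#include <cstddef>

struct TlabModel {
  char* top;   // next free byte in the thread-local allocation buffer
  char* end;   // one past the last usable byte
};

// Returns the new object's address, or NULL when the caller must take the slow path.
static char* tlab_allocate_model(TlabModel* tlab, size_t size_in_bytes) {
  char* obj     = tlab->top;
  char* new_top = obj + size_in_bytes;
  if (new_top > tlab->end) return NULL;  // slow_case: object does not fit in the TLAB
  tlab->top = new_top;                   // bump the pointer; 'obj' is the new object
  return obj;
}
// -------------------------------------------------------------------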
++void MacroAssembler::tlab_allocate(Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Register t2, ++ Label& slow_case) { ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->tlab_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, t2, slow_case); ++} ++ ++// Defines obj, preserves var_size_in_bytes ++void MacroAssembler::eden_allocate(Register thread, Register obj, ++ Register var_size_in_bytes, ++ int con_size_in_bytes, ++ Register t1, ++ Label& slow_case) {SCOPEMARK_NAME(eden_allocate, this) ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->eden_allocate(this, thread, obj, var_size_in_bytes, con_size_in_bytes, t1, slow_case); ++} ++ ++// Zero words; len is in bytes ++// Destroys all registers except addr ++// len must be a nonzero multiple of wordSize ++void MacroAssembler::zero_memory(Register addr, Register len, Register t1) { ++ should_not_reach_here("zero_memory"); ++} ++ ++// Look up the method for a megamorphic invokeinterface call. ++// The target method is determined by . ++// The receiver klass is in recv_klass. ++// On success, the result will be in method_result, and execution falls through. ++// On failure, execution transfers to the given label. ++void MacroAssembler::lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& L_no_such_interface, ++ bool return_method) { ++ assert_different_registers(recv_klass, intf_klass, scan_temp, rcc); ++ assert_different_registers(method_result, intf_klass, scan_temp, rcc); ++ assert(recv_klass != method_result || !return_method, ++ "recv_klass can be destroyed when method isn't needed"); ++ ++ assert(itable_index.is_constant() || itable_index.as_register() == method_result, ++ "caller must use same register for non-constant itable index as for method"); ++ ++ // Compute start of first itableOffsetEntry (which is at the end of the vtable) ++ int vtable_base = in_bytes(Klass::vtable_start_offset()); ++ int itentry_off = itableMethodEntry::method_offset_in_bytes(); ++ int scan_step = itableOffsetEntry::size() * wordSize; ++ int vte_size = vtableEntry::size_in_bytes(); ++ Address::ScaleFactor times_vte_scale = Address::times_ptr; ++ assert(vte_size == wordSize, "else adjust times_vte_scale"); ++ ++ ldws(scan_temp, Address(recv_klass, Klass::vtable_length_offset())); ++ ++ // %%% Could store the aligned, prescaled offset in the klassoop. ++ lea(scan_temp, Address(recv_klass, scan_temp, times_vte_scale, vtable_base)); ++ ++ if (return_method) { ++ // Adjust recv_klass by scaled itable_index, so we can free itable_index. ++ assert(itableMethodEntry::size() * wordSize == wordSize, "adjust the scaling in the code below"); ++ lea(recv_klass, Address(recv_klass, itable_index, Address::times_ptr, itentry_off)); ++ } ++ ++ // for (scan = klass->itable(); scan->interface() != NULL; scan += scan_step) { ++ // if (scan->interface() == intf) { ++ // result = (klass + scan->offset() + itable_index); ++ // } ++ // } ++ Label search, found_method; ++ ++ for (int peel = 1; peel >= 0; peel--) { ++ ldptr(method_result, Address(scan_temp, itableOffsetEntry::interface_offset_in_bytes())); ++ cmpptr(intf_klass, method_result); ++ ++ if (peel) { ++ jcc(Assembler::equal, found_method); ++ } else { ++ jcc(Assembler::notEqual, search); ++ // (invert the test to fall through to found_method...) 
++ } ++ ++ if (!peel) break; ++ ++ bind(search); ++ ++ // Check that the previous entry is non-null. A null entry means that ++ // the receiver class doesn't implement the interface, and wasn't the ++ // same as when the caller was compiled. ++ jcc(Assembler::zero, L_no_such_interface, method_result); ++ addptr(scan_temp, scan_step, scan_temp); ++ } ++ ++ bind(found_method); ++ ++ if (return_method) { ++ // Got a hit. ++ ldws(scan_temp, Address(scan_temp, itableOffsetEntry::offset_offset_in_bytes())); ++ ldptr(method_result, Address(recv_klass, scan_temp, Address::times_1)); ++ } ++} ++ ++ ++// virtual method calling ++void MacroAssembler::lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result) { ++ const int base = in_bytes(Klass::vtable_start_offset()); ++ assert(vtableEntry::size() * wordSize == wordSize, "else adjust the scaling in the code below"); ++ Address vtable_entry_addr(recv_klass, ++ vtable_index, Address::times_ptr, ++ base + vtableEntry::method_offset_in_bytes()); ++ ldptr(method_result, vtable_entry_addr); ++ } ++ ++ ++void MacroAssembler::check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success) { ++ Label L_failure; ++ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_failure, NULL); ++ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, noreg, &L_success, NULL); ++ bind(L_failure); ++} ++ ++ ++void MacroAssembler::check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset) { ++ assert_different_registers(sub_klass, super_klass, temp_reg); ++ bool must_load_sco = (super_check_offset.constant_or_zero() == -1); ++ if (super_check_offset.is_register()) { ++ assert_different_registers(sub_klass, super_klass, ++ super_check_offset.as_register()); ++ } else if (must_load_sco) { ++ assert(temp_reg != noreg, "supply either a temp or a register offset"); ++ } ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ if (L_slow_path == NULL) { L_slow_path = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ Address super_check_offset_addr(super_klass, sco_offset); ++ ++ // Hacked jcc, which "knows" that L_fallthrough, at least, is in ++ // range of a jccb. If this routine grows larger, reconsider at ++ // least some of these. ++#define local_jcc(assembler_cond, label) \ ++ if (&(label) == &L_fallthrough) jcc(assembler_cond, label); \ ++ else jcc( assembler_cond, label) /*omit semi*/ ++ ++ // Hacked jmp, which may only be used just before L_fallthrough. ++#define final_jmp(label) \ ++ if (&(label) == &L_fallthrough) { /*do nothing*/ } \ ++ else jmp(label) /*omit semi*/ ++ ++ // If the pointers are equal, we are done (e.g., String[] elements). ++ // This self-check enables sharing of secondary supertype arrays among ++ // non-primary types such as array-of-interface. Otherwise, each such ++ // type would need its own customized SSA. 
++ // We move this check to the front of the fast path because many ++ // type checks are in fact trivially successful in this manner, ++ // so we get a nicely predicted branch right at the start of the check. ++ cmpptr(sub_klass, super_klass); ++ local_jcc(Assembler::equal, *L_success); ++ ++ // Check the supertype display: ++ if (must_load_sco) { ++ // Positive movl does right thing on LP64. ++ ldws(temp_reg, super_check_offset_addr); ++ super_check_offset = RegisterOrConstant(temp_reg); ++ } ++ Address super_check_addr(sub_klass, super_check_offset, Address::times_1, 0); ++ cmpptr(super_klass, super_check_addr); // load displayed supertype ++ ++ // This check has worked decisively for primary supers. ++ // Secondary supers are sought in the super_cache ('super_cache_addr'). ++ // (Secondary supers are interfaces and very deeply nested subtypes.) ++ // This works in the same check above because of a tricky aliasing ++ // between the super_cache and the primary super display elements. ++ // (The 'super_check_addr' can address either, as the case requires.) ++ // Note that the cache is updated below if it does not help us find ++ // what we need immediately. ++ // So if it was a primary super, we can just fail immediately. ++ // Otherwise, it's the slow path for us (no success at this point). ++ ++ if (super_check_offset.is_register()) { ++ local_jcc(Assembler::equal, *L_success); ++ cmpw(super_check_offset.as_register(), sc_offset); ++ if (L_failure == &L_fallthrough) { ++ local_jcc(Assembler::equal, *L_slow_path); ++ } else { ++ local_jcc(Assembler::notEqual, *L_failure); ++ final_jmp(*L_slow_path); ++ } ++ } else if (super_check_offset.as_constant() == sc_offset) { ++ // Need a slow path; fast failure is impossible. ++ if (L_slow_path == &L_fallthrough) { ++ local_jcc(Assembler::equal, *L_success); ++ } else { ++ local_jcc(Assembler::notEqual, *L_slow_path); ++ final_jmp(*L_success); ++ } ++ } else { ++ // No slow path; it's a fast decision. ++ if (L_failure == &L_fallthrough) { ++ local_jcc(Assembler::equal, *L_success); ++ } else { ++ local_jcc(Assembler::notEqual, *L_failure); ++ final_jmp(*L_success); ++ } ++ } ++ ++ bind(L_fallthrough); ++ ++#undef local_jcc ++#undef final_jmp ++} ++ ++ ++void MacroAssembler::check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes) { ++ if (temp2_reg == noreg) temp2_reg = AT; ++ assert_different_registers(temp_reg, noreg); ++ assert_different_registers(sub_klass, super_klass, temp_reg, temp2_reg); ++ ++ const Register rcx = temp2_reg; ++ const Register rdi = temp_reg; ++ ++ Label L_fallthrough; ++ int label_nulls = 0; ++ if (L_success == NULL) { L_success = &L_fallthrough; label_nulls++; } ++ if (L_failure == NULL) { L_failure = &L_fallthrough; label_nulls++; } ++ assert(label_nulls <= 1, "at most one NULL in the batch"); ++ ++ // a couple of useful fields in sub_klass: ++ int ss_offset = in_bytes(Klass::secondary_supers_offset()); ++ int sc_offset = in_bytes(Klass::secondary_super_cache_offset()); ++ Address secondary_supers_addr(sub_klass, ss_offset); ++ Address super_cache_addr( sub_klass, sc_offset); ++ ++ // Do a linear scan of the secondary super-klass chain. ++ // This code is rarely used, so simplicity is a virtue here. ++ // The repne_scan instruction uses fixed registers, which we must spill. ++ // Don't worry too much about pre-existing connections with the input regs. 
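// --- Editorial sketch: illustration only, not part of this patch ---
// The linear scan emitted below, written as plain C++ over hypothetical types
// (a flattened stand-in for Klass and Array<Klass*>):
struct KlassModel {
  KlassModel** secondary_supers;      // payload of the secondary-supers array
  int          secondary_supers_len;  // its length
  KlassModel*  secondary_super_cache;
};

static bool slow_subtype_scan(KlassModel* sub, KlassModel* super) {
  for (int i = 0; i < sub->secondary_supers_len; i++) {
    if (sub->secondary_supers[i] == super) {
      sub->secondary_super_cache = super;  // cache the hit so the fast path succeeds next time
      return true;                         // L_success
    }
  }
  return false;                            // L_failure
}
// -------------------------------------------------------------------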
++ ++#ifndef PRODUCT ++ int* pst_counter = &SharedRuntime::_partial_subtype_ctr; ++ ExternalAddress pst_counter_addr((address) pst_counter); ++ lea(rcx, pst_counter_addr); ++ ldws(rdi, Address(rcx, 0)); ++ addw(rdi, 1, rdi); ++ stw(rdi, Address(rcx, 0)); ++#endif //PRODUCT ++ ++ // We will consult the secondary-super array. ++ ldptr(rdi, secondary_supers_addr); ++ // Load the array length. (Positive movl does right thing on LP64.) ++ ldws(rcx, Address(rdi, Array::length_offset_in_bytes())); ++ // Skip to start of data. ++ addptr(rdi, Array::base_offset_in_bytes(), rdi); ++ ++ // Scan RCX words at [RDI] for an occurrence of super_klass. ++ Label Loop, found; ++ bind(Loop); ++ jcc(Assembler::zero, *L_failure, rcx); ++ cmpptr(Address(rdi, 0), super_klass); ++ jcc(Assembler::equal, found); ++ addptr(rdi, 1 * wordSize, rdi); ++ subw(rcx, 1, rcx); ++ jmp(Loop); ++ ++ bind(found); ++ // Success. Cache the super we found and proceed in triumph. ++ stptr(super_klass, super_cache_addr); ++ if (L_success != &L_fallthrough) { ++ jmp(*L_success); ++ } ++ ++ bind(L_fallthrough); ++} ++ ++ ++void MacroAssembler::verify_oop(Register reg, const char* s) { ++ if (!VerifyOops) return; ++ ++ // Pass register number to verify_oop_subroutine ++ const char * b = NULL; ++ {//name(), s); ++ b = code_string(ss.as_string()); ++ } ++ block_comment("verify_oop {"); ++ const Register rax = V0; ++ //push(rscratch1); // trash by call, sw doesn't trash rscratch1 ++ push(rax); // save rax, ++ push(reg); // pass register argument ++ ExternalAddress buffer((address) b); ++ lea(rax, buffer.addr()); ++ push(rax); ++ // call indirectly to solve generation ordering problem ++ ldptr(rax, ExternalAddress(StubRoutines::verify_oop_subroutine_entry_address())); ++ push(RA); ++ call(rax); ++ pop(RA); ++ addl(esp, 8, esp);//just pop ++ pop(reg); ++ pop(rax); ++ // Caller pops the arguments (oop, message) and restores rax, r10 ++ block_comment("} verify_oop"); ++} ++ ++//todo scw ++RegisterOrConstant MacroAssembler::delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset) { ++ ShouldNotReachHere(); ++ intptr_t value = *delayed_value_addr; ++ if (value != 0) ++ return RegisterOrConstant(value + offset); ++// Address a(delayed_value_addr); ++ // load indirectly to solve generation ordering problem ++ // movptr(tmp, ExternalAddress((address) delayed_value_addr)); ++ // ld(tmp, a); ++ if (offset != 0) ++ addiu(tmp, offset, tmp); ++ ++ return RegisterOrConstant(tmp); ++} ++ ++ ++Address MacroAssembler::argument_address(RegisterOrConstant arg_slot, ++ int extra_slot_offset) { ++ // cf. TemplateTable::prepare_invoke(), if (load_receiver). ++ int stackElementSize = Interpreter::stackElementSize; ++ int offset = Interpreter::expr_offset_in_bytes(extra_slot_offset+0); ++#ifdef ASSERT ++ int offset1 = Interpreter::expr_offset_in_bytes(extra_slot_offset+1); ++ assert(offset1 - offset == stackElementSize, "correct arithmetic"); ++#endif ++ Register scale_reg = noreg; ++ Address::ScaleFactor scale_factor = Address::no_scale; ++ if (arg_slot.is_constant()) { ++ offset += arg_slot.as_constant() * stackElementSize; ++ } else { ++ scale_reg = arg_slot.as_register(); ++ scale_factor = Address::times(stackElementSize); ++ } ++ // offset += wordSize; // return PC is on stack // yj todo: we don't push PC on stack?? 
++ return Address(esp, scale_reg, scale_factor, offset); ++} ++ ++ ++void MacroAssembler::verify_oop_addr(Address addr, const char* s) { ++ ShouldNotReachHere(); ++} ++ ++void MacroAssembler::verify_tlab() { ++#ifdef ASSERT ++ if (UseTLAB && VerifyOops) { ++ Label next, ok; ++ Register thread_reg = rthread; ++ Register t1 = rscratch1; ++ Register t2 = rscratch2; ++ get_thread(thread_reg); ++ //push(t1); ++ ldptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); ++ cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_start_offset()))); ++ jcc(Assembler::aboveEqual, next); ++ STOP("assert(top >= start)"); ++ should_not_reach_here("assert(top >= start)"); ++ ++ bind(next); ++ ldptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_end_offset()))); ++ cmpptr(t1, Address(thread_reg, in_bytes(JavaThread::tlab_top_offset()))); ++ jcc(Assembler::aboveEqual, ok); ++ STOP("assert(top <= end)"); ++ should_not_reach_here("assert(top <= end)"); ++ //pop(t1); ++ bind(ok); ++ } ++#endif ++} ++ ++void MacroAssembler::verify_FPU(int stack_depth, const char* s) { ++ //Unimplemented(); ++} ++ ++// ((OopHandle)result).resolve(); ++void MacroAssembler::resolve_oop_handle(Register result, Register tmp) {//warn("TODO:modify resolve_oop_handle jzy"); ++ assert_different_registers(result, tmp); ++ // OopHandle::resolve is an indirection. ++ access_load_at(T_OBJECT, IN_NATIVE, result, Address(result, 0), tmp, noreg); ++} ++ ++void MacroAssembler::load_mirror(Register mirror, Register method, Register tmp) { ++ // get mirror ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ ldl(mirror, Address(method, Method::const_offset())); ++ ldl(mirror, Address(mirror, ConstMethod::constants_offset())); ++ ldl(mirror, Address(mirror, ConstantPool::pool_holder_offset_in_bytes())); ++ ldl(mirror, Address(mirror, mirror_offset)); ++ resolve_oop_handle(mirror, tmp); ++} ++ ++void MacroAssembler::load_klass(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ ldwu(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ decode_klass_not_null(dst); ++ } else { ++ ldptr(dst, Address(src, oopDesc::klass_offset_in_bytes())); ++ } ++} ++ ++void MacroAssembler::load_prototype_header(Register dst, Register src) { ++ load_klass(dst, src); ++ ld(dst, Address(dst, Klass::prototype_header_offset())); ++} ++ ++void MacroAssembler::store_klass(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ encode_klass_not_null(src); ++ stw(src, oopDesc::klass_offset_in_bytes(), dst); ++ } else { ++ stl(src, oopDesc::klass_offset_in_bytes(), dst); ++ } ++} ++ ++void MacroAssembler::access_load_at(BasicType type, DecoratorSet decorators, ++ Register dst, Address src, ++ Register tmp1, Register thread_tmp) {SCOPEMARK_NAME(MacroAssembler::access_load_at, this) ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->load_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::access_store_at(BasicType type, DecoratorSet decorators, ++ Address dst, Register src, ++ Register tmp1, Register thread_tmp) {SCOPEMARK_NAME(MacroAssembler::access_store_at, this) ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ decorators = AccessInternal::decorator_fixup(decorators); ++ bool 
as_raw = (decorators & AS_RAW) != 0; ++ if (as_raw) { ++ bs->BarrierSetAssembler::store_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } else { ++ bs->store_at(this, decorators, type, dst, src, tmp1, thread_tmp); ++ } ++} ++ ++void MacroAssembler::load_heap_oop(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::load_heap_oop_not_null(Register dst, Address src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_load_at(T_OBJECT, IN_HEAP | IS_NOT_NULL | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++void MacroAssembler::store_heap_oop(Address dst, Register src, Register tmp1, ++ Register thread_tmp, DecoratorSet decorators) { ++ access_store_at(T_OBJECT, IN_HEAP | decorators, dst, src, tmp1, thread_tmp); ++} ++ ++// Used for storing NULLs. ++void MacroAssembler::store_heap_oop_null(Address dst) { ++ access_store_at(T_OBJECT, IN_HEAP, dst, noreg, noreg, noreg); ++} ++ ++void MacroAssembler::store_klass_gap(Register dst, Register src) { ++ if (UseCompressedClassPointers) { ++ stw(src, oopDesc::klass_gap_offset_in_bytes(), dst); ++ } ++} ++ ++#ifdef ASSERT ++void MacroAssembler::verify_heapbase(const char* msg) {SCOPEMARK_NAME(MacroAssembler::verify_heapbase, this) ++ assert (UseCompressedOops || UseCompressedClassPointers, "should be compressed"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ if (CheckCompressedOops) { ++ Label ok; ++// push(1 << rscratch1->encoding(), sp); ++// push(rscratch1); // cmpptr trashes rscratch1 ++ cmpptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); ++ jcc(Assembler::equal, ok); ++ STOP(msg); ++ bind(ok); ++// pop(1 << rscratch1->encoding(), sp); ++// pop(rscratch1); ++ } ++} ++#endif ++ ++ ++// Algorithm must match CompressedOops::encode. 
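++//
++// Illustrative sketch only (base/shift stand for Universe::narrow_oop_base()
++// and Universe::narrow_oop_shift(); this mirrors the shared scheme rather than
++// documenting anything SW64-specific):
++//
++//   narrowOop encode(oop o) {
++//     if (o == NULL) return 0;                             // NULL maps to 0
++//     return (narrowOop)(((uintptr_t)o - base) >> shift);  // shift == LogMinObjAlignmentInBytes
++//   }
++//
++// The seleq instructions below appear to exist solely to preserve that
++// NULL -> 0 case when a heap base is in use.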
++void MacroAssembler::encode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop:heap base corrupted?"); ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop"); ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srll(src, LogMinObjAlignmentInBytes, dst); ++ } else { ++ if (dst != src) bis(R0, src, dst); ++ } ++ } else { ++ if (dst == src) { ++ seleq(dst, r12_heapbase, dst, dst); ++ subl(dst, r12_heapbase, dst); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srll(dst, LogMinObjAlignmentInBytes, dst); ++ } ++ } else { ++ subl(src, r12_heapbase, dst); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srll(dst, LogMinObjAlignmentInBytes, dst); ++ } ++ seleq(src, R0, dst, dst); ++ } ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register r) { ++// stop("encode_heap_oop_not_null not check lsp"); ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop_not_null: heap base corrupted?"); ++ if (CheckCompressedOops) { ++ Label ok; ++ testl(r, r); ++ jcc(Assembler::notEqual, ok); ++ STOP("null oop passed to encode_heap_oop_not_null"); ++ bind(ok); ++ } ++#endif ++ verify_oop(r, "broken oop in encode_heap_oop_not_null"); ++ if (Universe::narrow_oop_base() != NULL) { ++ subl(r, r12_heapbase, r); ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srll(r, LogMinObjAlignmentInBytes, r); ++ } ++} ++ ++void MacroAssembler::encode_heap_oop_not_null(Register dst, Register src) { ++// stop("encode_heap_oop_not_null 2 not check lsp"); ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::encode_heap_oop_not_null2: heap base corrupted?"); ++ if (CheckCompressedOops) { ++ Label ok; ++ testl(src, src); ++ jcc(Assembler::notEqual, ok); ++ STOP("null oop passed to encode_heap_oop_not_null2"); ++ bind(ok); ++ } ++#endif ++ verify_oop(src, "broken oop in encode_heap_oop_not_null2"); ++ if (dst != src) { ++ movl(dst, src); ++ } ++ if (Universe::narrow_oop_base() != NULL) { ++ subl(dst, r12_heapbase, dst); ++ } ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ srll(dst, LogMinObjAlignmentInBytes, dst); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop(Register dst, Register src) { ++#ifdef ASSERT ++ verify_heapbase("MacroAssembler::decode_heap_oop: heap base corrupted?"); ++#endif ++ if (Universe::narrow_oop_base() == NULL) { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (dst != src) nop(); // DON'T DELETE THIS GUY. 
++ slll(src, LogMinObjAlignmentInBytes, dst); ++ } else { ++ if (dst != src) bis(R0, src, dst); ++ } ++ } else { ++ if (dst == src) { ++ if (dst != AT) bis(R0, dst, AT); ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ slll(dst, LogMinObjAlignmentInBytes, dst); ++ } ++ addl(dst, r12_heapbase, dst); ++ seleq(AT, R0, dst, dst); ++ } else { ++ if (Universe::narrow_oop_shift() != 0) { ++ assert (LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ slll(src, LogMinObjAlignmentInBytes, dst); ++ addl(dst, r12_heapbase, dst); ++ } else { ++ addl(src, r12_heapbase, dst); ++ } ++ seleq(src, R0, dst, dst); ++ } ++ } ++ verify_oop(dst, "broken oop in decode_heap_oop"); ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register r) { ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ slll(r, LogMinObjAlignmentInBytes, r); ++ if (Universe::narrow_oop_base() != NULL) { ++ addl(r, r12_heapbase, r); ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ } ++} ++ ++void MacroAssembler::decode_heap_oop_not_null(Register dst, Register src) { ++// stop("decode_heap_oop_not_null 2 not check lsp"); ++ // Note: it will change flags ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
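++ // Illustrative sketch (assumed from the shared CompressedOops scheme, not
++ // SW64-specific documentation): with a non-null narrow oop in src,
++ //   dst = narrow_oop_base + ((uintptr_t)src << narrow_oop_shift)
++ // and no NULL check is required. When the shift equals Address::times_8 the
++ // whole computation collapses into the single scaled lea emitted below.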
++ if (Universe::narrow_oop_shift() != 0) { ++ assert(LogMinObjAlignmentInBytes == Universe::narrow_oop_shift(), "decode alg wrong"); ++ if (LogMinObjAlignmentInBytes == Address::times_8) { ++ lea(dst, Address(r12_heapbase, src, Address::times_8, 0)); ++ } else { ++ if (dst != src) { ++ movl(dst, src); ++ } ++ slll(dst, LogMinObjAlignmentInBytes, dst); ++ if (Universe::narrow_oop_base() != NULL) { ++ addl(dst, r12_heapbase, dst); ++ } ++ } ++ } else { ++ assert (Universe::narrow_oop_base() == NULL, "sanity"); ++ if (dst != src) { ++ movl(dst, src); ++ } ++ } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register r) { ++ if (Universe::narrow_klass_base() != NULL) { ++ assert(r != rscratch3, "Encoding a klass in rcc"); ++ set64(rscratch3, (int64_t)Universe::narrow_klass_base()); ++ subl(r, rscratch3, r); ++ } ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ srll(r, LogKlassAlignmentInBytes, r); ++ } ++// if (Universe::narrow_klass_base() != NULL) { ++// reinit_heapbase(); ++// } ++} ++ ++void MacroAssembler::encode_klass_not_null(Register dst, Register src) { ++ if (dst == src) { ++ encode_klass_not_null(src); ++ } else { ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ subl(src, dst, dst); ++ } else { ++ movl(dst, src); ++ } ++ if (Universe::narrow_klass_shift() != 0) { ++ assert (LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ srll(dst, LogKlassAlignmentInBytes, dst); ++ } ++ } ++} ++ ++// !!! If the instructions that get generated here change then function ++// instr_size_for_decode_klass_not_null() needs to get updated. ++void MacroAssembler::decode_klass_not_null(Register r) { ++ // Note: it will change flags ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert(r != r12_heapbase, "Decoding a klass in r12"); ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ slll(r, LogKlassAlignmentInBytes, r); ++ } ++ if (Universe::narrow_klass_base() != NULL) { ++ set64(r12_heapbase, (int64_t)Universe::narrow_klass_base()); ++ addl(r, r12_heapbase, r); ++ reinit_heapbase(); ++ } ++} ++ ++void MacroAssembler::decode_klass_not_null(Register dst, Register src) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ ++ if (dst == src) { ++ decode_klass_not_null(dst); ++ } else { ++ // Cannot assert, unverified entry point counts instructions (see .ad file) ++ // vtableStubs also counts instructions in pd_code_size_limit. ++ // Also do not verify_oop as this is called by verify_oop. 
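++ // Illustrative sketch (assumed from the shared compressed-klass scheme):
++ //   dst = narrow_klass_base + ((uintptr_t)src << narrow_klass_shift)
++ // The base is materialized into dst with set64 first, so the scaled lea /
++ // addl below can fold in src without clobbering r12_heapbase, unlike the
++ // single-register variant above.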
++ set64(dst, (int64_t)Universe::narrow_klass_base()); ++ if (Universe::narrow_klass_shift() != 0) { ++ assert(LogKlassAlignmentInBytes == Universe::narrow_klass_shift(), "decode alg wrong"); ++ assert(LogKlassAlignmentInBytes == Address::times_8, "klass not aligned on 64bits?"); ++ lea(dst, Address(dst, src, Address::times_8, 0)); ++ } else { ++ addl(dst, src, dst); ++ } ++ } ++} ++ ++void MacroAssembler::emit_data(RelocationHolder const& rspec, int format) { ++ if (rspec.type() != relocInfo::none) { ++#ifdef ASSERT ++ //check_relocation(rspec, format);//sw will be wrong ++#endif ++ if (format == call32_operand){ ++ ShouldNotReachHere(); ++ code_section()->relocate(code_section()->end(), rspec, disp32_operand); ++ } ++ else ++ code_section()->relocate(code_section()->end(), rspec, format); ++ } ++} ++ ++void MacroAssembler::mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec){ ++ emit_data(rspec, narrow_oop_operand); ++ int16_t high = (imm32 - (int16_t)(imm32))>>16; ++ int16_t low = (int16_t)(imm32); ++ ldih(dst, high, R0); ++ ldi(dst, low, dst); ++ // if imm32=0x0000ffff, ldih/ldi will result in 0x10000ffff, so we must zapnot ++ zapnot(dst, 0xf, dst); ++} ++ ++void MacroAssembler::set_narrow_oop(Register dst, jobject obj) { ++ assert (UseCompressedOops, "should only be used for compressed headers"); ++ assert (Universe::heap() != NULL, "java heap should be initialized"); ++ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ int oop_index = oop_recorder()->find_index(obj); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ mov_narrow_oop(dst, oop_index, rspec); ++ //code_section()->relocate(pc(), rspec); ++ //prepare_patch_li48(dst, oop_index); ++} ++ ++void MacroAssembler::set_narrow_oop(Address dst, jobject obj) { ++ should_not_reach_here("set_narrow_oop"); ++} ++ ++void MacroAssembler::cmp_narrow_oop(Register dst, jobject obj, Register ccReg) { ++ should_not_reach_here("cmp_narrow_oop"); ++} ++ ++void MacroAssembler::cmp_narrow_oop(Address dst, jobject obj, Register ccReg) { ++ should_not_reach_here("cmp_narrow_oop"); ++} ++ ++void MacroAssembler::set_narrow_klass(Register dst, Klass* k) { ++ assert (UseCompressedClassPointers, "should only be used for compressed headers"); ++ assert (oop_recorder() != NULL, "this assembler needs an OopRecorder"); ++ int klass_index = oop_recorder()->find_index(k); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ mov_narrow_oop(dst,Klass::encode_klass(k),rspec); ++ //code_section()->relocate(pc(), rspec); ++ //prepare_patch_li48(dst, Klass::encode_klass(k)); ++} ++ ++void MacroAssembler::set_narrow_klass(Address dst, Klass* k) { ++ should_not_reach_here("set_narrow_klass"); ++} ++ ++void MacroAssembler::cmp_narrow_klass(Register dst, Klass* k, Register ccReg) { ++ should_not_reach_here("cmp_narrow_klass"); ++} ++ ++void MacroAssembler::cmp_narrow_klass(Address dst, Klass* k, Register ccReg) { ++ should_not_reach_here("cmp_narrow_klass"); ++} ++ ++void MacroAssembler::reinit_heapbase() { ++ if (UseCompressedOops || UseCompressedClassPointers) { ++ if (Universe::heap() != NULL) { ++ if (Universe::narrow_oop_base() == NULL) { ++ movl(r12_heapbase, R0); ++ } else { ++ mov_immediate64(r12_heapbase, (int64_t)Universe::narrow_ptrs_base()); ++ } ++ } else { ++ ldptr(r12_heapbase, ExternalAddress((address)Universe::narrow_ptrs_base_addr())); ++ } ++ } ++} ++ ++// Search for str1 in str2 and return index or -1 ++void MacroAssembler::string_indexof(Register str2, Register str1, ++ 
Register cnt2, Register cnt1, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ int icnt1, Register result, int ae) { ++ should_not_reach_here("string_indexof"); ++} ++ ++void MacroAssembler::string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, Register tmp3) ++{ ++ should_not_reach_here("string_indexof_char"); ++} ++ ++// Compare strings. ++void MacroAssembler::string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, Register tmp1, Register tmp2, ++ FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, int ae) { ++ should_not_reach_here("string_compare"); ++} ++ ++// This method checks if provided byte array contains byte with highest bit set. ++void MacroAssembler::has_negatives(Register ary1, Register len, Register result) { ++ // a1: byte array ++ // a2: len ++ // v0: result ++ //ShortBranchVerifier sbv(this); ++ Register tmp1 = rscratch3; ++ assert_different_registers(ary1, len, result, tmp1); ++ //assert_different_registers(vec1, vec2); ++ Label TRUE_LABEL, FALSE_LABEL, DONE, COMPARE_CHAR, COMPARE_VECTORS, COMPARE_BYTE; ++ ++ // len == 0 ++ //testl(len, len); ++ jcc(Assembler::zero, FALSE_LABEL, len); ++ ++ movwu(result, len); // copy ++ ++ // Compare 4-byte vectors ++ andw(len, 0xfffffffc, len); // vector count (in bytes) ++ jcc(Assembler::zero, COMPARE_CHAR, len); ++ ++ lea(ary1, Address(ary1, len, Address::times_1)); ++ negptr(len); ++ ++ bind(COMPARE_VECTORS); ++ ldwu(tmp1, Address(ary1, len, Address::times_1)); ++ andw(tmp1, 0x80808080, tmp1); ++ jcc(Assembler::notZero, TRUE_LABEL, tmp1); ++ addptr(len, 4, len); ++ jcc(Assembler::notZero, COMPARE_VECTORS, len); ++ ++ // Compare trailing char (final 2 bytes), if any ++ bind(COMPARE_CHAR); ++ testl(result, 0x2); // tail char ++ jcc(Assembler::zero, COMPARE_BYTE); ++ load_unsigned_short(tmp1, Address(ary1, 0)); ++ andw(tmp1, 0x00008080, tmp1); ++ jcc(Assembler::notZero, TRUE_LABEL, tmp1); ++ subptr(result, 2, result); ++ lea(ary1, Address(ary1, 2)); ++ ++ bind(COMPARE_BYTE); ++ testw(result, 0x1); // tail byte ++ jcc(Assembler::zero, FALSE_LABEL); ++ load_unsigned_byte(tmp1, Address(ary1, 0)); ++ andw(tmp1, 0x00000080, tmp1); ++ jcc(Assembler::notEqual, TRUE_LABEL, tmp1); ++ jmp(FALSE_LABEL); ++ ++ bind(TRUE_LABEL); ++ mov_immediate32u(result, 1); // return true ++ jmp(DONE); ++ ++ bind(FALSE_LABEL); ++ mov_immediate32u(result, 0); // return false ++ ++ // That's it ++ bind(DONE); ++} ++ ++void MacroAssembler::arrays_equals(Register a1, Register a2, Register tmp3, ++ Register tmp4, Register tmp5, Register result, ++ Register cnt1, int elem_size) { ++ should_not_reach_here("arrays_equals not implement"); ++} ++ ++// Intrinsic for sun/nio/cs/ISO_8859_1$Encoder.implEncodeISOArray and ++// java/lang/StringUTF16.compress. ++void MacroAssembler::encode_iso_array(Register src, Register dst, ++ Register len, Register result, ++ FloatRegister Vtmp1, FloatRegister Vtmp2, ++ FloatRegister Vtmp3, FloatRegister Vtmp4) ++{ ++ should_not_reach_here("encode_iso_array not implement"); ++} ++ ++/** ++ * Helpers for multiply_to_len(). ++ */ ++void MacroAssembler::add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, ++ Register src1, Register src2) { ++ ShouldNotReachHere(); ++} ++ ++/** ++ * Multiply 64 bit by 64 bit first loop. 
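++ * (Stubbed out below. Conceptually -- an assumption drawn from the shared
++ * multiply_to_len scheme rather than SW64 documentation -- this pass multiplies
++ * one 64-bit word of x by every 64-bit word of y, storing the low halves into
++ * z and carrying the high halves forward.)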
++ */ ++void MacroAssembler::multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx) { ++ ShouldNotReachHere(); ++} ++ ++/** ++ * Multiply 128 bit by 128. Unrolled inner loop. ++ * ++ */ ++void MacroAssembler::multiply_128_x_128_loop(Register y, Register z, ++ Register carry, Register carry2, ++ Register idx, Register jdx, ++ Register yz_idx1, Register yz_idx2, ++ Register tmp, Register tmp3, Register tmp4, ++ Register tmp6, Register product_hi) { ++ ShouldNotReachHere(); ++} ++ ++/** ++ * Code for BigInteger::multiplyToLen() instrinsic. ++ * ++ * i0: x ++ * i1: xlen ++ * i2: y ++ * i3: ylen ++ * i4: z ++ * i5: zlen ++ * i10: tmp1 ++ * i11: tmp2 ++ * i12: tmp3 ++ * i13: tmp4 ++ * i14: tmp5 ++ * i15: tmp6 ++ * i16: tmp7 ++ * ++ */ ++void MacroAssembler::multiply_to_len(Register x, Register xlen, Register y, Register ylen, ++ Register z, Register zlen, ++ Register tmp1, Register tmp2, Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, Register product_hi) { ++ ShouldNotReachHere(); ++} ++ ++// Code for BigInteger::mulAdd instrinsic ++// out = i0 ++// in = i1 ++// offset = i2 (already out.length-offset) ++// len = i3 ++// k = i4 ++// ++// pseudo code from java implementation: ++// carry = 0; ++// offset = out.length-offset - 1; ++// for (int j=len-1; j >= 0; j--) { ++// product = (in[j] & LONG_MASK) * kLong + (out[offset] & LONG_MASK) + carry; ++// out[offset--] = (int)product; ++// carry = product >>> 32; ++// } ++// return (int)carry; ++void MacroAssembler::mul_add(Register out, Register in, Register offset, ++ Register len, Register k) { ++ ShouldNotReachHere(); ++} ++ ++/** ++ * Emits code to update CRC-32 with a byte value according to constants in table ++ * ++ * @param [in,out]crc Register containing the crc. ++ * @param [in]val Register containing the byte to fold into the CRC. ++ * @param [in]table Register containing the table of crc constants. ++ * ++ * uint32_t crc; ++ * val = crc_table[(val ^ crc) & 0xFF]; ++ * crc = val ^ (crc >> 8); ++ * ++ */ ++void MacroAssembler::update_byte_crc32(Register crc, Register val, Register table) { ++ xorr(val, crc, val); ++ and_ins(val, 0xFF, val); ++ srll(crc, 8, crc); // unsigned shift ++// zapnot(crc, 0xF, crc); ++ ++ dsll(AT, val, Address::times_4); ++ addl(table, AT, AT); ++ ldw(AT, 0, AT); ++ zapnot(AT, 0xF, AT); ++ xorr(crc, AT, crc); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param table register that will contain address of CRC table ++ * @param tmp scratch register ++ */ ++void MacroAssembler::kernel_crc32(Register crc, Register buf, Register len, ++ Register table0, Register table1, Register table2, Register table3, ++ Register tmp, Register tmp2, Register tmp3) { ++ ShouldNotReachHere(); ++} ++ ++// Compress char[] array to byte[]. ++void MacroAssembler::char_array_compress(Register src, Register dst, Register len, ++ FloatRegister tmp1Reg, FloatRegister tmp2Reg, ++ FloatRegister tmp3Reg, FloatRegister tmp4Reg, ++ Register result) { ++ should_not_reach_here("char_array_compress"); ++} ++ ++// Inflate byte[] array to char[]. 
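++// Illustrative semantics only (assumed from the shared StringUTF16.inflate
++// contract; the SW64 version below is still a should_not_reach_here stub):
++//
++//   for (int i = 0; i < len; i++) {
++//     dst[i] = (jchar)(src[i] & 0xff);   // zero-extend each Latin-1 byte
++//   }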
++void MacroAssembler::byte_array_inflate(Register src, Register dst, Register len, ++ FloatRegister vtmp1, FloatRegister vtmp2, FloatRegister vtmp3, ++ Register tmp4) { ++ should_not_reach_here("byte_array_inflate"); ++} ++ ++SkipIfEqual::SkipIfEqual( ++ MacroAssembler* masm, const bool* flag_addr, bool value) { ++ _masm = masm; ++ _masm->mov(AT, (address)flag_addr); ++ _masm->ldbu(AT, 0, AT); ++ _masm->addiu(AT, -value, AT); ++ _masm->beq_l(AT,_label); ++} ++ ++SkipIfEqual::~SkipIfEqual() { ++ _masm->bind(_label); ++} ++ ++// get_thread() can be called anywhere inside generated code so we ++// need to save whatever non-callee save context might get clobbered ++// by the call to JavaThread::sw64_get_thread_helper() or, indeed, ++// the call setup code. ++// ++// sw64_get_thread_helper() clobbers only i0, i1, and flags. ++// ++void MacroAssembler::get_thread(Register thread) { ++ pushad(thread); ++ MacroAssembler::call_VM_leaf_base(CAST_FROM_FN_PTR(address, Thread::current), 0); ++ ++ int off;//depending on the sd sequence in pushad(); ++ ++ /* ++ * in [assembler_sw64.cpp] pushad(), F12 is inserted between A7 and T0. ++ * Therefore, the offsets before A7 need to be adjusted by 8 bytes. ++ * ++ * NOTE: I have tried removing the push action of F12 from pushad(), but failed. ++ * Maybe other modules in Hotspot depend on this special layout. ++ */ ++ move(thread, V0); ++ popad(thread); ++} ++ ++//--------------------------------------------------------------------------------------------------------------- ++ ++Register temp_regs[] = {T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, A0, A1, A2, A3, A4, A5, GP, V0, AT}; ++void MacroAssembler::saveTRegisters(){ ++ int i, index; ++ ++ // Fixed-point registers ++ int len = sizeof(temp_regs) / sizeof(temp_regs[0]); ++ ++ addiu(esp, -1 * len * wordSize, esp); ++ for (i = 0, index = len - 1; i < len; i++) { ++ stl(temp_regs[i], index * wordSize, esp); ++ index--; //index not equal i ++ } ++ ++} ++ ++void MacroAssembler::restoreTRegisters(){ ++ int i, index; ++ /* Fixed-point registers */ ++ int len = sizeof(temp_regs) / sizeof(temp_regs[0]); ++ for (i = len-1, index = 0; i >= 0; i--) { ++ ldl(temp_regs[i], index * wordSize, esp); ++ index++; ++ } ++ addiu(esp, index * wordSize, esp); ++} ++ ++Register caller_saved_registers[] = {V0, T0, T1, T2, T3, T4, T5, T6, T7, rfp, A0, A1, A2, A3, A4, A5, T8, T9, T10, T11, RA, T12, AT, GP}; ++ ++// In SW64, F0~23 are all caller-saved registers ++FloatRegister caller_saved_fpu_registers[] = {f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15, F16, F17, f18, f19, f20, f21, f22, f23}; ++ ++//We preserve all caller-saved register ++void MacroAssembler::pushad(Register skip){ ++ int i, index; ++ ++ // Fixed-point registers ++ int len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ //int stack_len = skip == noreg ? 
len : (len-1); ++ ++ for (i = 0, index = 1; i < len; i++) { ++ if (skip != caller_saved_registers[i]) { ++ stl(caller_saved_registers[i], -1 * index * wordSize, esp); ++ index++; //index not equal i ++ } ++ } ++ addiu(esp, -1 * (index-1) * wordSize, esp); ++ ++ /* Floating-point registers */ ++ len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ addiu(esp, -1 * len * wordSize, esp); ++ for (i = 0; i < len; i++) { ++ fstd(caller_saved_fpu_registers[i], (len - i - 1) * wordSize, esp); ++ } ++}; ++ ++void MacroAssembler::popad(Register skip){ ++ int i, index; ++ ++ /* Floating-point registers */ ++ int len = sizeof(caller_saved_fpu_registers) / sizeof(caller_saved_fpu_registers[0]); ++ for (i = 0; i < len; i++) { ++ fldd(caller_saved_fpu_registers[i], (len - i - 1) * wordSize, esp); ++ } ++ addiu(esp, len * wordSize, esp); ++ ++ /* Fixed-point registers */ ++ len = sizeof(caller_saved_registers) / sizeof(caller_saved_registers[0]); ++ for (i = len-1, index = 0; i >= 0; i--) { ++ if (skip != caller_saved_registers[i]) { ++ ldl(caller_saved_registers[i], index * wordSize, esp); ++ index++; ++ } ++ } ++ addiu(esp, index * wordSize, esp); ++}; ++ ++ ++ ++void MacroAssembler::notw(Register rd, Register rs) { ++ ornot(R0, rs, rd); ++// zapnot(rd, 0xf, rd); ++} ++ ++/** ++ * x86 ++ * Assembler::movl(Register dst, Address src) ++ * sw64 ++ * MacroAssembler::ldws(Register rd, Address addr) ++ * note ++ * load 32bit into reg. ++ * for x86 the reg can be viewed just as 32bit, they don't care the msb32 since their instructions can operate 32bit directly. ++ * for sw64 the msb32 cares and ldws sign extend into msb32. ++ * it's recommend to use ldws to substitue movl when transplanting, except for the ocassions mentioned in ldwu ++ */ ++void MacroAssembler::ldws(Register rd, Address addr) { ++ ldw(rd, addr); ++} ++ ++/** ++ * x86 ++ * Assembler::movl(Register dst, Address src) ++ * sw64 ++ * MacroAssembler::ldwu(Register rd, Address addr) ++ * note ++ * load 32bit into reg. ++ * for x86 the reg can be viewed just as 32bit, they don't care the msb32 since their instructions can operate 32bit directly. ++ * for sw64 the msb32 cares and ldwu zero the msb32. 
++ * if rd is loaded as a flag, a status, a mode, following by a test, a and, we must use ldwu ++ */ ++void MacroAssembler::ldwu(Register rd, Address addr) { ++ ldw(rd, addr); ++ zapnot(rd, 0xf, rd); ++} ++ ++void MacroAssembler::ldptr(Register rd, Address addr, Register tmp) { ++ ldl(rd, addr); ++} ++ ++/** ++ * x86 ++ * MacroAssembler::movptr(Address dst, Register src) ++ * sw64 ++ * MacroAssembler::stptr(Register rd, Address addr, Register tmp=rcc) ++ * note ++ * rd can't be same with tmp ++ */ ++void MacroAssembler::stptr(Register rd, Address addr, Register tmp) { ++ assert_different_registers(rd, tmp); ++ stl(rd, addr, tmp); ++} ++ ++void MacroAssembler::addptr(Register rd, Address addr) { ++ assert_different_registers(rd, rcc); ++ ldptr(rcc, addr); ++ addptr(rd, rcc, rd); ++} ++ ++/** ++ * x86 ++ * no corresponding ++ * sw64 ++ * MacroAssembler::ldws(Register rd, AddressLiteral addr) ++ * note ++ * use ldws ASAP ++ */ ++void MacroAssembler::ldws(Register rd, AddressLiteral addr) { ++ mov_immediate64(rd, (intptr_t)addr.target(), addr.rspec()); ++ ldw(rd, 0, rd); ++} ++ ++/** ++ * x86 ++ * Assembler:: ++ * sw64 ++ * MacroAssembler::ldwu(Register rd, AddressLiteral addr) ++ * note ++ * use when load a flag/status/mode ++ */ ++void MacroAssembler::ldwu(Register rd, AddressLiteral addr) { ++ ldws(rd, addr); ++ zapnot(rd, 0xf, rd); ++} ++ ++/** ++ * x86 ++ * movptr ++ * sw64 ++ * ldptr ++ * note ++ * same ++ */ ++void MacroAssembler::ldptr(Register rd, AddressLiteral addr) { ++ mov_immediate64(rd, (intptr_t)addr.target(), addr.rspec()); ++ ldl(rd, 0, rd); ++} ++ ++/** ++ * x86 ++ * jmp ++ * sw64 ++ * jmp(Address rd, Register tmp=T12) ++ * note ++ * sw use t12 as jump target, especially when jump into runtime ++ */ ++void MacroAssembler::jmp(Address rd, Register tmp) { ++ ldl(T12, rd); ++ Assembler::jmp(tmp, T12, 0);// set ra=AT for debug ++} ++ ++/** ++ * x86 ++ * jmp ++ * sw64 ++ * jmp(Register rd, Register tmp=T12); ++ * note ++ * sw use AT as link reg for debug ++ */ ++void MacroAssembler::jmp(Register rd, Register tmp) { ++ assert_different_registers(rd, tmp); ++ if (rd != T12) ++ movl(T12, rd); ++ Assembler::jmp(tmp, T12, 0);// set ra=tmp for debug ++} ++ ++void MacroAssembler::jmp(Label& lbl) { ++ beq_l(R0, lbl); ++} ++ ++/** ++ * x86 ++ * Assembler::movzwl(Register dst, Address src) ++ * sw64 ++ * MacroAssembler::ldhu_unaligned(Register rd, Address addr, Register tmp=rcc) ++ * note ++ * load and zero-extend a 16bit into a reg. ++ * movzwl and ldhu_unaligned are all little endian, so maybe have to swap in some occasion. ++ * x86 zero-extends a 16bit into 32bit, sw64 zero-extends 16bit into a 64bit reg. ++ * tmp can't be same with rd. ++ */ ++void MacroAssembler::ldhu_unaligned(Register rd, Address addr, Register tmp) { ++ assert_different_registers(rd, tmp); ++ lea(tmp, addr); ++ Assembler::ldbu(rd, 1, tmp); ++ slll(rd, 8, rd); ++ Assembler::ldbu(tmp, 0, tmp); ++ bis(tmp, rd, rd); ++} ++ ++/** ++ * x86 ++ * Assembler::movzwl(Register dst, Address src) ++ * sw64 ++ * MacroAssembler::ldhu_unaligned_be(Register rd, Address addr, Register tmp=rcc) ++ * note ++ * load and zero-extend a 16bit into a reg. ++ * movzwl is little endian, so have to bswapl after movzwl. ++ * ldhu_unaligned_be is big endian, so don't have to swap. ++ * x86 zero-extend a 16bit into 32bit, we zero-extend into a 64bit reg. ++ * tmp can't be same with rd. 
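++ * worked example (illustrative): bytes {0x12, 0x34} starting at addr load as
++ * 0x1234, i.e. rd = (byte_at(addr) << 8) | byte_at(addr + 1), upper 48 bits zero.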
++ */ ++void MacroAssembler::ldhu_unaligned_be(Register rd, Address addr, Register tmp) { ++// Assembler::ldhu(rd, addr);// unaligned exception may occur here ++ assert_different_registers(rd, tmp); ++ lea(tmp, addr); ++ Assembler::ldbu(rd, 1, tmp); ++ Assembler::ldbu(tmp, 0, tmp); ++ slll(tmp, 8, tmp); ++ bis(tmp, rd, rd); ++} ++ ++void MacroAssembler::cmove(Condition cc, Register dst, Register src1, Register src2, Register ccReg) { ++ switch(cc) { ++// case equal: ++ case zero: ++ seleq(ccReg, src1, src2, dst); ++ break; ++// case notEqual: ++ case notZero: ++ selne(ccReg, src1, src2, dst); ++ break; ++ case greaterEqual: ++ case aboveEqual: ++ selge(ccReg, src1, src2, dst); ++ break; ++ case greater: ++ case positive: ++ selgt(ccReg, src1, src2, dst); ++ break; ++ case lessEqual: ++ selle(ccReg, src1, src2, dst); ++ break; ++ case less: ++ case below: ++ sellt(ccReg, src1, src2, dst); ++ break; ++ ++ case success: ++ selne(ccReg, src1, src2, dst); ++ break; ++ ++ case failed: ++ ShouldNotReachHere(); ++ break; ++ default: ++ Unimplemented(); ++ } ++} ++ ++// Patch any kind of instruction; there may be several instructions. ++// Return the total length (in bytes) of the instructions. ++int MacroAssembler::pd_patch_instruction_size(address branch, address target) { ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++int MacroAssembler::patch_oop(address insn_addr, address o) { ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++int MacroAssembler::patch_narrow_klass(address insn_addr, narrowKlass n) { ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++void MacroAssembler::prepare_patch_li48(Register rd, long imm) { ++ assert_not_delayed(); ++ assert(is_simm16(imm >> 32), "Not a 48-bit address"); ++ ++ int16_t msb_l, lsb_h, lsb_l; ++ NativeInstruction::imm48_split(imm, msb_l, lsb_h, lsb_l); ++ block_comment(";;li48 {"); ++ ldi(rd, msb_l, R0); ++ slll(rd, 32, rd); ++ ldih(rd, lsb_h, rd); ++ ldi(rd, lsb_l, rd); ++ char buf[50]; ++ sprintf(buf, "0x%lx }", imm); ++ block_comment(buf); ++} ++ ++address MacroAssembler::target_addr_for_insn(address insn_addr, unsigned insn) { ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++//without check, maybe fixed ++int MacroAssembler::patched_branch(int dest_pos, int inst, int inst_pos) { ++ int m; // mask for displacement field ++ int v; // new value for displacement field ++ const int word_aligned_ones = -4; ++ if (sw2_arith_op(inst) == op_addpi) { ++ m = ins_mask(25, 13); ++ v = wdisp2(dest_pos, 4, 25, 13); ++ return inst & ~m | v; ++ } ++ switch (sw2_op(inst)) { ++ default: ShouldNotReachHere(); ++ case op_ret: ++ case op_jmp: ++ case op_call: m = wdisp(word_aligned_ones, 0, 16); v = wdisp( dest_pos, inst_pos+4, 16); break; ++ case op_br: ++ case op_bsr: ++ case op_beq: ++ case op_bne: ++ case op_blt: ++ case op_ble: ++ case op_bgt: ++ case op_bge: ++ case op_blbc: ++ case op_blbs: ++ case op_fbeq: ++ case op_fbne: ++ case op_fblt: ++ case op_fble: ++ case op_fbgt: ++ case op_fbge: m = wdisp(word_aligned_ones, 0, 21); v = wdisp( dest_pos, inst_pos+4, 21); break; ++ case op_ldi: m = simm(-1, 16); v = simm(dest_pos-inst_pos, 16); break; ++ // case op_addpi: m = ins_mask(25, 13); v = wdisp2(dest_pos, inst_pos + 4, 25, 13); ++ } ++ ++ return inst & ~m | v; ++} ++ ++// used registers : T0, T1 ++void MacroAssembler::verify_oop_subroutine() { ++ // RA: ra ++ // A0: char* error message ++ // A1: oop object to verify ++ ++ Label exit, error; ++ // increment counter ++ mov(T0, (long)StubRoutines::verify_oop_count_addr()); ++ ldw(AT, 0, T0); ++ addiu(AT, 1, AT); ++ stw(AT, 0, 
T0); ++ ++ // make sure object is 'reasonable' ++ beq_l(A1, exit); // if obj is NULL it is ok ++ ++ // Check if the oop is in the right area of memory ++ //const int oop_mask = Universe::verify_oop_mask(); ++ //const int oop_bits = Universe::verify_oop_bits(); ++ const uintptr_t oop_mask = Universe::verify_oop_mask(); ++ const uintptr_t oop_bits = Universe::verify_oop_bits(); ++ if (Assembler::is_simm8(oop_mask)) { ++ and_ins(A1, oop_mask, T0); ++ } else { ++ mov(AT, oop_mask); ++ and_ins(A1, AT, T0); ++ } ++ if (Assembler::is_simm8(oop_bits)) { ++ cmpeq(T0, oop_bits, AT); ++ beq(AT, offset(target(error))); ++ } else { ++ mov(AT, oop_bits); ++ bne_c(T0, AT, error); ++ } ++ ++ // make sure klass is 'reasonable' ++ //add for compressedoops ++ reinit_heapbase(); ++ //add for compressedoops ++ load_klass(T0, A1); ++ beq_l(T0, error); // if klass is NULL it is broken ++ // return if everything seems ok ++ BIND(exit); ++ ++ ret_sw(); ++ ++ // handle errors ++ BIND(error); ++ pushad(); ++ call_patch(CAST_FROM_FN_PTR(address, MacroAssembler::debug), relocInfo::runtime_call_type); ++ popad(); ++ ret_sw(); ++} ++ ++// MacroAssembler protected routines needed to implement ++// public methods ++ ++//void MacroAssembler::mov(Register r, Address dest) { ++// code_section()->relocate(pc(), dest.rspec()); ++// u_int64_t imm64 = (u_int64_t)dest.target(); ++//// movptr(r, imm64); ++//} ++ ++// Move a constant pointer into r. In Sw64 mode the virtual ++// address space is 48 bits in size, so we only need three ++// instructions to create a patchable instruction sequence that can ++// reach anywhere. ++//void MacroAssembler::movptr(Register r, long imm64) { ++// assert_not_delayed(); ++// assert(is_simm16(imm64 >> 32), "Not a 48-bit address"); ++// ++// int16_t msb_l, lsb_h, lsb_l; ++// imm48_split(imm64, msb_l, lsb_h, lsb_l); ++// ldi(r, msb_l, R0); ++// slll(r, 32, r); ++// ldih(r, lsb_h, r); ++// ldi(r, lsb_l, r); ++//} ++ ++// must get argument(a double) in F16/F17 ++//void MacroAssembler::trigfunc(char trig, bool preserve_cpu_regs, int num_fpu_regs_in_use) { ++//We need to preseve the register which maybe modified during the Call ++void MacroAssembler::trigfunc(char trig, int num_fpu_regs_in_use) { ++//save all modified register here ++//FIXME, in the disassembly of tirgfunc, only used V0,T4,T12, SP,RA,so we ony save V0,T4,T12 ++ pushad(); ++//we should preserve the stack space before we call ++ addiu(esp, -wordSize * 2, esp); ++ switch (trig){ ++ case 's' : ++ call_patch( CAST_FROM_FN_PTR(address, SharedRuntime::dsin), relocInfo::runtime_call_type ); ++ break; ++ case 'c': ++ call_patch( CAST_FROM_FN_PTR(address, SharedRuntime::dcos), relocInfo::runtime_call_type ); ++ break; ++ case 't': ++ call_patch( CAST_FROM_FN_PTR(address, SharedRuntime::dtan), relocInfo::runtime_call_type ); ++ break; ++ default:assert (false, "bad intrinsic"); ++ break; ++ } ++ addiu(esp, wordSize * 2, esp); ++ popad(); ++} ++ ++/** ++ * x86 ++ * Assembler::movl(Address dst, int32_t imm32) ++ * sw64 ++ * MacroAssembler::stw(int src, Address dst, Register tmp=rcc) ++ * note ++ * store a imm32 to a Address. only support base_plus_disp type Address. tmp can be any reg. 
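++ * illustrative usage (register and offset are made up for the example):
++ *   __ stw(0, Address(rfp, 16));  // materializes 0 in tmp (rcc by default), then stores 32 bits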
++ */ ++void MacroAssembler::stw(int imm32, Address dst, Register tmp) { ++ if (dst.getMode() == Address::base_plus_disp) { ++ mov_immediate32(tmp, imm32); ++ stw(tmp, dst); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64) { ++// int32_t lsb32 = (int32_t) (imm64); ++// int32_t msb32 = (int32_t) ((imm64 - lsb32) >> 32); ++// int16_t msb_h = (msb32-(int16_t)msb32) >> 16; ++// int16_t msb_l = (int16_t)msb32; ++// int16_t lsb_h = (lsb32-(int16_t)lsb32) >> 16; ++// int16_t lsb_l = (int16_t)lsb32; ++// block_comment(";;imm64 {"); ++// if(msb_h == 0) { ++// ldi(dst, msb_l, R0); ++// } else { ++// ldih(dst, msb_h, R0); ++// if(msb_l != 0) ++// ldi(dst, msb_l, dst); ++// } ++// slll(dst, 32, dst); ++// if( ((int)lsb_h == -32768) && (lsb_l < 0) ) { ++// ldih(dst, 0x4000, dst);// yj todo ++// ldih(dst, 0x4000, dst); ++// ldi(dst, lsb_l, dst); ++// } else { ++// ldih(dst, lsb_h, dst); ++// ldi(dst, lsb_l, dst); ++// } ++// char buf[50]; ++// sprintf(buf, "0x%lx }", imm64); ++// block_comment(buf); ++ int32_t lo = (int32_t) (imm64); ++ int32_t hi = (int32_t) ((imm64 - lo) >> 32); ++ ++ int16_t lo_h16 = (lo - (int16_t)(lo))>>16; ++ int16_t lo_l16 = (int16_t)(lo); ++ int16_t hi_h16 = (hi - (int16_t)(hi))>>16; ++ int16_t hi_l16 = (int16_t)(hi); ++ block_comment(";;imm64 {"); ++ if ( is_simm16(imm64) ) { ++ ldi(dst, imm64, R0); ++ } else if ( hi != 0 ) { ++ if ( is_simm16(hi) ) { ++ ldi(dst, hi, R0); ++ } else { ++ ldih(dst, hi_h16, R0); ++ if (hi_l16 != 0) ++ ldi(dst, hi_l16, dst); ++ } ++ slll(dst, 32, dst); ++ if ( lo != 0 ) { ++ if ( ((int)lo_h16 == -32768) && ((int)lo_l16 < 0)) { ++ // original val was in range 0x7FFF8000..0x7FFFFFFF ++ ldih(dst, 0x4000, dst); ++ ldih(dst, 0x4000, dst); ++ if (lo_l16 != 0) ++ ldi(dst, lo_l16, dst); ++ } else { ++ ldih(dst, lo_h16, dst); ++ if (lo_l16 != 0) ++ ldi(dst, lo_l16, dst); ++ } ++ } ++ } else if ( (hi == 0) && (lo != 0) ) { ++ if ( ((int)lo_h16 == -32768) && ((int)lo_l16 < 0)) { ++ // original val was in range 0x7FFF8000..0x7FFFFFFF ++ /* ldih(d, lo_h16, R0); ++ * ldi(d, lo_l16, d); ++ * addw(d, 0, d); */ ++ ldih(dst, 0x4000, R0); ++ ldih(dst, 0x4000, dst); ++ if (lo_l16 != 0) ++ ldi(dst, lo_l16, dst); ++ } else { ++ ldih(dst, lo_h16, R0); ++ if (lo_l16 != 0) ++ ldi(dst, lo_l16, dst); ++ } ++ } else { ++ tty->print_cr("value = 0x%lx", imm64); ++ guarantee(false, "Not supported yet in set64!"); ++ } ++ char buf[50]; ++ sprintf(buf, "0x%lx }", imm64); ++ block_comment(buf); ++} ++ ++/** ++ * x86 ++ * Assembler::mov_literal64(Register, long, RelocationHolder const&) ++ * sw64 ++ * MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64, RelocationHolder const& rspec, int format ++ * note ++ * x86's imm64 is just following the opcode, while sw64 is split and embeded in the ldi/sll/ldih/ldi seq. ++ * x86's imm64 format is set when mov_literal64 invoke emit_data64. sw's formate is set here. ++ */ ++void MacroAssembler::mov_immediate64(Register dst, u_int64_t imm64, RelocationHolder const& rspec, int format) { ++ InstructionMark im(this); ++ assert(inst_mark() != NULL, "must be inside InstructionMark"); ++ // Do not use AbstractAssembler::relocate, which is not intended for ++ // embedded words. Instead, relocate to the enclosing instruction. 
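++ // For reference (hedged; prepare_patch_li48 above is the actual emitter),
++ // the patchable 48-bit sequence relied on here is roughly:
++ //   ldi(dst, msb_l, R0);        // bits 47..32, shifted up next
++ //   slll(dst, 32, dst);
++ //   ldih(dst, lsb_h, dst);      // bits 31..16
++ //   ldi(dst, lsb_l, dst);       // bits 15..0
++ // with the three 16-bit chunks produced by NativeInstruction::imm48_split so
++ // that the sign extension of each ldi/ldih cancels out.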
++ code_section()->relocate(inst_mark(), rspec, format); ++#ifdef ASSERT ++ check_relocation(rspec, format); ++#endif ++ ++ assert(imm64 <= ((intptr_t(1) << 48) - 1), "imm64 is too large"); ++ prepare_patch_li48(dst, imm64); ++} ++ ++void MacroAssembler::mov_address64(Register dst, u_int64_t imm64, RelocationHolder const &rspec, int format) { ++ InstructionMark im(this); ++ assert(inst_mark() != NULL, "must be inside InstructionMark"); ++ // Do not use AbstractAssembler::relocate, which is not intended for ++ // embedded words. Instead, relocate to the enclosing instruction. ++ code_section()->relocate(inst_mark(), rspec, format); ++#ifdef ASSERT ++ check_relocation(rspec, format); ++#endif ++ if (SafePatch) { ++ if (offset() % 8 == 0) { ++ nop(); ++ br(T12, 2); ++ emit_int64((long) imm64); ++ ldl(T12, 0, T12); ++ } else { ++ br(T12, 2); ++ emit_int64((long) imm64); ++ ldl(T12, 0, T12); ++ nop(); ++ } ++ } else { ++ assert(imm64 <= ((intptr_t(1) << 48) - 1), "imm64 is too large"); ++ prepare_patch_li48(dst, imm64); ++ } ++} ++// zero extend imm32 into dst ++void MacroAssembler::mov_immediate32(Register dst, int imm32) { ++ if (imm32>=0 && imm32 < (1<<15)) { ++ // if imm32=0x0000ffff, ldi will result in 0xf..fffff since it's sign extened ++ // so imm32 must less then 1<<15, not 1<<16 ++ ldi(dst, imm32, R0); ++ } else { ++ int16_t high = (imm32 - (int16_t)(imm32))>>16; ++ int16_t low = (int16_t)(imm32); ++ ldih(dst, high, R0); ++ ldi(dst, low, dst); ++ // if imm32=0x0000ffff, ldih/ldi will result in 0x10000ffff, so we must zapnot ++ zapnot(dst, 0xf, dst); ++ } ++} ++ ++// zero extend imm32 into dst ++void MacroAssembler::mov_immediate32u(Register dst, int imm32) { ++ if (imm32>=0 && imm32 < (1<<15)) { ++ // if imm32=0x0000ffff, ldi will result in 0xf..fffff since it's sign extened ++ // so imm32 must less then 1<<15, not 1<<16 ++ ldi(dst, imm32, R0); ++ } else { ++ int16_t high = (imm32 - (int16_t)(imm32))>>16; ++ int16_t low = (int16_t)(imm32); ++ ldih(dst, high, R0); ++ ldi(dst, low, dst); ++ // if imm32=0x7fffffff, high=0x8000, low=0xffff ldih/ldi will result in 0xffffffff 7fffffff, so we must zapnot ++// if( ((int)high == (-32768)) && (low < 0) ) //TODO CHECK lsp: if((imm32<0) || (((int)high == (-32768)) && (low < 0))) ++ zapnot(dst, 0xf, dst); ++ } ++} ++// signed extend imm32 into dst ++void MacroAssembler::mov_immediate32s(Register dst, int imm32) { ++// if (imm32>=0 && imm32 < (1<<15)) { ++// // if imm32=0x0000ffff, ldi will result in 0xf..fffff since it's sign extened ++// // so imm32 must less then 1<<15, not 1<<16 ++// ldi(dst, imm32, R0); ++// } else { ++// int16_t high = (imm32 - (int16_t)(imm32))>>16; ++// int16_t low = (int16_t)(imm32); ++// ldih(dst, high, R0); ++// ldi(dst, low, dst); ++// // if imm32=0x7fffffff, high=0x8000,low=0xffff ldih/ldi will result in 0xffffffff 7fffffff, so we must addw ++// if( ((int)high == (-32768)) && (low < 0) ) ++// addw(dst, R0, dst); ++// } ++ assert(is_simm(imm32, 32), "imm should be simm32 in MacroAssembler::li32"); ++ int16_t high = (imm32 - (int16_t)(imm32))>>16; ++ int16_t low = (int16_t)(imm32); ++ if(is_simm16(imm32)){ ++ ldi(dst, imm32, R0); ++ } else { ++ ldih(dst, high, R0); ++ ldi(dst, low, dst); ++ if( ((int)high == (-32768)) && (low < 0) ) ++ addw(dst, R0, dst); ++ } ++} ++ ++void MacroAssembler::hswap(Register reg) { ++ if (UseSW8A) { ++ revbh(reg, reg); ++ sexth(reg, reg); ++ } else { ++ srll(reg, 8, AT); ++ slll(reg, 24, reg); ++ addw(reg, 0, reg); ++ sral(reg, 16, reg); ++ or_ins(reg, AT, reg); ++ } ++} ++ ++void 
MacroAssembler::huswap(Register reg) { ++ if (UseSW8A) { ++ revbh(reg, reg); ++ } else { ++ srll(reg, 8, AT); ++ slll(reg, 8, reg); ++ zapnot(reg, 0x2, reg); ++ or_ins(reg, AT, reg); ++ } ++} ++ ++// something funny to do this will only one more register AT ++// 32 bits ++void MacroAssembler::swap(Register reg) { ++ if (UseSW8A) { ++ revbw(reg, reg); ++ } else { ++ assert_different_registers(reg, AT); ++ zapnot(reg, 0xf, reg); ++ srll(reg, 8, AT); ++ slll(reg, 24, reg); ++ or_ins(reg, AT, reg); ++ srll(AT, 16, AT); ++ xor_ins(AT, reg, AT); ++ and_ins(AT, 0xff, AT); ++ xor_ins(reg, AT, reg); ++ slll(AT, 16, AT); ++ xor_ins(reg, AT, reg); ++ addw(reg, 0x0, reg); ++ } ++} ++ ++void MacroAssembler::bswapw(Register reg) { ++ swap(reg); ++} ++ ++void MacroAssembler::boundary_test(FloatRegister ft, Register res){ ++ Register tmp1 = AT; ++ Register tmp2 = GP; ++ fimovd(ft,tmp1); ++ slll(tmp1, 0x1, tmp2); ++ srll(tmp2, 53, tmp2); ++ ldi(tmp1, 2047, R0); ++ subl(tmp2, tmp1, res); ++} ++ ++void MacroAssembler::set64(Register d, long value) { ++ // yj todo: check and merge with mov_immediate64 ++ assert_not_delayed(); ++ ++ int32_t lo = (int32_t) (value); ++ int32_t hi = (int32_t) ((value - lo) >> 32); ++ ++ int16_t lo_h16 = (lo - (int16_t)(lo))>>16; ++ int16_t lo_l16 = (int16_t)(lo); ++ int16_t hi_h16 = (hi - (int16_t)(hi))>>16; ++ int16_t hi_l16 = (int16_t)(hi); ++ ++ if ( is_simm16(value) ) { ++ ldi(d, value, R0); ++ } else if ( hi != 0 ) { ++ if ( is_simm16(hi) ) { ++ ldi(d, hi, R0); ++ } else { ++ ldih(d, hi_h16, R0); ++ if (hi_l16 != 0) ++ ldi(d, hi_l16, d); ++ } ++ slll(d, 32, d); ++ if ( lo != 0 ) { ++ if ( ((int)lo_h16 == -32768) && ((int)lo_l16 < 0)) { ++ // original val was in range 0x7FFF8000..0x7FFFFFFF ++ ldih(d, 0x4000, d); ++ ldih(d, 0x4000, d); ++ if (lo_l16 != 0) ++ ldi(d, lo_l16, d); ++ } else { ++ ldih(d, lo_h16, d); ++ if (lo_l16 != 0) ++ ldi(d, lo_l16, d); ++ } ++ } ++ } else if ( (hi == 0) && (lo != 0) ) { ++ if ( ((int)lo_h16 == -32768) && ((int)lo_l16 < 0)) { ++ // original val was in range 0x7FFF8000..0x7FFFFFFF ++ /* ldih(d, lo_h16, R0); ++ * ldi(d, lo_l16, d); ++ * addw(d, 0, d); */ ++ ldih(d, 0x4000, R0); ++ ldih(d, 0x4000, d); ++ if (lo_l16 != 0) ++ ldi(d, lo_l16, d); ++ } else { ++ ldih(d, lo_h16, R0); ++ if (lo_l16 != 0) ++ ldi(d, lo_l16, d); ++ } ++ } else { ++ tty->print_cr("value = 0x%lx", value); ++ guarantee(false, "Not supported yet in set64!"); ++ } ++} ++ ++void MacroAssembler::push(int32_t imm32) { ++ assert(imm32==NULL_WORD, "we don't support imm other than 0"); ++ subl(esp, 8, esp); ++ stl(R0, 0, esp); ++} ++ ++void MacroAssembler::push(Register src) { ++ subl(esp, 8, esp); ++ stl(src, 0, esp); ++} ++ ++void MacroAssembler::pop(Register dst) { ++ if(UseSW8A) { ++ ldl_a(dst, 8 ,esp); ++ } else { ++ ldl(dst, 0, esp); ++ addl(esp, 8, esp); ++ } ++} ++ ++void MacroAssembler::push2(Register reg1, Register reg2) { ++ addiu(esp, -16, esp); ++ stl(reg2, 0, esp); ++ stl(reg1, 8, esp); ++} ++ ++void MacroAssembler::pusha() { ++ ShouldNotReachHere(); ++} ++ ++void MacroAssembler::popa() { ++ ShouldNotReachHere(); ++} ++ ++void MacroAssembler::add(Register Rd, Register Rn, RegisterOrConstant increment) { ++ ShouldNotReachHere(); ++} ++ ++void MacroAssembler::sub(Register Rd, Register Rn, RegisterOrConstant decrement) { ++ ShouldNotReachHere(); ++} ++ ++// this simulates the behaviour of the x86 cmpxchg instruction using a ++// load linked/store conditional pair. 
we use the acquire/release ++// versions of these instructions so that we flush pending writes as ++// per Java semantics. ++ ++// n.b the x86 version assumes the old value to be compared against is ++// in rax and updates rax with the value located in memory if the ++// cmpxchg fails. we supply a register for the old value explicitly ++ ++// the sw64 load linked/store conditional instructions do not ++// accept an offset. so, unlike x86, we must provide a plain register ++// to identify the memory word to be compared/exchanged rather than a ++// register+offset Address. ++ ++void MacroAssembler::cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, ++ Label &succeed, Label *fail) { ++ ShouldNotReachHere(); ++} ++ ++void MacroAssembler::cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, ++ Label &succeed, Label *fail) { ++ ShouldNotReachHere(); ++} ++ ++void MacroAssembler::cmpxchgw(Register oldv, Register newv, Register addr, Register tmp, ++ Label &succeed, Label *fail) { ++ ShouldNotReachHere(); ++} ++ ++#ifndef PRODUCT ++extern "C" void findpc(intptr_t x); ++#endif ++ ++void MacroAssembler::debug(char* msg) { ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else { ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++ assert(false, "DEBUG MESSAGE: %s", msg); ++ } ++} ++ ++void MacroAssembler::debug64(char* msg, int64_t pc, int64_t regs[]) ++{ ++ //::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++ if ( ShowMessageBoxOnError ) { ++ JavaThreadState saved_state = JavaThread::current()->thread_state(); ++ JavaThread::current()->set_thread_state(_thread_in_vm); ++ { ++ // In order to get locks work, we need to fake a in_VM state ++ ttyLocker ttyl; ++ ::tty->print_cr("EXECUTION STOPPED: %s\n", msg); ++ if (CountBytecodes || TraceBytecodes || StopInterpreterAt) { ++ BytecodeCounter::print(); ++ } ++ ++ } ++ ThreadStateTransition::transition(JavaThread::current(), _thread_in_vm, saved_state); ++ } ++ else { ++ ::tty->print_cr("=============== DEBUG MESSAGE: %s ================\n", msg); ++ assert(false, "DEBUG MESSAGE: %s", msg); ++ } ++} ++ ++void MacroAssembler::push_call_clobbered_registers() { ++ ShouldNotReachHere(); ++} ++ ++void MacroAssembler::pop_call_clobbered_registers() { ++ ShouldNotReachHere(); ++} ++ ++Address MacroAssembler::spill_address(int size, int offset, Register tmp) ++{ ++ ShouldNotReachHere(); ++ Register base = sp; ++ return Address(base, offset); ++} ++ ++// Checks whether offset is aligned. ++// Returns true if it is, else false. ++bool MacroAssembler::merge_alignment_check(Register base, ++ size_t size, ++ long cur_offset, ++ long prev_offset) const { ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Checks whether current and previous loads/stores can be merged. ++// Returns true if it can be merged, else false. 
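++// (The merge machinery appears to be carried over from the AArch64 port, where
++// two adjacent 8-byte accesses such as
++//   str(r1, Address(sp, 16)); str(r2, Address(sp, 24));
++// can be fused into one stp. The SW64 versions below are stubs; this note is
++// only illustrative.)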
++bool MacroAssembler::ldst_can_merge(Register rt, ++ const Address &adr, ++ size_t cur_size_in_bytes, ++ bool is_store) const { ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Merge current load/store with previous load/store into ldp/stp. ++void MacroAssembler::merge_ldst(Register rt, ++ const Address &adr, ++ size_t cur_size_in_bytes, ++ bool is_store) { ++ ShouldNotReachHere(); ++} ++ ++/** ++ * Emits code to update CRC-32 with a 32-bit value according to tables 0 to 3 ++ * ++ * @param [in,out]crc Register containing the crc. ++ * @param [in]v Register containing the 32-bit to fold into the CRC. ++ * @param [in]table0 Register containing table 0 of crc constants. ++ * @param [in]table1 Register containing table 1 of crc constants. ++ * @param [in]table2 Register containing table 2 of crc constants. ++ * @param [in]table3 Register containing table 3 of crc constants. ++ * ++ * uint32_t crc; ++ * v = crc ^ v ++ * crc = table3[v&0xff]^table2[(v>>8)&0xff]^table1[(v>>16)&0xff]^table0[v>>24] ++ * ++ */ ++void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp, ++ Register table0, Register table1, Register table2, Register table3, ++ bool upper) { ++ ShouldNotReachHere(); ++} ++ ++void MacroAssembler::kernel_crc32_using_crc32(Register crc, Register buf, ++ Register len, Register tmp0, Register tmp1, Register tmp2, ++ Register tmp3) { ++ ShouldNotReachHere(); ++} ++ ++void MacroAssembler::kernel_crc32c_using_crc32c(Register crc, Register buf, ++ Register len, Register tmp0, Register tmp1, Register tmp2, ++ Register tmp3) { ++ ShouldNotReachHere(); ++} ++ ++/** ++ * @param crc register containing existing CRC (32-bit) ++ * @param buf register pointing to input byte buffer (byte*) ++ * @param len register containing number of bytes ++ * @param table register that will contain address of CRC table ++ * @param tmp scratch register ++ */ ++void MacroAssembler::kernel_crc32c(Register crc, Register buf, Register len, ++ Register table0, Register table1, Register table2, Register table3, ++ Register tmp, Register tmp2, Register tmp3) { ++ kernel_crc32c_using_crc32c(crc, buf, len, table0, table1, table2, table3); ++} ++ ++void MacroAssembler::cmp_klass(Register oop, Register trial_klass, Register tmp) { ++ ShouldNotReachHere(); ++} ++ ++Address MacroAssembler::allocate_metadata_address(Metadata* obj) { ++ ShouldNotReachHere(); ++ int index = oop_recorder()->allocate_metadata_index(obj); ++ RelocationHolder rspec = metadata_Relocation::spec(index); ++ return Address(); ++} ++ ++Address MacroAssembler::constant_oop_address(jobject obj) { ++ ShouldNotReachHere(); ++ int oop_index = oop_recorder()->find_index(obj); ++ return Address(); ++} ++ ++// Move the address of the polling page into dest. ++void MacroAssembler::get_polling_page(Register dest, address page, relocInfo::relocType rtype) { ++ should_not_reach_here("get_polling_page"); ++} ++ ++// Move the address of the polling page into r, then read the polling ++// page. ++address MacroAssembler::read_polling_page(Register r, address page, relocInfo::relocType rtype) { ++ get_polling_page(r, page, rtype); ++ return read_polling_page(r, rtype); ++} ++ ++// Read the polling page. The address of the polling page must ++// already be in r. 
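++// (Illustrative only: on ports where this is implemented the poll is just a
++// load, e.g. ldw(AT, 0, r), and the VM arms the page so the load traps at a
++// safepoint. The variant actually wired up for SW64 is safepoint_poll() later
++// in this file, which tests the word at Thread::polling_page_offset().)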
++address MacroAssembler::read_polling_page(Register r, relocInfo::relocType rtype) { ++ should_not_reach_here("read_polling_page"); ++ return 0; ++} ++ ++void MacroAssembler::adrp(Register reg1, const Address &dest, unsigned long &byte_offset) { ++ should_not_reach_here("adrp"); ++} ++ ++void MacroAssembler::load_byte_map_base(Register reg) { ++ should_not_reach_here("load_byte_map_base"); ++} ++ ++void MacroAssembler::build_frame(int framesize) { ++ should_not_reach_here("build_frame"); ++} ++ ++void MacroAssembler::remove_frame(int framesize) { ++ should_not_reach_here("remove_frame"); ++} ++ ++typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr); ++ ++typedef void (MacroAssembler::* chr_insn)(Register Rt, const Address &adr); ++typedef void (MacroAssembler::* uxt_insn)(Register Rd, Register Rn); ++ ++// Compare Strings ++ ++// For Strings we're passed the address of the first characters in a1 ++// and a2 and the length in cnt1. ++// elem_size is the element size in bytes: either 1 or 2. ++// There are two implementations. For arrays >= 8 bytes, all ++// comparisons (including the final one, which may overlap) are ++// performed 8 bytes at a time. For strings < 8 bytes, we compare a ++// halfword, then a short, and then a byte. ++ ++void MacroAssembler::string_equals(Register a1, Register a2, ++ Register result, Register cnt1, int elem_size) ++{ ++ should_not_reach_here("string_equals"); ++} ++ ++ ++// The size of the blocks erased by the zero_blocks stub. We must ++// handle anything smaller than this ourselves in zero_words(). ++const int MacroAssembler::zero_words_block_size = 8; ++ ++// zero_words() is used by C2 ClearArray patterns. It is as small as ++// possible, handling small word counts locally and delegating ++// anything larger to the zero_blocks stub. It is expanded many times ++// in compiled code, so it is important to keep it short. ++ ++// ptr: Address of a buffer to be zeroed. ++// cnt: Count in HeapWords. ++// ++// ptr, cnt, rscratch1, and rscratch2 are clobbered. ++void MacroAssembler::zero_words(Register ptr, Register cnt) ++{ ++ should_not_reach_here("zero_words"); ++} ++ ++// base: Address of a buffer to be zeroed, 8 bytes aligned. ++// cnt: Immediate count in HeapWords. ++#define SmallArraySize (18 * BytesPerLong) ++void MacroAssembler::zero_words(Register base, u_int64_t cnt) ++{ ++ should_not_reach_here("zero_words"); ++} ++ ++////// Zero blocks of memory by using DC ZVA. ++////// ++////// Aligns the base address first sufficently for DC ZVA, then uses ++////// DC ZVA repeatedly for every full block. cnt is the size to be ++////// zeroed in HeapWords. Returns the count of words left to be zeroed ++////// in cnt. ++////// ++////// NOTE: This is intended to be used in the zero_blocks() stub. If ++////// you want to use it elsewhere, note that cnt must be >= 2*zva_length. ++////void MacroAssembler::zero_dcache_blocks(Register base, Register cnt) { ++//// Register tmp = rscratch1; ++//// Register tmp2 = rscratch2; ++//// int zva_length = VM_Version::zva_length(); ++//// Label initial_table_end, loop_zva; ++//// Label fini; ++//// ++//// // Base must be 16 byte aligned. If not just return and let caller handle it ++//// tst(base, 0x0f); ++//// br(Assembler::NE, fini); ++//// // Align base with ZVA length. ++//// neg(tmp, base); ++//// andr(tmp, tmp, zva_length - 1); ++//// ++//// // tmp: the number of bytes to be filled to align the base with ZVA length. 
++//// add(base, base, tmp); ++//// sub(cnt, cnt, tmp, Assembler::ASR, 3); ++//// adr(tmp2, initial_table_end); ++//// sub(tmp2, tmp2, tmp, Assembler::LSR, 2); ++//// br(tmp2); ++//// ++//// for (int i = -zva_length + 16; i < 0; i += 16) ++//// stp(zr, zr, Address(base, i)); ++//// BIND(initial_table_end); ++//// ++//// sub(cnt, cnt, zva_length >> 3); ++//// BIND(loop_zva); ++//// dc(Assembler::ZVA, base); ++//// subs(cnt, cnt, zva_length >> 3); ++//// add(base, base, zva_length); ++//// br(Assembler::GE, loop_zva); ++//// add(cnt, cnt, zva_length >> 3); // count not zeroed by DC ZVA ++//// BIND(fini); ++////} ++ ++// base: Address of a buffer to be filled, 8 bytes aligned. ++// cnt: Count in 8-byte unit. ++// value: Value to be filled with. ++// base will point to the end of the buffer after filling. ++void MacroAssembler::fill_words(Register base, Register cnt, Register value) ++{ ++ should_not_reach_here("fill_words"); ++} ++ ++void MacroAssembler::safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg) {SCOPEMARK_NAME(safepoint_poll, this); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ assert(thread_reg == rthread, "should be"); ++ testb(Address(thread_reg, Thread::polling_page_offset()), SafepointMechanism::poll_bit(), temp_reg); ++ jcc(Assembler::notZero, slow_path, temp_reg); // handshake bit set implies poll ++ } else { ++ cmpw(ExternalAddress(SafepointSynchronize::address_of_state()), ++ SafepointSynchronize::_not_synchronized, temp_reg); ++ jcc(Assembler::notEqual, slow_path, temp_reg); ++ } ++} ++ ++// Just like safepoint_poll, but use an acquiring load for thread- ++// local polling. ++// ++// We need an acquire here to ensure that any subsequent load of the ++// global SafepointSynchronize::_state flag is ordered after this load ++// of the local Thread::_polling page. We don't want this poll to ++// return false (i.e. not safepointing) and a later poll of the global ++// SafepointSynchronize::_state spuriously to return true. ++// ++// This is to avoid a race when we're in a native->Java transition ++// racing the code which wakes up from a safepoint. ++// ++void MacroAssembler::safepoint_poll_acquire(Label& slow_path) { ++ should_not_reach_here("safepoint_poll_acquire"); ++} ++ ++void MacroAssembler::far_call(Address entry, CodeBuffer *cbuf, Register tmp) { ++ ShouldNotReachHere(); ++} ++ ++void MacroAssembler::far_jump(Address entry, CodeBuffer *cbuf, Register tmp) { ++ ShouldNotReachHere(); ++} ++ ++void MacroAssembler::jr(address entry) { ++ patchable_jump(entry); ++} ++ ++void MacroAssembler::jr(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++ case relocInfo::runtime_call_type: ++ case relocInfo::none: ++ jr(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ patchable_jump(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::patchable_jump(address target) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ beq_a(R0, target); ++ } else { ++ if (SafePatch) { ++ if (offset() % 8 == 0) { ++ nop(); ++ br(T12, 2); ++ emit_int64((long) target); ++ ldl(T12, 0, T12); ++ } else { ++ br(T12, 2); ++ emit_int64((long) target); ++ ldl(T12, 0, T12); ++ nop(); ++ } ++ } else { ++ prepare_patch_li48(T12, (long) target); ++ } ++ jmp(T12); ++ } ++} ++ ++void MacroAssembler::call_patch(address entry) { ++// c/c++ code assume T12 is entry point, so we just always move entry to t12 ++// maybe there is some more graceful method to handle this. 
FIXME ++// For more info, see class NativeCall. ++ patchable_call(entry); ++} ++ ++void MacroAssembler::call_patch(address entry, relocInfo::relocType rtype) { ++ switch (rtype) { ++// case relocInfo::runtime_call_type: ++// patchable_call_setfpec1(entry); ++// break; ++ case relocInfo::none: ++ call_patch(entry); ++ break; ++ default: ++ { ++ InstructionMark im(this); ++ relocate(rtype); ++ call_patch(entry); ++ } ++ break; ++ } ++} ++ ++void MacroAssembler::patchable_call(address target, Label *retAddr, Register tmp) { ++ if (reachable_from_cache(target)) { ++ nop(); ++ nop(); ++ nop(); ++ nop(); ++ bsr(RA, (int) (long) target); ++ } else { ++ if (SafePatch) { ++ if (offset() % 8 == 0) { ++ nop(); ++ br(T12, 2); ++ emit_int64((long) target); ++ ldl(T12, 0, T12); ++ } else { ++ br(T12, 2); ++ emit_int64((long) target); ++ ldl(T12, 0, T12); ++ nop(); ++ } ++ } else { ++ prepare_patch_li48(tmp, (long) target); ++ if (tmp != T12) { ++ movl(T12, tmp); ++ } ++ } ++ Assembler::call(RA, T12, 0); ++ if (retAddr) ++ bind(*retAddr); ++ if (UseSetfpec) ++ setfpec1(); ++ } ++} ++ ++//void MacroAssembler::patchable_call_setfpec1(address target) { ++// if (reachable_from_cache(target)) { ++// nop(); ++// nop(); ++// nop(); ++// nop(); ++// bsr(RA, (int)(long)target); ++// } else { ++//// movptr(T12, (long)target); ++// //jalr_setfpec1(T12); ++// jmp(T12, rscratch1); ++// } ++//} ++ ++// Maybe emit a call via a trampoline. If the code cache is small ++// trampolines won't be emitted. ++ ++address MacroAssembler::trampoline_call(Address entry, CodeBuffer *cbuf) { ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++ ++// Emit a trampoline stub for a call to a target which is too far away. ++// ++// code sequences: ++// ++// call-site: ++// branch-and-link to or ++// ++// Related trampoline stub for this call site in the stub section: ++// load the call target from the constant pool ++// branch (LR still points to the call site above) ++ ++address MacroAssembler::emit_trampoline_stub(int insts_call_instruction_offset, ++ address dest) { ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++void MacroAssembler::emit_static_call_stub() { ++ ShouldNotReachHere(); ++} ++ ++// These two are taken from x86, but they look generally useful ++ ++// scans count pointer sized words at [addr] for occurence of value, ++// generic ++void MacroAssembler::repne_scan(Register addr, Register value, Register count, ++ Register scratch) { ++ ShouldNotReachHere(); ++} ++ ++// scans count 4 byte words at [addr] for occurence of value, ++// generic ++void MacroAssembler::repne_scanw(Register addr, Register value, Register count, ++ Register scratch) { ++ ShouldNotReachHere(); ++} +diff --git a/src/hotspot/cpu/sw64/macroAssembler_sw64.hpp b/src/hotspot/cpu/sw64/macroAssembler_sw64.hpp +new file mode 100644 +index 0000000000..5e38150970 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/macroAssembler_sw64.hpp +@@ -0,0 +1,2166 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_MACROASSEMBLER_SW64_HPP ++#define CPU_SW64_VM_MACROASSEMBLER_SW64_HPP ++ ++#include "asm/assembler.hpp" ++#ifdef PRODUCT ++#define SCOPEMARK /* nothing */ ++#define SCOPEMARK_NAME(name, masm) /* nothing */ ++#else ++#define SCOPEMARK \ ++char line[200]; sprintf(line,"%s:%d",__FILE__, __LINE__);\ ++ScopeMark scopeMark(_masm, line); ++ ++#define SCOPEMARK2 \ ++char line[200]; sprintf(line,"%s:%d",__FILE__, __LINE__);\ ++ScopeMark scopeMark(this, line); ++ ++#define SCOPEMARK_NAME(name, masm) \ ++char line[200]; sprintf(line,"%s:%d",__FILE__, __LINE__);\ ++ScopeMark scopeMark(masm, line, #name); ++ ++#endif ++ ++// MacroAssembler extends Assembler by frequently used macros. ++// ++// Instructions for which a 'better' code sequence exists depending ++// on arguments should also go in here. ++ ++class MacroAssembler: public Assembler { ++ friend class LIR_Assembler; ++ ++ public: ++ using Assembler::offset; ++ ++ // Support for VM calls ++ // ++ // This is the base routine called by the different versions of call_VM_leaf. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ ++ virtual void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments // the number of arguments to pop after the call ++ ); ++ ++ //TODO:refactor use this edition to deal with label ++ virtual void call_VM_leaf_base( ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments to pop after the call ++ Label *retaddr, ++ Register rscratch = T12 ++ ); ++ ++ protected: ++ // This is the base routine called by the different versions of call_VM. The interpreter ++ // may customize this version by overriding it for its purposes (e.g., to save/restore ++ // additional registers when doing a VM call). ++ // ++ // If no java_thread register is specified (noreg) than rthread will be used instead. call_VM_base ++ // returns the register which contains the thread upon return. If a thread register has been ++ // specified, the return value will correspond to that register. If no last_java_sp is specified ++ // (noreg) than rsp will be used instead. 
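++  //
++  // Illustrative sketch only (not part of the original patch): a wrapper that
++  // pushed one argument and wants the default thread and SP handling would
++  // reach this routine roughly as
++  //
++  //   call_VM_base(V0,          // oop result register (placeholder name)
++  //                noreg,       // java_thread: let the routine load rthread
++  //                noreg,       // last_java_sp: default to the current rsp
++  //                entry_point, // runtime entry to call
++  //                1,           // one argument to pop after the call
++  //                true);       // check for pending exceptions on return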
++ virtual void call_VM_base( // returns the register containing the thread upon return ++ Register oop_result, // where an oop-result ends up if any; use noreg otherwise ++ Register java_thread, // the thread if computed before ; use noreg otherwise ++ Register last_java_sp, // to set up last_Java_frame in stubs; use noreg otherwise ++ address entry_point, // the entry point ++ int number_of_arguments, // the number of arguments (w/o thread) to pop after the call ++ bool check_exceptions // whether to check for pending exceptions after return ++ ); ++ ++ void call_VM_helper(Register oop_result, address entry_point, int number_of_arguments, bool check_exceptions = true); ++ ++ public: ++ MacroAssembler(CodeBuffer* code) : Assembler(code) {} ++ ++ // These routines should emit JVMTI PopFrame and ForceEarlyReturn handling code. ++ // The implementation is only non-empty for the InterpreterMacroAssembler, ++ // as only the interpreter handles PopFrame and ForceEarlyReturn requests. ++ virtual void check_and_handle_popframe(Register java_thread); ++ virtual void check_and_handle_earlyret(Register java_thread); ++ ++ //jzy ++ Address as_Address(ArrayAddress adr, Register base_reg); ++ ++ // Support for NULL-checks ++ // ++ // Generates code that causes a NULL OS exception if the content of reg is NULL. ++ // If the accessed location is M[reg + offset] and the offset is known, provide the ++ // offset. No explicit code generation is needed if the offset is within a certain ++ // range (0 <= offset <= page_size). ++ ++ virtual void null_check(Register reg, int offset = -1); ++ static bool needs_explicit_null_check(intptr_t offset); ++ ++ // Required platform-specific helpers for Label::patch_instructions. ++ // They _shadow_ the declarations in AbstractAssembler, which are undefined. 
++ static int pd_patch_instruction_size(address branch, address target); ++ static void pd_patch_instruction_aarch(address branch, address target) { ++ pd_patch_instruction_size(branch, target); ++ } ++ static address pd_call_destination(address branch) { ++ ShouldNotReachHere(); ++ return 0; ++ } ++ int patched_branch(int dest_pos, int inst, int inst_pos); ++ void pd_patch_instruction(address branch, address target) { ++ jint& stub_inst = *(jint*) branch; ++ stub_inst = patched_branch(target - branch, stub_inst, 0); ++ } ++ ++#ifndef PRODUCT ++ static void pd_print_patched_instruction(address branch); ++#endif ++ ++ static int patch_oop(address insn_addr, address o); ++ static int patch_narrow_klass(address insn_addr, narrowKlass n); ++ ++ //void li64(Register rd, long imm); ++ //prepare target address for patcher(li48) ++ void prepare_patch_li48(Register rd, long imm); ++ ++ address emit_trampoline_stub(int insts_call_instruction_offset, address target); ++ void emit_static_call_stub(); ++ ++ void load_unsigned_byte(Register dst, Address src); ++ void load_unsigned_short(Register dst, Address src); ++ ++ void load_signed_byte32(Register rd, Address addr, Register tmp=rcc); ++ void load_signed_byte64(Register rd, Address addr, Register tmp=rcc); ++ void load_signed_short(Register rd, Address addr); ++ ++ // Support for sign-extension (hi:lo = extend_sign(lo)) ++ void extend_sign(Register hi, Register lo); ++ ++ // Load and store values by size and signed-ness ++ void load_sized_value(Register dst, Address src, size_t size_in_bytes, bool is_signed, Register dst2 = noreg); ++ void store_sized_value(Address dst, Register src, size_t size_in_bytes, Register src2 = noreg); ++ ++ // Support for inc/dec with optimal instruction selection depending on value ++ ++ // x86_64 aliases an unqualified register/address increment and ++ // decrement to call incrementq and decrementq but also supports ++ // explicitly sized calls to incrementq/decrementq or ++ // incrementl/decrementl ++ ++ // for sw64 the proper convention would be to use ++ // increment/decrement for 64 bit operatons and ++ // incrementw/decrementw for 32 bit operations. so when porting ++ // x86_64 code we can leave calls to increment/decrement as is, ++ // replace incrementq/decrementq with increment/decrement and ++ // replace incrementl/decrementl with incrementw/decrementw. ++ ++ // n.b. increment/decrement calls with an Address destination will ++ // need to use a scratch register to load the value to be ++ // incremented. increment/decrement calls which add or subtract a ++ // constant value greater than 2^12 will need to use a 2nd scratch ++ // register to hold the constant. 
so, a register increment/decrement ++ // may trash rscratch2 and an address increment/decrement trash ++ // rscratch and rscratch2 ++ ++ void decrement(Register reg, int value = 1){decrementl(reg, value);} ++ void increment(Register reg, int value = 1){incrementl(reg, value);} ++ ++ void decrementw(ExternalAddress dst, int value = 1, Register tmp1 = rscratch1, Register tmp2 = rscratch2); ++ void decrementw(Address dst, int value = 1, Register tmp = rcc); ++ void decrementw(Register reg, int value = 1); ++ ++ void decrementl(ExternalAddress dst, int value = 1, Register tmp1 = rscratch1, Register tmp2 = rscratch2); ++ void decrementl(Address dst, int value = 1, Register tmp = rcc); ++ void decrementl(Register reg, int value = 1); ++ ++ void incrementw(AddressLiteral dst, int value = 1, Register tmp1 = rscratch1, Register tmp2 = rscratch2); ++ void incrementw(Address dst, int value = 1, Register tmp_not_rcc=rscratch1); ++ void incrementw(Register reg, int value = 1); ++ ++ void incrementl(ExternalAddress dst, int value = 1, Register tmp1 = rscratch1, Register tmp2 = rscratch2); ++ void incrementl(Address dst, int value = 1, Register tmp = rcc); ++ void incrementl(Register reg, int value = 1); ++ ++ ++ // Alignment ++ void align(int modulus); ++ ++ // Stack frame creation/removal ++ void enter(); ++ void leave(); ++ ++ // Support for getting the JavaThread pointer (i.e.; a reference to thread-local information) ++ // The pointer will be loaded into the thread register. ++ void get_thread(Register thread); ++ ++ ++ // Support for VM calls ++ // ++ // It is imperative that all calls into the VM are handled via the call_VM macros. ++ // They make sure that the stack linkage is setup correctly. call_VM's correspond ++ // to ENTRY/ENTRY_X entry points while call_VM_leaf's correspond to LEAF entry points. 
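++  //
++  // Illustrative contrast (not part of the original patch; register and routine
++  // names are placeholders):
++  //
++  //   call_VM(V0, CAST_FROM_FN_PTR(address, SomeRuntime::op), c_rarg1);    // ENTRY: may safepoint or throw
++  //   call_VM_leaf(CAST_FROM_FN_PTR(address, some_leaf_helper), c_rarg1);  // LEAF: no safepoint, no oop result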
++ ++ ++ void call_VM(Register oop_result, ++ address entry_point, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ // Overloadings with last_Java_sp ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ int number_of_arguments = 0, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, bool ++ check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, ++ bool check_exceptions = true); ++ void call_VM(Register oop_result, ++ Register last_java_sp, ++ address entry_point, ++ Register arg_1, Register arg_2, Register arg_3, ++ bool check_exceptions = true); ++ ++ void get_vm_result (Register oop_result, Register thread); ++ void get_vm_result_2(Register metadata_result, Register thread); ++ ++// // These always tightly bind to MacroAssembler::call_VM_base ++// // bypassing the virtual implementation ++// void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, int number_of_arguments = 0, bool check_exceptions = true); ++// void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, bool check_exceptions = true); ++// void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, bool check_exceptions = true); ++// void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, bool check_exceptions = true); ++// void super_call_VM(Register oop_result, Register last_java_sp, address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4, bool check_exceptions = true); ++ ++ void call_VM_leaf0(address entry_point); ++ void call_VM_leaf(address entry_point, ++ int number_of_arguments = 0); ++ void call_VM_leaf(address entry_point, ++ Register arg_1); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2); ++ void call_VM_leaf(address entry_point, ++ Register arg_1, Register arg_2, Register arg_3); ++ ++ // These always tightly bind to MacroAssembler::call_VM_leaf_base ++ // bypassing the virtual implementation ++ void super_call_VM_leaf(address entry_point); ++ void super_call_VM_leaf(address entry_point, Register arg_1); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3); ++ void super_call_VM_leaf(address entry_point, Register arg_1, Register arg_2, Register arg_3, Register arg_4); ++ ++ // last Java Frame (fills frame anchor) ++// void set_last_Java_frame(Register thread, ++// Register last_java_sp, ++// Register last_java_fp, ++// address last_java_pc); ++ ++ // thread in the default location ++// void set_last_Java_frame(Register last_java_sp, ++// Register last_java_fp, ++// address last_java_pc); ++ ++ void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ address last_java_pc, ++ Register scratch); ++ ++ void 
set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Label &last_java_pc, ++ Register scratch, Register scratch2=rscratch2_AT); ++ ++ /*void set_last_Java_frame(Register last_java_sp, ++ Register last_java_fp, ++ Register last_java_pc, ++ Register scratch);*/ ++ ++ void reset_last_Java_frame(Register thread, bool clear_fp); ++ ++//// void reset_last_Java_frame(Register thread); ++ ++ // thread in the default location (rthread) ++ void reset_last_Java_frame(bool clear_fp); ++ ++ // Stores ++//// void store_check(Register obj); // store check for obj - register is destroyed afterwards ++//// void store_check(Register obj, Address dst); // same as above, dst is exact store location (reg. is destroyed) ++ ++ void resolve_jobject(Register value, Register thread, Register tmp); ++ ++ // C 'boolean' to Java boolean: x == 0 ? 0 : 1 ++ void c2bool(Register x); ++ ++ // oop manipulations ++ void load_klass(Register dst, Register src); ++ void store_klass(Register dst, Register src); ++ ++ void access_load_at(BasicType type, DecoratorSet decorators, Register dst, Address src, ++ Register tmp1, Register tmp_thread); ++ ++ void access_store_at(BasicType type, DecoratorSet decorators, Address dst, Register src, ++ Register tmp1, Register tmp_thread); ++ ++ void load_heap_oop(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void load_heap_oop_not_null(Register dst, Address src, Register tmp1 = noreg, ++ Register thread_tmp = noreg, DecoratorSet decorators = 0); ++ void store_heap_oop(Address dst, Register src, Register tmp1 = noreg, ++ Register tmp_thread = noreg, DecoratorSet decorators = 0); ++ ++ // currently unimplemented ++ // Used for storing NULL. All other oop constants should be ++ // stored using routines that take a jobject. 
++ void store_heap_oop_null(Address dst); ++ ++ void load_prototype_header(Register dst, Register src); ++ ++ void store_klass_gap(Register dst, Register src); ++ ++ // This dummy is to prevent a call to store_heap_oop from ++ // converting a zero (like NULL) into a Register by giving ++ // the compiler two choices it can't resolve ++ ++//// void store_heap_oop(Address dst, void* dummy); ++ ++ void encode_heap_oop(Register dst, Register src); ++ void encode_heap_oop(Register r) { encode_heap_oop(r, r); } ++ void decode_heap_oop(Register dst, Register src); ++ void decode_heap_oop(Register r) { decode_heap_oop(r, r); } ++ void encode_heap_oop_not_null(Register r); ++ void decode_heap_oop_not_null(Register r); ++ void encode_heap_oop_not_null(Register dst, Register src); ++ void decode_heap_oop_not_null(Register dst, Register src); ++ ++ void emit_data(RelocationHolder const& rspec, int format); ++ void mov_narrow_oop(Register dst, int32_t imm32, RelocationHolder const& rspec); ++ ++ void set_narrow_oop(Register dst, jobject obj); ++ void set_narrow_oop(Address dst, jobject obj); ++ void cmp_narrow_oop(Register dst, jobject obj, Register ccReg=rcc); ++ void cmp_narrow_oop(Address dst, jobject obj, Register ccReg=rcc); ++ ++ void encode_klass_not_null(Register r); ++ void decode_klass_not_null(Register r); ++ void encode_klass_not_null(Register dst, Register src); ++ void decode_klass_not_null(Register dst, Register src); ++ void set_narrow_klass(Register dst, Klass* k); ++ void set_narrow_klass(Address dst, Klass* k); ++ void cmp_narrow_klass(Register dst, Klass* k, Register ccReg=rcc); ++ void cmp_narrow_klass(Address dst, Klass* k, Register ccReg=rcc); ++ ++ // if heap base register is used - reinit it with the correct value ++ void reinit_heapbase(); ++ ++ DEBUG_ONLY(void verify_heapbase(const char* msg);) ++ ++ void push_CPU_state(bool save_vectors = false); ++ void pop_CPU_state(bool restore_vectors = false) ; ++ ++ // Round up to a power of two ++ void round_to(Register reg, int modulus); ++ ++ // allocation ++ void eden_allocate( ++ Register thread, // Current thread ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void tlab_allocate( ++ Register thread, // Current thread ++ Register obj, // result: pointer to object after successful allocation ++ Register var_size_in_bytes, // object size in bytes if unknown at compile time; invalid otherwise ++ int con_size_in_bytes, // object size in bytes if known at compile time ++ Register t1, // temp register ++ Register t2, // temp register ++ Label& slow_case // continuation point if fast allocation fails ++ ); ++ void zero_memory(Register addr, Register len, Register t1); ++ ++ // interface method calling ++ void lookup_interface_method(Register recv_klass, ++ Register intf_klass, ++ RegisterOrConstant itable_index, ++ Register method_result, ++ Register scan_temp, ++ Label& no_such_interface, ++ bool return_method = true); ++ ++ // virtual method calling ++ // n.b. x86 allows RegisterOrConstant for vtable_index ++ void lookup_virtual_method(Register recv_klass, ++ RegisterOrConstant vtable_index, ++ Register method_result); ++ ++ // Test sub_klass against super_klass, with fast and slow paths. 
++ ++ // The fast path produces a tri-state answer: yes / no / maybe-slow. ++ // One of the three labels can be NULL, meaning take the fall-through. ++ // If super_check_offset is -1, the value is loaded up from super_klass. ++ // No registers are killed, except temp_reg. ++ void check_klass_subtype_fast_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label* L_success, ++ Label* L_failure, ++ Label* L_slow_path, ++ RegisterOrConstant super_check_offset = RegisterOrConstant(-1)); ++ ++ // The rest of the type check; must be wired to a corresponding fast path. ++ // It does not repeat the fast path logic, so don't use it standalone. ++ // The temp_reg and temp2_reg can be noreg, if no temps are available. ++ // Updates the sub's secondary super cache as necessary. ++ // If set_cond_codes, condition codes will be Z on success, NZ on failure. ++ void check_klass_subtype_slow_path(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Register temp2_reg, ++ Label* L_success, ++ Label* L_failure, ++ bool set_cond_codes = false); ++ ++ // Simplified, combined version, good for typical uses. ++ // Falls through on failure. ++ void check_klass_subtype(Register sub_klass, ++ Register super_klass, ++ Register temp_reg, ++ Label& L_success); ++ ++ Address argument_address(RegisterOrConstant arg_slot, int extra_slot_offset = 0); ++ ++ ++ // Debugging ++ ++ // only if +VerifyOops ++ void verify_oop(Register reg, const char* s = "broken oop"); ++ void verify_oop_addr(Address addr, const char * s = "broken oop addr"); ++ ++// TODO: verify method and klass metadata (compare against vptr?) ++ void _verify_method_ptr(Register reg, const char * msg, const char * file, int line) {} ++ void _verify_klass_ptr(Register reg, const char * msg, const char * file, int line){} ++ ++#define verify_method_ptr(reg) _verify_method_ptr(reg, "broken method " #reg, __FILE__, __LINE__) ++#define verify_klass_ptr(reg) _verify_klass_ptr(reg, "broken klass " #reg, __FILE__, __LINE__) ++ ++ // only if +VerifyFPU ++ void verify_FPU(int stack_depth, const char* s = "illegal FPU state"); ++ ++ // prints msg, dumps registers and stops execution ++ void stop(const char* msg); ++ //use for sw debug, need to refactor, like int3 in x86 platform jzy ++ void debug_stop(const char* msg); ++ ++ void int3() { ++ call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); ++ } ++// // prints msg and continues ++ void warn(const char* msg); ++ ++ static void debug(char* msg); ++ static void debug64(char* msg, int64_t pc, int64_t regs[]); ++ ++// void untested() { stop("untested"); } ++// ++ void unimplemented(const char* what = ""); ++ ++ void should_not_reach_here(const char* what="should_not_reach_here") { stop(what); } ++ ++ // Stack overflow checking ++ void bang_stack_with_offset(int offset) { ++ // stack grows down, caller passes positive offset ++ assert(offset > 0, "must bang with negative offset"); ++ if (offset <= 32768) { ++ stw(R0, -offset, esp); ++ } else { ++ mov_immediate64(rscratch2, offset); ++ subl(esp, rscratch2, rscratch2); ++ stw(R0, 0, rscratch2); ++ } ++ } ++ ++ // Writes to stack successive pages until offset reached to check for ++ // stack overflow + shadow pages. 
Also, clobbers tmp ++ void bang_stack_size(Register size, Register tmp); ++ ++ // Check for reserved stack access in method being exited (for JIT) ++ void reserved_stack_check(); ++ ++ virtual RegisterOrConstant delayed_value_impl(intptr_t* delayed_value_addr, ++ Register tmp, ++ int offset); ++ ++ // Support for serializing memory accesses between threads ++ void serialize_memory(Register thread, Register tmp); ++ ++ void safepoint_poll(Label& slow_path, Register thread_reg, Register temp_reg); ++ void safepoint_poll_acquire(Label& slow_path); ++ ++ void verify_tlab(); ++ ++ // Biased locking support ++ // lock_reg and obj_reg must be loaded up with the appropriate values. ++ // swap_reg is killed. ++ // tmp_reg must be supplied and must not be rscratch1 or rscratch2 ++ // Optional slow case is for implementations (interpreter and C1) which branch to ++ // slow case directly. Leaves condition codes set for C2's Fast_Lock node. ++ // Returns offset of first potentially-faulting instruction for null ++ // check info (currently consumed only by C1). If ++ // swap_reg_contains_mark is true then returns -1 as it is assumed ++ // the calling code has already passed any potential faults. ++ int biased_locking_enter(Register lock_reg, Register obj_reg, ++ Register swap_reg, Register tmp_reg, ++ bool swap_reg_contains_mark, ++ Label& done, Label* slow_case = NULL, ++ BiasedLockingCounters* counters = NULL); ++ void biased_locking_exit (Register obj_reg, Register temp_reg, Label& done); ++ #ifdef COMPILER2 ++ void atomic_incw(AddressLiteral counter_addr, int inc, Register tmp_reg1); ++ // Code used by cmpFastLock and cmpFastUnlock mach instructions in .ad file. ++ // See full desription in macroAssembler_x86.cpp. ++ void fast_lock(Register obj, Register box, Register tmp, ++ Register scr, Register cx1, Register cx2, ++ BiasedLockingCounters* counters, ++ Metadata* method_data, ++ bool use_rtm, bool profile_rtm); ++ void fast_unlock(Register obj, Register box, Register tmp, bool use_rtm); ++#endif ++ // Fill primitive arrays ++ void generate_fill(BasicType t, bool aligned, ++ Register to, Register value, Register count, ++ Register rtmp); ++ ++// gpr load store instructions ++ ++#define LDINSNLIST(FUNC) \ ++ FUNC(ldbu)\ ++ FUNC(ldhu)\ ++ FUNC(ldw)\ ++ FUNC(ldl)\ ++ FUNC(ldl_u)\ ++ FUNC(ldi) ++ ++#define LDFROMADDR_DEC(LDX) \ ++ using Assembler::LDX; \ ++ void LDX(Register ra, Address addr); ++ ++ LDINSNLIST(LDFROMADDR_DEC) ++ ++#undef LDFROMADDR_DEC ++ ++#define STINSNLIST(FUNC) \ ++ FUNC(stb)\ ++ FUNC(sth)\ ++ FUNC(stw)\ ++ FUNC(stl)\ ++ FUNC(stl_u) ++ ++#define ST2ADDR_DEC(STX) \ ++ using Assembler::STX; \ ++ void STX(Register ra, Address addr, Register tmp=rcc); ++ ++ STINSNLIST(ST2ADDR_DEC) ++ ++#undef ST2ADDR_DEC ++ ++ void stw(int, Address, Register tmp = rcc); ++ void stptr(Register rd, Address addr, Register tmp=rcc); ++ void ldhu_unaligned(Register rd, Address addr, Register tmp=rcc); ++ void ldhu_unaligned_be(Register rd, Address addr, Register tmp=rcc); ++ void ldwu(Register rd, Address addr); ++ void ldws(Register rd, Address addr); ++ void ldwu(Register rd, AddressLiteral addr); ++ void ldws(Register rd, AddressLiteral addr); ++ void ldptr(Register rd, Address addr, Register tmp=rcc); ++ void ldptr(Register rd, AddressLiteral addr); ++ ++ ++// float register load store instructions ++ ++#define FLOATINSNLIST(FUNC) \ ++ FUNC(flds)\ ++ FUNC(fldd)\ ++ FUNC(fsts)\ ++ FUNC(fstd) ++ ++#define ADDR_DEC(FLOATINSN) \ ++ using Assembler::FLOATINSN; \ ++ void FLOATINSN(FloatRegister ra, Address 
addr, Register tmp=rcc); ++ ++ FLOATINSNLIST(ADDR_DEC) ++ ++#undef ADDR_DEC ++ ++ void load_float(FloatRegister ra, Address src, Register tmp=rcc); ++ void load_float(FloatRegister rd, AddressLiteral addr, Register tmp=rcc); ++ void load_double(FloatRegister ra, Address src, Register tmp=rcc); ++ void load_double(FloatRegister rd, AddressLiteral addr, Register tmp=rcc); ++ void store_float(FloatRegister ra, Address src, Register tmp=rcc); ++ void store_double(FloatRegister ra, Address src, Register tmp=rcc); ++ ++ void lea(Register rd, Address src); ++ void lea(Register rd, AddressLiteral addr); ++ void lea(Address dst, AddressLiteral adr, Register tmp_not_rcc); ++ ++// arithmathic instrunctions ++ ++/** ++ * x86 ++ * Assembler::andl/orl/xorl(Register dst, int32_t imm32) ++ * sw64 ++ * MacroAssembler::andw/orw/xorw(Register lh, int rh, Register res, Register scratch=rcc) ++ * note ++ * we will clear the msb32 of res, so the msb32 of lh is no matter. ++ */ ++#define LOGICINSNLIST(FUNC) \ ++ FUNC(andw, and_ins)\ ++ FUNC(orw, bis)\ ++ FUNC(xorw, xor_ins) ++ ++#define ARITHINSNLIST(FUNC) \ ++ FUNC(addwu, addw)\ ++ FUNC(subwu, subw)\ ++ FUNC(mulwu, mulw) ++ ++ /* I introduce scratch reg in NAMEw since it's possible that lh and res could be the same reg */ ++#define EXPAND_W(NAME, INSN) \ ++ void NAME(Register lh, int rh, Register res, Register scratch=rcc){\ ++ assert_different_registers(lh, scratch);\ ++ if (rh >=0 && rh < (1<<8)) {\ ++ INSN(lh, rh, res);\ ++ }\ ++ else if (rh >=0 && rh < (1<<15)) {\ ++ ldi(scratch, rh, R0);\ ++ INSN(lh, scratch, res);\ ++ } else {\ ++ mov_immediate32(scratch, rh);\ ++ INSN(lh, scratch, res);\ ++ } \ ++ }\ ++ void NAME(Register lh, Register rh, Register res){INSN(lh, rh, res); zapnot(res, 0xf, res); } ++ ++ LOGICINSNLIST(EXPAND_W) ++ ARITHINSNLIST(EXPAND_W) ++ ++#undef EXPAND_W ++ ++#undef LOGICINSNLIST ++#undef ARITHINSNLIST ++ ++ ++#define LOGICINSNLIST(FUNC) \ ++ FUNC(andptr, and_ins)\ ++ FUNC(orptr, bis)\ ++ FUNC(xorptr, xor_ins) ++ ++#define ARITHINSNLIST(FUNC) \ ++ FUNC(addptr, addl)\ ++ FUNC(subptr, subl) ++ ++ /* I introduce scratch reg in NAMEptr since it's possible that lh and res could be the same reg */ ++#define EXPAND_PTR(NAME, INSN) \ ++ void NAME(Register lh, long rh, Register res, Register scratch=rcc){\ ++ assert_different_registers(lh, scratch);\ ++ if (rh >=0 && rh < (1<<8))\ ++ INSN(lh, rh, res);\ ++ else if (rh >=0 && rh < (1<<15)) {\ ++ ldi(scratch, rh, R0);\ ++ INSN(lh, scratch, res);\ ++ } else {\ ++ mov_immediate64(scratch, rh);\ ++ INSN(lh, scratch, res);\ ++ }\ ++ }\ ++ void NAME(Register lh, Register rh, Register res){INSN(lh, rh, res);} ++ ++ LOGICINSNLIST(EXPAND_PTR) ++ ARITHINSNLIST(EXPAND_PTR) ++ ++#undef EXPAND_PTR ++ ++ void notl (Register res) { ornot(R0, res, res); } ++ void notptr(Register res) { notl(res); } ++ void addptr(Register rd, Address addr); ++ void notw(Register rd, Register rs); ++ void negptr(Register rs) { subl(R0, rs, rs); } ++ ++// compare instructions ++ ++ void cmpoop(Register lh, Register rh, Register ccReg=rcc); ++ ++ void cmpb(Register lh, int rh, Register ccReg=rcc); ++ void cmpb(Address addr, int imm8, Register ccReg=rcc); ++ void cmpb(AddressLiteral src1, int imm8, Register ccReg=rcc); ++ ++ void cmph(Address addr, int imm16, Register ccReg=rcc); ++ ++ void cmpw(Register lh, int rh, Register ccReg=rcc); ++ void cmpw(Register lh, Register rh, Register ccReg=rcc); ++ void cmpw(Register lh, Address rh, Register ccReg=rcc); ++ void cmpw(Address lh, Register rh, Register ccReg=rcc); ++ void cmpw(Address 
lh, int32_t imm, Register ccReg=rcc, Register tmp=rscratch1); ++ void cmpw(AddressLiteral src1, int32_t imm, Register ccReg=rcc, Register tmp=rscratch1); ++ void cmpw(AddressLiteral src1, Register rh, Register ccReg=rcc); ++ void cmpwu(Register lh, Address rh, Register ccReg=rcc); ++ void cmpws(int cc, Register op1, Register op2, Register ccReg=rcc); ++ void cmpls(int cc, Register op1, Register op2, Register ccReg=rcc); ++ void cmpwu(int cc, Register op1, Register op2, Register ccReg=rcc); ++ void cmplu(int cc, Register op1, Register op2, Register ccReg=rcc); ++ void cmpfs(int cc, FloatRegister op1, FloatRegister op2, FloatRegister ccReg=FcmpRES, bool is_order = false); ++ void cmpfd(int cc, FloatRegister op1, FloatRegister op2, FloatRegister ccReg=FcmpRES, bool is_order = false); ++ void cmpfcc(int cc, FloatRegister op1, FloatRegister op2); ++ void cmpdcc(int cc, FloatRegister op1, FloatRegister op2); ++ ++ void cmpl(Register lh, int rh, Register ccReg=rcc); ++ void cmpl(Register lh, Register rh, Register ccReg=rcc); ++ void cmpl_raw(Register lh, Register rh, Register ccReg=rcc); ++ void cmpq(Register lh, Register rh, Register ccReg=rcc); ++ void cmpUL(Register lh, Register rh, Register ccReg); ++ ++ address cmp_insn_mark = NULL; ++ Register cmp_lh ; ++ Register cmp_rh; ++ bool cmp_long; ++ void set_cmp_insn_mark(Register lh, Register rh, bool lcmp=false); ++ void clear_cmp_insn_mark(); ++ bool cmp_insn_marked(); ++ void jccb(Condition cc, Label& L); ++ ++ void cmpptr(Register lh, int rh, Register ccReg=rcc); ++ void cmpptr(Register lh, Register rh, Register ccReg=rcc); ++ void cmpptr(Register lh, Address rh, Register ccReg=rcc); ++ void cmpptr(Address lh, Register rh, Register ccReg=rcc); ++ void cmpptr(Address lh, int32_t rh, Register ccReg=rcc); ++ void cmpptr(Register lh, AddressLiteral rh, Register ccReg=rcc); ++ ++ void cmpxchgptr(Register xreg, AddressLiteral adr, Register creg, Register tmp); ++ ++ void jump(AddressLiteral addr, Register tmp=AT); //scw tmp=T12 ++ void jump(RuntimeAddress addr, Register tmp=AT); //scw tmp=T12 ++ void jump(ArrayAddress entry, Register tmp1, Register tmp2); ++ void jump_cc(Condition cc, AddressLiteral dst, Register ccReg=rcc, Register tmp=rscratch1); ++ ++ void call(Register entry, Label *retAddr = NULL); ++ void call(Register entry, address& retAddr); ++ void call(AddressLiteral addr, Label *retAddr = NULL, Register tmp=T12); //scw tmp=T12 ++ void call(RuntimeAddress addr, Label *retAddr = NULL, Register tmp=T12); //scw tmp=T12 ++ ++ void jmp(Label& lbl); ++ void jmp(Address rd, Register tmp=AT); ++ void jmp(Register rd, Register tmp=AT); ++ ++ void jcc(Condition cc, Label& L, Register ccReg=rcc, ConditionLength cl = bitl); ++// void jccb(Condition cc, Label& L, Register ccReg=rcc); ++ ++ // Helper functions for statistics gathering. ++ // Unconditional atomic increment. 
++ void atomic_incw(Register counter_addr, Register tmp, Register tmp2); ++ ++ void testb(Address addr, int imm8, Register ccReg=rcc); ++ void testb(Register lh, int rh, Register res=rcc); ++ void testb(Register lh, Register rh, Register res=rcc); ++ void testw(Register lh, int rh, Register res=rcc, Register scratch=rcc); ++ void testw(Register lh, Register rh, Register res=rcc); ++ void testl(Register lh, long rh, Register res=rcc, Register scratch=rcc); ++ void testl(Register lh, Register rh, Register ccReg=rcc); ++ void testptr(Register lh, long rh, Register res=rcc, Register scratch=rcc); ++ void testptr(Register lh, Register rh, Register ccReg=rcc); ++ ++ void inline fmovs(FloatRegister dst, FloatRegister src) { ++ fcpys(src, src, dst); ++ } ++ void inline fmovd(FloatRegister dst, FloatRegister src) { ++ fcpys(src, src, dst); ++ } ++ ++ // swap the two byte of the low 16-bit halfword ++ // this directive will use AT, be sure the high 16-bit of reg is zero ++ void hswap(Register reg); ++ void huswap(Register reg); ++ ++ // convert big endian integer to little endian integer ++ void swap(Register reg); ++ void bswapw(Register reg); ++ ++ /** ++ * if c_reg == *dest then *dest <= x_reg, ++ * else c_reg <= *dest. ++ * The AT indicate if xchg occurred, 1 for xchged, else 0 ++ * @param x_reg ++ * @param dest ++ * @param c_reg ++ */ ++ void cmpxchg(Register x_reg, Address dest, Register c_reg); ++ void cmpxchg32(Register x_reg, Address dest, Register c_reg); ++ void fill_to_size(address start, int size); ++ ++ /** ++ * if oldval == *dest then *dest <= newval ++ * @param oldval ++ * @param dest ++ * @param newval ++ */ ++ void storeLcon(Register oldval, Address dest, Register newval); ++ void storeIcon(Register oldval, Address dest, Register newval); ++ void boundary_test(FloatRegister ft, Register res); ++ ++ // test if x is within signed immediate range for nbits ++ static bool is_uimm(intptr_t x, int nbits) { return intptr_t(0) <= x && x < ( intptr_t(1) << nbits ); } ++ // test if 0 <= x <= 255 ++ static bool is_uimm8(intptr_t x) { return is_uimm(x, 8); } ++ ++ // Various forms of CAS ++ ++ void cmpxchg_obj_header(Register oldv, Register newv, Register obj, Register tmp, ++ Label &suceed, Label *fail); ++ void cmpxchgptr(Register oldv, Register newv, Register addr, Register tmp, ++ Label &suceed, Label *fail); ++ ++ void cmpxchgw(Register oldv, Register newv, Register addr, Register tmp, ++ Label &suceed, Label *fail); ++ ++ void atomic_add(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addw(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addal(Register prev, RegisterOrConstant incr, Register addr); ++ void atomic_addalw(Register prev, RegisterOrConstant incr, Register addr); ++ ++ void atomic_xchg(Register prev, Register newv, Register addr); ++ void atomic_xchgw(Register prev, Register newv, Register addr); ++ void atomic_xchgal(Register prev, Register newv, Register addr); ++ void atomic_xchgalw(Register prev, Register newv, Register addr); ++ ++public: ++ // Calls ++ ++ address trampoline_call(Address entry, CodeBuffer *cbuf = NULL); ++ ++ static bool far_branches() { ++ ShouldNotReachHere(); ++ return 0; ++ } ++ ++ // Jumps that can reach anywhere in the code cache. ++ // Trashes tmp. 
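++  //
++  // Illustrative note (not part of the original patch): far_call/far_jump are
++  // meant for targets that may lie outside the reach of an ordinary pc-relative
++  // branch, e.g. a stub at the far end of a large code cache, along the lines of
++  //
++  //   far_call(RuntimeAddress(stub_entry));   // stub_entry is a placeholder
++  //
++  // In this patch the SW64 bodies still stop in ShouldNotReachHere(), so only
++  // the intended contract is documented here.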
++ void far_call(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1); ++ void far_jump(Address entry, CodeBuffer *cbuf = NULL, Register tmp = rscratch1); ++ ++ static int far_branch_size() { ++ ShouldNotReachHere(); ++ return 0; ++ } ++ ++ // Emit the CompiledIC call idiom ++ void ic_call(address entry, jint method_index = 0); ++ ++public: ++ ++ // Data ++ ++ // dst = src1 if rcc match cc, else dst = src2 ++ void cmove(Condition cc, Register dst, Register src1, Register src2, Register ccReg=rcc); ++ ++ void mov_metadata(Register dst, Metadata* obj); ++ Address allocate_metadata_address(Metadata* obj); ++ Address constant_oop_address(jobject obj); ++ ++ void movoop(Register dst, jobject obj, bool immediate = false); ++ ++ // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. ++ void kernel_crc32(Register crc, Register buf, Register len, ++ Register table0, Register table1, Register table2, Register table3, ++ Register tmp, Register tmp2, Register tmp3); ++ // CRC32 code for java.util.zip.CRC32C::updateBytes() instrinsic. ++ void kernel_crc32c(Register crc, Register buf, Register len, ++ Register table0, Register table1, Register table2, Register table3, ++ Register tmp, Register tmp2, Register tmp3); ++ ++ // Stack push and pop individual 64 bit registers ++ void push(Register src); ++ void push(int32_t imm32); ++ void pop(Register dst); ++ void push2(Register reg1, Register reg2); ++ void push (FloatRegister reg) { subl(esp, 8, esp); fstd(reg, 0, esp); } ++ void pop (FloatRegister reg) { if(UseSW8A) {fldd_a(reg, 8, esp);} else {fldd(reg, 0, esp); addl(esp, 8, esp); }} ++ ++ // push all registers onto the stack ++ void pusha(); ++ void popa(); ++ ++ void pushptr(Address src, Register tmp = rcc) { ldptr(rcc, src); push(rcc);} ++ void repne_scan(Register addr, Register value, Register count, ++ Register scratch); ++ void repne_scanw(Register addr, Register value, Register count, ++ Register scratch); ++ ++ void add(Register Rd, Register Rn, RegisterOrConstant increment); ++ void sub(Register Rd, Register Rn, RegisterOrConstant decrement); ++ ++ void adrp(Register reg1, const Address &dest, unsigned long &byte_offset); ++ ++ void tableswitch(Register index, jint lowbound, jint highbound, ++ Label &jumptable, Label &jumptable_end, int stride = 1) { ++ ShouldNotReachHere(); ++ } ++ ++ // Form an address from base + offset in Rd. Rd may or may not ++ // actually be used: you must use the Address that is returned. It ++ // is up to you to ensure that the shift provided matches the size ++ // of your data. ++ Address form_address(Register Rd, Register base, long byte_offset, int shift); ++ ++ // Return true iff an address is within the 48-bit Sw64 address ++ // space. ++ bool is_valid_Sw64_address(address a) { ++ return ((uint64_t)a >> 48) == 0; ++ } ++ ++ // Load the base of the cardtable byte map into reg. ++ void load_byte_map_base(Register reg); ++ ++ // Prolog generator routines to support switch between x86 code and ++ // generated ARM code ++ ++ // routine to generate an x86 prolog for a stub function which ++ // bootstraps into the generated ARM code which directly follows the ++ // stub ++ // ++ ++public: ++ ++ address read_polling_page(Register r, address page, relocInfo::relocType rtype); ++ address read_polling_page(Register r, relocInfo::relocType rtype); ++ void get_polling_page(Register dest, address page, relocInfo::relocType rtype); ++ ++ // CRC32 code for java.util.zip.CRC32::updateBytes() instrinsic. 
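++  //
++  // Illustrative sketch (not from the original patch) of the classic table-driven
++  // update these helpers implement, written as plain C for clarity:
++  //
++  //   crc = table[(crc ^ byte) & 0xff] ^ (crc >> 8);   // one byte per step
++  //
++  // update_word_crc32 consumes a 32-bit word per iteration by combining the four
++  // pre-shifted tables table0..table3 in the usual slicing-by-four fashion.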
++ void update_byte_crc32(Register crc, Register val, Register table); ++ void update_word_crc32(Register crc, Register v, Register tmp, ++ Register table0, Register table1, Register table2, Register table3, ++ bool upper = false); ++ ++ void string_compare(Register str1, Register str2, ++ Register cnt1, Register cnt2, Register result, ++ Register tmp1, Register tmp2, FloatRegister vtmp1, ++ FloatRegister vtmp2, FloatRegister vtmp3, int ae); ++ ++ void has_negatives(Register ary1, Register len, Register result); ++ ++ void arrays_equals(Register a1, Register a2, Register result, Register cnt1, ++ Register tmp1, Register tmp2, Register tmp3, int elem_size); ++ ++ void string_equals(Register a1, Register a2, Register result, Register cnt1, ++ int elem_size); ++ ++ void fill_words(Register base, Register cnt, Register value); ++ void zero_words(Register base, u_int64_t cnt); ++ void zero_words(Register ptr, Register cnt); ++//// void zero_dcache_blocks(Register base, Register cnt); ++ ++ static const int zero_words_block_size; ++ ++ void byte_array_inflate(Register src, Register dst, Register len, ++ FloatRegister vtmp1, FloatRegister vtmp2, ++ FloatRegister vtmp3, Register tmp4); ++ ++ void char_array_compress(Register src, Register dst, Register len, ++ FloatRegister tmp1Reg, FloatRegister tmp2Reg, ++ FloatRegister tmp3Reg, FloatRegister tmp4Reg, ++ Register result); ++ ++ void encode_iso_array(Register src, Register dst, ++ Register len, Register result, ++ FloatRegister Vtmp1, FloatRegister Vtmp2, ++ FloatRegister Vtmp3, FloatRegister Vtmp4); ++ void string_indexof(Register str1, Register str2, ++ Register cnt1, Register cnt2, ++ Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, ++ Register tmp5, Register tmp6, ++ int int_cnt1, Register result, int ae); ++ void string_indexof_char(Register str1, Register cnt1, ++ Register ch, Register result, ++ Register tmp1, Register tmp2, Register tmp3); ++ void fast_log(FloatRegister vtmp0, FloatRegister vtmp1, FloatRegister vtmp2, ++ FloatRegister vtmp3, FloatRegister vtmp4, FloatRegister vtmp5, ++ FloatRegister tmpC1, FloatRegister tmpC2, FloatRegister tmpC3, ++ FloatRegister tmpC4, Register tmp1, Register tmp2, ++ Register tmp3, Register tmp4, Register tmp5); ++ void generate_dsin_dcos(bool isCos, address npio2_hw, address two_over_pi, ++ address pio2, address dsin_coef, address dcos_coef); ++ private: ++ // begin trigonometric functions support block ++ void generate__ieee754_rem_pio2(address npio2_hw, address two_over_pi, address pio2); ++ void generate__kernel_rem_pio2(address two_over_pi, address pio2); ++ void generate_kernel_sin(FloatRegister x, bool iyIsOne, address dsin_coef); ++ void generate_kernel_cos(FloatRegister x, address dcos_coef); ++ // end trigonometric functions support block ++ void add2_with_carry(Register final_dest_hi, Register dest_hi, Register dest_lo, ++ Register src1, Register src2); ++ void add2_with_carry(Register dest_hi, Register dest_lo, Register src1, Register src2) { ++ add2_with_carry(dest_hi, dest_hi, dest_lo, src1, src2); ++ } ++ void multiply_64_x_64_loop(Register x, Register xstart, Register x_xstart, ++ Register y, Register y_idx, Register z, ++ Register carry, Register product, ++ Register idx, Register kdx); ++ void multiply_128_x_128_loop(Register y, Register z, ++ Register carry, Register carry2, ++ Register idx, Register jdx, ++ Register yz_idx1, Register yz_idx2, ++ Register tmp, Register tmp3, Register tmp4, ++ Register tmp7, Register product_hi); ++ void kernel_crc32_using_crc32(Register 
crc, Register buf, ++ Register len, Register tmp0, Register tmp1, Register tmp2, ++ Register tmp3); ++ void kernel_crc32c_using_crc32c(Register crc, Register buf, ++ Register len, Register tmp0, Register tmp1, Register tmp2, ++ Register tmp3); ++public: ++ void multiply_to_len(Register x, Register xlen, Register y, Register ylen, Register z, ++ Register zlen, Register tmp1, Register tmp2, Register tmp3, ++ Register tmp4, Register tmp5, Register tmp6, Register tmp7); ++ void mul_add(Register out, Register in, Register offs, Register len, Register k); ++ // ISB may be needed because of a safepoint ++ void maybe_isb() { ShouldNotReachHere();} ++ ++private: ++ // Returns an address on the stack which is reachable with a ldr/str of size ++ // Uses rscratch2 if the address is not directly reachable ++ Address spill_address(int size, int offset, Register tmp=rscratch2); ++ ++ bool merge_alignment_check(Register base, size_t size, long cur_offset, long prev_offset) const; ++ ++ // Check whether two loads/stores can be merged into ldp/stp. ++ bool ldst_can_merge(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store) const; ++ ++ // Merge current load/store with previous load/store into ldp/stp. ++ void merge_ldst(Register rx, const Address &adr, size_t cur_size_in_bytes, bool is_store); ++ ++ // Try to merge two loads/stores into ldp/stp. If success, returns true else false. ++ bool try_merge_ldst(Register rt, const Address &adr, size_t cur_size_in_bytes, bool is_store); ++ ++public: ++ ++ // True if an XOR can be used to expand narrow klass references. ++ bool use_XOR_for_compressed_class_base; ++ ++ ++// void andw(Register lh, int rh, Register res, Register scratch=rcc); ++// void andw(Register lh, Register rh, Register res); ++// void andptr(Register lh, long rh, Register res, Register scratch=rcc); ++// void andptr(Register lh, Register rh, Register res); ++ ++ ++ void addiu32(Register rs, int imm, Register rt, Register cc = GP) { ++ if (imm >= 0 && is_uimm8(imm)) { ++ addw(rs, imm, rt); ++ } else if (imm < 0 && is_uimm8(-imm)) { ++ subw(rs, -imm, rt); ++ } else { ++ ldi(cc, imm, R0); ++ addw(rs, cc, rt); ++ } ++ } ++ ++ void addiu(Register rs, int imm, Register rt, Register cc = GP) { ++ if (imm >= 0 && is_uimm8(imm)) { ++ addl(rs, imm, rt); ++ } else if (imm < 0 && is_uimm8(-imm)) { ++ subl(rs, -imm, rt); ++ } else { ++ ldi(cc, imm, R0); ++ addl(rs, cc, rt); ++ } ++ } ++ ++ void ori(Register rs, int imm, Register rt, Register cc = GP) { ++ if (is_uimm8(imm)) { ++ bis(rs, imm, rt); ++ } else { ++ ldi(cc, imm, R0); ++ bis(rs, cc, rt); ++ } ++ } ++ ++ void andi(Register rs, int imm, Register rt, Register cc = GP) { ++ if (is_uimm8(imm)) { ++ and_ins(rs, imm, rt); ++ } else { ++ ldi(cc, imm, R0); ++ and_ins(rs, cc, rt); ++ } ++ } ++ ++ void idiv_sw(Register rs, Register rt, Register rd){ ++ if(rt == R0){ ++ ShouldNotReachHere(); ++ }else{ ++ FloatRegister fsrc1 = f22; ++ FloatRegister fsrc2 = f23; ++ FloatRegister fdest = f24; ++ ifmovd(rs, fsrc1); ++ ifmovd(rt, fsrc2); ++ fcvtld(fsrc1, fsrc1); ++ fcvtld(fsrc2, fsrc2); ++ fdivd(fsrc1, fsrc2, fdest); ++ fcvtdl_z(fdest, fdest); ++ fcvtlw(fdest, fsrc1); ++ fimovs(fsrc1, rd); ++ } ++ } ++ ++ void irem_sw(Register rs, Register rt, Register rd){ ++ if(rt == R0){ ++ ShouldNotReachHere(); ++ }else{ ++ FloatRegister fsrc1 = f22; ++ FloatRegister fsrc2 = f23; ++ FloatRegister fdest = f24; ++ Register tem1 = rscratch3; ++ Register tem2 = rscratch4; ++ ifmovd(rs, fsrc1); ++ ifmovd(rt, fsrc2); ++ fcvtld(fsrc1, fsrc1); ++ fcvtld(fsrc2, 
fsrc2); ++ fdivd(fsrc1, fsrc2, fdest); ++ fcvtdl_z(fdest, fdest); ++ fimovd(fdest, tem1); ++ mulw(tem1, rt, tem2); ++ subw(rs, tem2, rd); ++ } ++ } ++ ++ void ldiv_sw(Register rs, Register rt, Register rd){ ++ if(rt == R0){ ++ ShouldNotReachHere(); ++ }else{ ++ FloatRegister fsrc1 = f23; ++ FloatRegister fsrc2 = f24; ++ FloatRegister fdest = f25; ++ ifmovd(rs, fsrc1); ++ ifmovd(rt, fsrc2); ++ fcvtld(fsrc1, fsrc1); ++ fcvtld(fsrc2, fsrc2); ++ fdivd(fsrc1, fsrc2, fdest); ++ fcvtdl_z(fdest, fdest); ++ fimovd(fdest, rd); ++ } ++ } ++ ++ void lrem_sw(Register rs, Register rt, Register rd){ ++ if(rt == R0){ ++ ShouldNotReachHere(); ++ }else{ ++ FloatRegister fsrc1 = f23; ++ FloatRegister fsrc2 = f24; ++ FloatRegister fdest = f25; ++ Register tem1 = rscratch3; ++ Register tem2 = rscratch4; ++ ifmovd(rs, fsrc1); ++ ifmovd(rt, fsrc2); ++ fcvtld(fsrc1, fsrc1); ++ fcvtld(fsrc2, fsrc2); ++ fdivd(fsrc1, fsrc2, fdest); ++ fcvtdl_z(fdest, fdest); ++ fimovd(fdest, tem1); ++ mull(tem1, rt, tem2); ++ subl(rs, tem2, rd); ++ } ++ } ++ void corrected_idivw(Register ra, Register rb, Register rc) { ++ Label special_case, done; ++ Register tem1 = GP; ++ ++ if(rb == R0) { ++ ShouldNotReachHere(); ++ } else { ++ // load -1 in register ++ ldi(tem1, -1, R0); ++ ++ // check for special case, e.g. rb = -1 ++ cmpeq(tem1, rb, tem1); ++ bne_l(tem1, special_case); ++ ++ // handle normal case ++ divw(ra, rb, rc); ++ beq_l(R0, done); ++ ++ // handle special case ++ bind(special_case); ++ subw(R0, ra, rc); ++ ++ // normal and special case exit ++ bind(done); ++ } ++ } ++ ++ void corrected_idivl(Register ra, Register rb, Register rc) { ++ Label special_case, done; ++ Register tem1 = GP; ++ ++ if (rb == R0) { ++ ShouldNotReachHere(); ++ } else { ++ // load -1 in register ++ ldi(tem1, -1, R0); ++ ++ // check for special case, e.g. 
rb = -1l ++ cmpeq(tem1, rb, tem1); ++ bne_l(tem1, special_case); ++ ++ // handle normal case ++ divl(ra, rb, rc); ++ beq_l(R0, done); ++ ++ // handle special case ++ bind(special_case); ++ subl(R0, ra, rc); ++ ++ // normal and special case exit ++ bind(done); ++ } ++ } ++ void add_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) { ++ if (FRegisterConflict) { ++ assert_different_registers(fs, f28); ++ assert_different_registers(ft, f28); ++ if (fs == ft && ft == fd){ ++ fadds(fs, ft, f28); ++ fcpys(f28, f28, fd); ++ }else if (fs == fd){ ++ fcpys(fs, fs, f28); ++ fadds(f28, ft, fd); ++ }else if (ft == fd){ ++ fcpys(ft, ft, f28); ++ fadds(fs, f28, fd); ++ }else{ ++ fadds(fs, ft, fd); ++ } ++ } else ++ fadds(fs, ft, fd); ++ } ++ ++ void sub_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) { ++ if (FRegisterConflict) { ++ assert_different_registers(fs, f28); ++ assert_different_registers(ft, f28); ++ if (fs == ft && ft == fd){ ++ fsubs(fs, ft, f28); ++ fcpys(f28, f28, fd); ++ }else if (fs == fd){ ++ fcpys(fs, fs, f28); ++ fsubs(f28, ft, fd); ++ }else if (ft == fd){ ++ fcpys(ft, ft, f28); ++ fsubs(fs, f28, fd); ++ }else{ ++ fsubs(fs, ft, fd); ++ } ++ } else ++ fsubs(fs, ft, fd); ++ } ++ ++ void mul_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) { ++ if (FRegisterConflict) { ++ assert_different_registers(fs, f28); ++ assert_different_registers(ft, f28); ++ if (fs == ft && ft == fd){ ++ fmuls(fs, ft, f28); ++ fcpys(f28, f28, fd); ++ }else if (fs == fd){ ++ fcpys(fs, fs, f28); ++ fmuls(f28, ft, fd); ++ }else if (ft == fd){ ++ fcpys(ft, ft, f28); ++ fmuls(fs, f28, fd); ++ }else{ ++ fmuls(fs, ft, fd); ++ } ++ } else ++ fmuls(fs, ft, fd); ++ } ++ ++ void div_s(FloatRegister fd, FloatRegister fs, FloatRegister ft) { ++ if (FRegisterConflict) { ++ assert_different_registers(fs, f28); ++ assert_different_registers(ft, f28); ++ if (fs == ft && ft == fd){ ++ fdivs(fs, ft, f28); ++ fcpys(f28, f28, fd); ++ }else if (fs == fd){ ++ fcpys(fs, fs, f28); ++ fdivs(f28, ft, fd); ++ }else if (ft == fd){ ++ fcpys(ft, ft, f28); ++ fdivs(fs, f28, fd); ++ }else{ ++ fdivs(fs, ft, fd); ++ } ++ } else ++ fdivs(fs, ft, fd); ++ } ++ ++ void add_d(FloatRegister fd, FloatRegister fs, FloatRegister ft) { ++ if (FRegisterConflict) { ++ assert_different_registers(fs, f28); ++ assert_different_registers(ft, f28); ++ if (fs == ft && ft == fd){ ++ faddd(fs, ft, f28); ++ fcpys(f28, f28, fd); ++ }else if (fs == fd){ ++ fcpys(fs, fs, f28); ++ faddd(f28, ft, fd); ++ }else if (ft == fd){ ++ fcpys(ft, ft, f28); ++ faddd(fs, f28, fd); ++ }else{ ++ faddd(fs, ft, fd); ++ } ++ } else ++ faddd(fs, ft, fd); ++ } ++ ++ void sub_d(FloatRegister fd, FloatRegister fs, FloatRegister ft) { ++ if (FRegisterConflict) { ++ assert_different_registers(fs, f28); ++ assert_different_registers(ft, f28); ++ if (fs == ft && ft == fd){ ++ fsubd(fs, ft, f28); ++ fcpys(f28, f28, fd); ++ }else if (fs == fd){ ++ fcpys(fs, fs, f28); ++ fsubd(f28, ft, fd); ++ }else if (ft == fd){ ++ fcpys(ft, ft, f28); ++ fsubd(fs, f28, fd); ++ }else{ ++ fsubd(fs, ft, fd); ++ } ++ } else ++ fsubd(fs, ft, fd); ++ } ++ ++ void mul_d(FloatRegister fd, FloatRegister fs, FloatRegister ft) { ++ if (FRegisterConflict) { ++ assert_different_registers(fs, f28); ++ assert_different_registers(ft, f28); ++ if (fs == ft && ft == fd){ ++ fmuld(fs, ft, f28); ++ fcpys(f28, f28, fd); ++ }else if (fs == fd){ ++ fcpys(fs, fs, f28); ++ fmuld(f28, ft, fd); ++ }else if (ft == fd){ ++ fcpys(ft, ft, f28); ++ fmuld(fs, f28, fd); ++ }else{ ++ fmuld(fs, ft, fd); ++ } ++ } else ++ 
fmuld(fs, ft, fd); ++ } ++ ++ void div_d(FloatRegister fd, FloatRegister fs, FloatRegister ft) { ++ if (FRegisterConflict) { ++ assert_different_registers(fs, f28); ++ assert_different_registers(ft, f28); ++ if (fs == ft && ft == fd){ ++ fdivd(fs, ft, f28); ++ fcpys(f28, f28, fd); ++ }else if (fs == fd){ ++ fcpys(fs, fs, f28); ++ fdivd(f28, ft, fd); ++ }else if (ft == fd){ ++ fcpys(ft, ft, f28); ++ fdivd(fs, f28, fd); ++ }else{ ++ fdivd(fs, ft, fd); ++ } ++ } else ++ fdivd(fs, ft, fd); ++ } ++ ++ void sqrt_s(FloatRegister fd, FloatRegister fs) { ++ if (FRegisterConflict) { ++ if(fs == fd){ ++ mov_s(f28, fs); ++ fsqrts(f28, fd); ++ } else ++ fsqrts(fs, fd); ++ } else ++ fsqrts(fs, fd); ++ } ++ ++ void sqrt_d(FloatRegister fd, FloatRegister fs) { ++ if (FRegisterConflict) { ++ if (fs == fd) { ++ mov_d(f28, fs); ++ fsqrtd(f28,fd); ++ } else ++ fsqrtd(fs, fd); ++ } else ++ fsqrtd(fs, fd); ++ } ++ ++ void cvt_s_l(FloatRegister fd, FloatRegister fs) { ++ if (FRegisterConflict) { ++ assert_different_registers(fs, f28); ++ assert_different_registers(fd, f28); ++ if (fs == fd){ ++ fcpys(fs, fs, f28); ++ fcvtls(f28, fd); ++ }else{ ++ fcvtls(fs, fd); ++ } ++ } else ++ fcvtls(fs, fd); ++ } ++ ++ void cvt_d_l(FloatRegister fd, FloatRegister fs) { ++ if (FRegisterConflict) { ++ assert_different_registers(fs, f28); ++ assert_different_registers(fd, f28); ++ if (fs == fd){ ++ fcpys(fs, fs, f28); ++ fcvtld(f28, fd); ++ }else{ ++ fcvtld(fs, fd); ++ } ++ } else ++ fcvtld(fs, fd); ++ } ++ ++ void cvt_d_s(FloatRegister fd, FloatRegister fs) { ++ if (FRegisterConflict) { ++ assert_different_registers(fs, f28); ++ assert_different_registers(fd, f28); ++ if (fs == fd){ ++ fcpys(fs, fs, f28); ++ fcvtsd(f28, fd); ++ }else{ ++ fcvtsd(fs, fd); ++ } ++ } else ++ fcvtsd(fs, fd); ++ } ++ ++ void cvt_s_d(FloatRegister fd, FloatRegister fs) { ++ if (FRegisterConflict) { ++ assert_different_registers(fs, f28); ++ assert_different_registers(fd, f28); ++ if (fs == fd){ ++ fcpys(fs, fs, f28); ++ fcvtds(f28, fd); ++ } else ++ fcvtds(fs, fd); ++ } else ++ fcvtds(fs, fd); ++ } ++ ++ void c_un_s (FloatRegister fs, FloatRegister ft) { ++ assert_different_registers(fs, FcmpRES); ++ assert_different_registers(ft, FcmpRES); ++ fcmpun(fs, ft, FcmpRES); ++ } ++ void c_eq_s (FloatRegister fs, FloatRegister ft) { ++ assert_different_registers(fs, FcmpRES); ++ assert_different_registers(ft, FcmpRES); ++ fcmpeq(fs, ft, FcmpRES); ++ } ++ void c_ueq_s (FloatRegister fs, FloatRegister ft) { ++ assert_different_registers(fs, FcmpRES); ++ assert_different_registers(ft, FcmpRES); ++ fcmpun(fs, ft, FcmpRES); ++ fbne(FcmpRES, 1); ++ fcmpeq(fs, ft, FcmpRES); ++ } ++ ++ // fs < ft FcmpRES = 2.0 ++ // fs/ft nan FcmpRES = 2.0 ++ void c_ult_s (FloatRegister fs, FloatRegister ft) { ++ assert_different_registers(fs, FcmpRES); ++ assert_different_registers(ft, FcmpRES); ++ fcmpun(fs, ft, FcmpRES); ++ fbne(FcmpRES, 1); ++ fcmplt(fs, ft, FcmpRES); ++// fcmple(ft, fs, FcmpRES); ++// fcmpeq(FcmpRES, fzero, FcmpRES); ++ } ++ ++ void c_olt_s (FloatRegister fs, FloatRegister ft) { ++ assert_different_registers(fs, f28, FcmpRES); ++ assert_different_registers(ft, f28, FcmpRES); ++// fcmpun(fs, ft, f28); ++// fcmpeq(f28, f31, FcmpRES); ++// fbeq(FcmpRES, 1); ++ fcmplt(fs, ft, FcmpRES); ++ } ++ ++ void c_ult_d (FloatRegister fs, FloatRegister ft) { ++ assert_different_registers(fs, FcmpRES); ++ assert_different_registers(ft, FcmpRES); ++ fcmpun(fs, ft, FcmpRES); ++ fbne(FcmpRES, 1); ++ fcmplt(fs, ft, FcmpRES); ++// fcmple(ft, fs, FcmpRES); ++// fcmpeq(FcmpRES, 
fzero, FcmpRES); ++ } ++ ++ void c_olt_d (FloatRegister fs, FloatRegister ft) { ++ assert_different_registers(fs, f28, FcmpRES); ++ assert_different_registers(ft, f28, FcmpRES); ++// fcmpun(fs, ft, f28); ++// fcmpeq(f28, f31, FcmpRES); ++// fbeq(FcmpRES, 1); ++ fcmplt(fs, ft, FcmpRES); ++ } ++ ++ void c_ole_s (FloatRegister fs, FloatRegister ft) { ++ assert_different_registers(fs, f28, FcmpRES); ++ assert_different_registers(ft, f28, FcmpRES); ++// fcmpun(fs, ft, f28); ++// fcmpeq(f28, f31, FcmpRES); ++// fbeq(FcmpRES, 1); ++ fcmple(fs, ft, FcmpRES); ++ } ++ ++ void c_ule_s (FloatRegister fs, FloatRegister ft) { ++ assert_different_registers(fs, FcmpRES); ++ assert_different_registers(ft, FcmpRES); ++ fcmpun(fs, ft, FcmpRES); ++ fbne(FcmpRES, 1); ++ fcmple(fs, ft, FcmpRES); ++// fcmplt(ft, fs, FcmpRES); ++// fcmpeq(FcmpRES, fzero, FcmpRES); ++ } ++ ++ void c_un_d (FloatRegister fs, FloatRegister ft) { ++ assert_different_registers(fs, FcmpRES); ++ assert_different_registers(ft, FcmpRES); ++ fcmpun(fs, ft, FcmpRES); ++ } ++ void c_eq_d (FloatRegister fs, FloatRegister ft) { ++ assert_different_registers(fs, FcmpRES); ++ assert_different_registers(ft, FcmpRES); ++ fcmpeq(fs, ft, FcmpRES); ++ } ++ void c_ueq_d (FloatRegister fs, FloatRegister ft) { ++ assert_different_registers(fs, FcmpRES); ++ assert_different_registers(ft, FcmpRES); ++ fcmpun(fs, ft, FcmpRES); ++ fbne(FcmpRES, 1); ++ fcmpeq(fs, ft, FcmpRES); ++ } ++ ++ void c_ole_d (FloatRegister fs, FloatRegister ft) { ++ assert_different_registers(fs, f28, FcmpRES); ++ assert_different_registers(ft, f28, FcmpRES); ++// fcmpun(fs, ft, f28); ++// fcmpeq(f28, f31, FcmpRES); ++// fbeq(FcmpRES, 1); ++ fcmple(fs, ft, FcmpRES); ++ } ++ ++ void c_ule_d (FloatRegister fs, FloatRegister ft) { ++ assert_different_registers(fs, FcmpRES); ++ assert_different_registers(ft, FcmpRES); ++ fcmpun(fs, ft, FcmpRES); ++ fbne(FcmpRES, 1); ++ fcmple(fs, ft, FcmpRES); ++// fcmplt(ft, fs, FcmpRES); ++// fcmpeq(FcmpRES, fzero, FcmpRES); ++ } ++ ++ // Frame creation and destruction shared between JITs. 
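++ // A minimal usage sketch (illustrative only; the frame size and layout are
++ // chosen by the calling JIT):
++ //   build_frame(frame_size_in_bytes);    // prologue
++ //   ... generated method body ...
++ //   remove_frame(frame_size_in_bytes);   // epilogue
++ //   ret();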
++ void build_frame(int framesize); ++ void remove_frame(int framesize); ++ ++public: ++// void mov(Register dst, Address a); ++ void mov_immediate64(Register dst, u_int64_t imm64, RelocationHolder const& rspec, int format = 0); ++ void mov_address64(Register dst, u_int64_t imm64, RelocationHolder const& rspec, int format = 0); ++ void mov_immediate64(Register dst, u_int64_t imm64); ++ void mov_immediate32(Register dst, int imm32); ++ void mov_immediate32u(Register dst, int imm32); ++ void mov_immediate32s(Register dst, int imm32); ++ void set64(Register d, long value); ++ void push_RA_call(Register entry, Register tmp=T12) { ++ if (entry != tmp) movl(tmp, entry); ++ ++ br(RA, 0); ++ addl(RA, 4 * BytesPerInt, RA); ++ subl(rsp, wordSize, rsp); ++ stl(RA, 0, rsp); ++ Assembler::call(RA, tmp, (int)0); ++ addl(rsp, wordSize, rsp); ++ if(UseSetfpec) ++ setfpec1(); ++ } ++ ++ static void imm48_split(long imm48, int16_t &msb_l, int16_t &lsb_h, int16_t &lsb_l) { ++ int32_t lsb32 = (int32_t) ((intptr_t) imm48); ++ int32_t msb32 = (int32_t) (((intptr_t) imm48 - lsb32) >> 32); ++ ++ msb_l = (int16_t) msb32; ++ lsb_h = (lsb32 - (int16_t) lsb32) >> 16; ++ lsb_l = (int16_t) lsb32; ++ guarantee((msb_l >= 0x0 && msb_l < 0x7fff) || (msb_l == 0x7fff && lsb32 >= 0x0 && lsb32 < 0x7fff8000), "wrong number in li48 "); ++ if (lsb32 >= 0x7fff8000) ++ msb_l = msb_l + 1; ++ } ++ ++// void push(RegSet regs, Register stack) { if (regs.bits()) push(regs.bits(), stack); } ++// void pop(RegSet regs, Register stack) { if (regs.bits()) pop(regs.bits(), stack); } ++ ++ // Push and pop everything that might be clobbered by a native ++ // runtime call except rscratch1 and rscratch2. (They are always ++ // scratch, so we don't have to protect them.) Only save the lower ++ // 64 bits of each vector register. ++ void push_call_clobbered_registers(); ++ void pop_call_clobbered_registers(); ++ ++ // Helper functions for statistics gathering. 
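++ // Illustrative only: bump a global 32-bit counter from generated code, using
++ // the class scratch registers as the required temps (the counter symbol here
++ // is just an example):
++ //   atomic_inc32((address)&SharedRuntime::_partial_subtype_ctr, 1,
++ //                rscratch1, rscratch2);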
++ void atomic_inc32(address counter_addr, int inc, Register tmp_reg1, Register tmp_reg2); ++ ++ // now mov instructions for loading absolute addresses and 32 or ++ // 64 bit integers ++ ++ inline void mov(Register dst, address addr) ++ { ++ mov_immediate64(dst, (u_int64_t)addr); ++ } ++ ++ inline void mov(Register dst, u_int64_t imm64) ++ { ++ mov_immediate64(dst, imm64); ++ } ++ ++ inline void movws(Register dst, u_int32_t imm32) ++ { ++ mov_immediate32(dst, imm32); ++ movws(dst, dst); ++ } ++ ++ /** ++ * x86 ++ * movslq(Register dst, Register src) ++ * sw64 ++ * movws(Register dst, Register src) ++ * note ++ * sign extend 32bit to 64bit ++ */ ++ inline void movws(Register dst, Register src) ++ { ++ addw(src, R0, dst); ++ } ++ ++ inline void movws(Register dst, Address src) ++ { ShouldNotReachHere(); ++ ldw(dst, src); ++ } ++ ++ inline void movl(Register dst, u_int64_t imm64) ++ { ++ mov_immediate64(dst, imm64); ++ } ++ ++ inline void movws(Register dst, int32_t imm32) ++ { ++ mov_immediate32s(dst, imm32); ++ } ++ ++ inline void movwu(Register dst, u_int32_t imm32) ++ { ++ mov_immediate32u(dst, imm32); ++ } ++ ++ inline void movw(Register dst, u_int32_t imm32) ++ { ++ mov_immediate32(dst, imm32); ++ } ++ ++ inline void movw(Register dst, Register src) ++ { ++ zapnot(src, 0xf, dst); ++ } ++ ++ inline void movwu(Register dst, Register src) ++ { ++ zapnot(src, 0xf, dst); ++ } ++ ++ inline void movw(Register dst, ExternalAddress addr, Register tmp=rcc) ++ { ++ mov_immediate64(tmp, (intptr_t)addr.target(), addr.rspec()); ++ zapnot(tmp, 0xf, dst); ++ } ++ ++ inline void movw(ExternalAddress addr, Register src, Register tmp=rcc) ++ { ++ mov_immediate64(tmp, (intptr_t)addr.target(), addr.rspec()); ++ stw(src, Address(tmp, 0)); ++ } ++ ++ inline void mov(Register dst, long l) ++ { ++ mov(dst, (u_int64_t)l); ++ } ++ ++ inline void mov(Register dst, int i) ++ { ++ mov(dst, (long)i); ++ } ++ ++ void mov(Register dst, RegisterOrConstant src) { ++ ShouldNotReachHere(); ++ } ++ ++ ++public: ++ ++ // Can we reach target using jal/j from anywhere ++ // in the code cache (because code can be relocated)? 
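++ // On SW64 the answer below is a conservative "no", so callers fall back to a
++ // far call through a register, e.g. (illustrative):
++ //   if (!reachable_from_cache(dest)) {
++ //     patchable_call(dest);   // T12 is the default scratch register
++ //   }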
++ bool reachable_from_cache(address target) { ++ return false; ++ } ++ // Argument ops ++ inline void store_int_argument(Register s, Argument &a) {ShouldNotReachHere(); ++ if(a.is_Register()) { ++ move(a.as_Register(), s); ++ } else { ++ sw(s, a.as_caller_address()); ++ } ++ } ++ ++ void sign_extend_short(Register reg) { sexth(reg, reg); } ++ void sign_extend_byte (Register reg) { sextb(reg, reg); } ++ ++ void trigfunc(char trig, int num_fpu_regs_in_use = 1); ++ ++ void subu32(Register rd, Register rs, Register rt) { subw(rs, rt, rd); } ++ void dsub (Register rd, Register rs, Register rt) { subl(rs, rt, rd); } ++ void addu32(Register rd, Register rs, Register rt) { addw(rs, rt, rd); } ++ void daddu (Register rd, Register rs, Register rt) { addl(rs, rt, rd); } ++ void dadd (Register rd, Register rs, Register rt) { addl(rs, rt, rd); } ++ //move should replace with movl jzy ++ void move(Register rd, Register rs) { movl(rd, rs); } ++ void movl(Register rd, Register rs) { if (rs != rd) bis(R0, rs, rd); } ++ void stbool(bool boolconst, Address dst, Register tmp = rscratch1) { ++ ldi(tmp, (int) boolconst, R0); ++ if(sizeof(bool) == 1) ++ stb(tmp, dst); ++ else if(sizeof(bool) == 2) ++ sth(tmp, dst); ++ else if(sizeof(bool) == 4) ++ stw(tmp, dst); ++ else ++ ShouldNotReachHere(); ++ } ++ ++ void mov_s(FloatRegister fd, FloatRegister fs) { fcpys(fs, fs, fd); } ++ void mov_d(FloatRegister fd, FloatRegister fs) { fcpys(fs, fs, fd); } ++ void abs_d(FloatRegister fd, FloatRegister fs) { fcpys(f31, fs, fd); } ++ ++ void brk (int code) { sys_call(0x80); } ++ ++ void dsll(Register rd, Register rt , int sa) { slll(rt, sa, rd); } ++ void dsrl(Register rd, Register rt , int sa) { srll(rt, sa, rd); } ++ void sll (Register rt, int sa, Register rd) { slll(rt, sa&0x1f, rd); addw(rd, 0, rd); } ++ void sllv(Register rd, Register rt, Register rs, Register cc = GP) { and_ins(rs, 0x1f, cc); slll(rt, cc, rd); addw(rd, 0, rd); } ++ void sra (Register rd, Register rt, int sa) { addw(rt, 0, rd); sral(rt, sa&0x1f, rd); } ++ void srav(Register rd, Register rt, Register rs, Register cc = GP) { and_ins(rs, 0x1f, cc); addw(rt, 0, rd); sral(rd, cc, rd); } ++ void srlv(Register rd, Register rt, Register rs, Register cc = GP) { and_ins(rs, 0x1f, cc); zapnot(rt, 0xf, rd); srll(rd, cc, rd); addw(rd, 0x0, rd); } ++ ++ void lbu (Register rt, Address src) { ldbu(rt, src.disp(), src.base()); } ++// void lb (Register rt, Address src) { lb(rt, src.disp(), src.base()); } ++// void lb (Register rt, int off, Register base) { ldbu(rt, off, base); sextb(rt, rt); } ++// void lh (Register rt, Address src) { ldh(rt, src.disp(), src.base()); } ++// void ldh (Register rt, int off, Register base) { ldhu(rt, Address(base, off)); sexth(rt, rt); } ++ void lhu (Register rt, Address src) { ldhu(rt, src); } ++// void lhu (Register rt, Register base, int off) { ldhu(rt, Address(base, off)); } ++ void lw (Register rt, Address src) { ldw(rt, src.disp(), src.base()); } ++// void ldwu(Register rt, int off, Register base) { ldw(rt, off, base); zapnot(rt, 0xF, rt); } ++ void ld (Register rt, Address src) { ldl(rt, src.disp(), src.base()); } ++ void sb (Register rt, Address dst) { stb(rt, dst.disp(), dst.base()); } ++ void sb (Register rt, Register base, int off) { stb(rt, off, base); } ++ ++ // ld_ptr will perform lw for 32 bit VMs and ld for 64 bit VMs ++ inline void ld_ptr(Register rt, Address a){ ++ ldl(rt, a.disp(), a.base()); ++ } ++ inline void st_ptr(Register rt, Address a){ ++ stl(rt, a.disp(), a.base()); ++ } ++ ++// void lwc1(FloatRegister rt, 
Address src) { lwc1(rt, src.base(), src.disp()); } ++// void lwc1(FloatRegister ft, Register base, int off) { flds(ft, off, base); } ++// void ldc1(FloatRegister rt, Address src) { ldc1(rt, src.base(), src.disp()); } ++// void ldc1(FloatRegister ft, Register base, int off) { fldd(ft, off, base); } ++ void lw (Register rt, Register base, int off) { ldw(rt, off, base); } ++ void ld (Register rt, Register base, int off) { ldl(rt, off, base); } ++// void swc1(FloatRegister ft, Register base, int off) { fsts(ft, off, base); } ++// void sdc1(FloatRegister ft, Register base, int off) { fstd(ft, off, base); } ++ void sw (Register rt, Register base, int off) { stw(rt, off, base); } ++ void sd (Register rt, Register base, int off) { stl(rt, off, base); } ++ ++// void fflds(FloatRegister rt, Address src) { flds(rt, src.disp(), src.base()); } ++// void ffldd(FloatRegister rt, Address src) { fldd(rt, src.disp(), src.base()); } ++// void ffsts(FloatRegister rt, Address dst) { fsts(rt, dst.disp(), dst.base()); } ++// void ffstd(FloatRegister rt, Address dst) { fstd(rt, dst.disp(), dst.base()); } ++ ++ void sw(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ int disp = dst.disp(); ++ ++ if( Assembler::is_simm16(disp) ) { ++ stw(src, disp, base); ++ } else { ++ mov_immediate32(AT, disp); ++ addl(base, AT, AT); ++ stw(src, 0, AT); ++ } ++ } ++ ++ void std(Register rt, Address dst) { ++ Register src = rt; ++ Register base = dst.base(); ++ int disp = dst.disp(); ++ ++ if(is_simm16(disp)) { ++ stl(src, disp, base); ++ } else { ++ mov_immediate32(AT, disp); ++ addl(base, AT, AT); ++ stl(src, 0, AT); ++ } ++ } ++ ++ void empty_FPU_stack(){/*need implemented*/}; ++ ++ inline void store_ptr_argument(Register s, Argument &a) {ShouldNotReachHere(); ++ if(a.is_Register()) { ++ move(a.as_Register(), s); ++ } else { ++ st_ptr(s, a.as_caller_address()); ++ } ++ } ++ ++ inline void store_float_argument(FloatRegister s, Argument &a) {ShouldNotReachHere(); ++ if(a.is_Register()) { ++ fcpys(s, s, a.as_FloatRegister()); ++ } else { ++ fsts(s, a.as_caller_address()); ++ } ++ } ++ ++ inline void store_double_argument(FloatRegister s, Argument &a) {ShouldNotReachHere(); ++ if(a.is_Register()) { ++ fcpys(s, s, a.as_FloatRegister()); ++ } else { ++ fstd(s, a.as_caller_address()); ++ } ++ } ++ ++ void load( int width, Register ra, int mdisp, Register rb ){ ++ if(width == 0) ldbu( ra, mdisp, rb ); ++ else if(width == 1) ldhu( ra, mdisp, rb ); ++ else if(width == 2) ldw( ra, mdisp, rb ); ++ else ldl( ra, mdisp, rb ); ++ } ++ ++ void store( int width, Register ra, int mdisp, Register rb ){ ++ if(width == 0) stb( ra, mdisp, rb ); ++ else if(width == 1) sth( ra, mdisp, rb ); ++ else if(width == 2) stw( ra, mdisp, rb ); ++ else stl( ra, mdisp, rb ); ++ } ++ ++ //get the offset field of jump/branch instruction ++ //for condition branch instruction the disp is 21 bits ++ int offset(address entry) { ++ assert(is_simm21((entry - pc() - 4) / 4), "change this code"); ++ if (!is_simm21((entry - pc() - 4) / 4)) { ++ tty->print_cr("!!! is_simm16: %x", (unsigned int)((entry - pc() - 4) / 4)); ++ } ++ return (entry - pc() - 4) / 4; ++ } ++ ++ /** ++ * oop_maps->add_gc_map use offset to compute map ++ * but sw should put setfpec1 after call where will call gcc's code in, ++ * so sw should not use default offset method ++ * ++ * lbl is label which use to calculate return address ++ * offset is codebuffer's offset. 
++ */ ++ int offset(Label &lbl, address start) { ++ assert(lbl.is_bound(), "need bound"); ++ int off = lbl.loc_pos() - (start - code_section()->start()); ++ return off; ++ } ++ ++ ++ void beq_a (Register a, address entry) { beq(a, offset(entry)); } ++ void beq_l (Register a, Label& L) { beq(a, offset(target(L))); } ++ void beq_c (Register rs, Register rt, Label& L, Register cc = GP) { ++ if ( rt == R0 ) { ++ beq(rs, offset(target(L))); ++ } else if (rs == R0) { ++ beq(rt, offset(target(L))); ++ } else { ++ cmpeq(rs, rt, cc); ++ bne(cc, offset(target(L))); ++ } ++ } ++ void bne_l (Register a, Label& L) { bne(a, offset(target(L))); } ++ void bne_c (Register rs, Register rt, Label& L, Register cc = GP) { ++ if ( rt == R0 ) { ++ bne(rs, offset(target(L))); ++ } else if (rs == R0) { ++ bne(rt, offset(target(L))); ++ } else { ++ cmpeq(rs, rt, cc); ++ beq(cc, offset(target(L))); ++ } ++ } ++ ++ void bgtz(Register rs, address entry) { bgt(rs, offset(entry)); } ++ void blez(Register rs, address entry) { ble(rs, offset(entry)); } ++ ++ void bge_l( Register a, Label& L ) { bge( a, offset(target(L))); } ++ void bgt_l( Register a, Label& L ) { bgt( a, offset(target(L))); } ++ void ble_l( Register a, Label& L ) { ble( a, offset(target(L))); } ++ void blt_l( Register a, Label& L ) { blt( a, offset(target(L))); } ++ void sltu (Register rd, Register rs, Register rt) { cmpult(rs, rt, rd); } ++ void slti(Register rt, Register rs, int imm, Register cc = GP) { ldi(cc, imm, R0); cmplt(rs, cc, rt); } ++ ++ void ffbeq(FloatRegister rs, Label& L) { fbeq(rs, offset(target(L))); } ++ void ffbne(FloatRegister rs, Label& L) { fbne(rs, offset(target(L))); } ++ ++ //we need 2 fun to save and resotre general register ++ void pushad(Register skip = noreg); ++ void popad(Register skip = noreg); ++ void saveTRegisters(); ++ void restoreTRegisters(); ++ ++ void ret_sw() { Assembler::ret(R0, RA, 0);} ++ void ret() { Assembler::ret(R0, RA, 0);} ++ ++ //TODO:to implement ++ void xchgptr(Register src1, Address src2) { stop("unimplement xchgptr: jzy"); } ++ void xchgptr(Register src1, Register src2); ++ void xchgw (Register src1, Address src2) { stop("unimplement xchgw: jzy");} ++ ++ void cmpxchgq(Register src1, Address src2) { stop("unimplement cmpxchgq: jzy"); } ++ void cmpxchgw(Register src1, Address src2) { stop("unimplement cmpxchgw: jzy"); } ++ void cmpxchgb(Register src1, Address src2) { stop("unimplement cmpxchgb: jzy"); } ++ void lock() { memb(); } ++ ++ void xaddw (Address src1, Register src2) { stop("unimplement xaddw: jzy");} ++ void xaddptr (Address src1, Register src2) { stop("unimplement xaddptr: jzy");} ++ // Jumps ++ void jr(Register rs) { Assembler::jmp(rscratch2, rs, 0); } ++ void jr(address entry); ++ void jr(address entry, relocInfo::relocType rtype); ++ ++ void patchable_jump(address target); ++ ++ void jalr(Register rd, Register rs) { ShouldNotReachHere(); } ++ void jalr(Register rs) { jalr(RA, rs); } ++ void jalr() { jalr(T12); } ++ ++ void jalr_setfpec1(Register rd, Register rs) { ShouldNotReachHere(); } ++ void jalr_setfpec1(Register rs) { jalr_setfpec1(RA, rs); } ++ ++ // Calls ++ void call_patch(address entry); ++ void call_patch(address entry, relocInfo::relocType rtype); ++ ++ void patchable_call_setfpec1(address target); ++ void patchable_call(address target, Label *retAddr = NULL, Register tmp=T12); ++ ++ inline void xorr ( Register rd, Register rs, Register rt ) { xor_ins(rs, rt, rd); } ++ inline void andnot ( Register ra, Register rb, Register rc ) { bic( ra, rb, rc ); } ++ inline void andnot ( 
Register ra, int lit, Register rc ) { bic( ra, lit, rc ); } ++ inline void or_ins ( Register ra, Register rb, Register rc ) { bis( ra, rb, rc ); } ++ inline void or_ins ( Register ra, int lit, Register rc ) { bis( ra, lit, rc ); } ++ ++ // Generalized Test Bit And Branch, including a "far" variety which ++ // spans more than 32KiB. ++ void tbr(Condition cond, Register Rt, int bitpos, Label &dest, bool far = false) { ++ ShouldNotReachHere(); ++ } ++ ++ // idiv variant which deals with MINLONG as dividend and -1 as divisor ++ int corrected_idivl(Register result, Register ra, Register rb, ++ bool want_remainder, Register tmp = rscratch1); ++ int corrected_idivq(Register result, Register ra, Register rb, ++ bool want_remainder, Register tmp = rscratch1); ++ ++ static address target_addr_for_insn(address insn_addr, unsigned insn); ++ static address target_addr_for_insn(address insn_addr) { ++ unsigned insn = *(unsigned*)insn_addr; ++ return target_addr_for_insn(insn_addr, insn); ++ } ++ ++ static void assert_signed_word_disp_range(intptr_t x, int nbits) { ++ assert( (x & 3) == 0, "not word aligned"); ++ assert_signed_range(x, nbits + 2); ++ } ++ ++ static intptr_t inv_wdisp( int x, intptr_t pos, int nbits ) { ++ int pre_sign_extend = x & (( 1 << nbits ) - 1); ++ int r = pre_sign_extend >= ( 1 << (nbits-1) ) ++ ? pre_sign_extend | ~(( 1 << nbits ) - 1) ++ : pre_sign_extend; ++ return (r << 2) + pos; ++ } ++ ++ static int wdisp( intptr_t x, intptr_t off, int nbits ) { ++ intptr_t xx = x - off; ++ assert_signed_word_disp_range(xx, nbits); ++ int r = (xx >> 2) & (( 1 << nbits ) - 1); ++ assert( inv_wdisp( r, off, nbits ) == x, "inverse not inverse"); ++ return r; ++ } ++ static int ins_mask(int msb, int lsb) { ++ int nbits = msb - lsb + 1; ++ unsigned mask = checked_cast(right_n_bits(nbits)); ++ mask <<= lsb; ++ return mask; ++ } ++ static int wdisp2( intptr_t dst, intptr_t src, int msb, int lsb) { ++ intptr_t xx = dst - src; ++ int nbits = msb - lsb +1; ++ assert_signed_word_disp_range(xx, nbits); ++ int r = (xx >> 2) & (( 1 << nbits ) - 1); ++ assert( inv_wdisp( r, src, nbits ) == dst, "inverse not inverse"); ++ r <<= lsb; ++ return r; ++ } ++ // signed immediate, in low bits, nbits long ++ static int simm(int x, int nbits) { ++ assert_signed_range(x, nbits); ++ return x & (( 1 << nbits ) - 1); ++ } ++ ++ void verify_oop_subroutine(); ++ ++ void cmp_klass(Register oop, Register trial_klass, Register tmp); ++ ++ void resolve_oop_handle(Register result, Register tmp = rscratch1); ++ void load_mirror(Register mirror, Register method, Register tmp = rscratch1); ++}; ++ ++class ScopeMark { ++private: ++ MacroAssembler* _masm; ++ char _begin[300]; ++ char _end[300]; ++public: ++ ++ ScopeMark(MacroAssembler* masm, const char* position, const char* comment = "") : _masm(masm) { ++ if (comment == "") { ++ ::sprintf(_begin, "%s{", position); ++ ::sprintf(_end, "%s}", position); ++ } else { ++ ::sprintf(_begin, "%s %s %s enter", position, "{", comment); ++ ::sprintf(_end, "%s leave }", position); ++ } ++ ++ _masm->block_comment(_begin); ++ } ++ ++ ~ScopeMark() { ++ _masm->block_comment(_end); ++ } ++}; ++ ++class SizedScope { ++private: ++ int _size; ++ MacroAssembler* _masm; ++ address _start; ++public: ++ SizedScope(MacroAssembler* masm, int size) { ++ _masm = masm; ++ _size = size; ++ _start = _masm->pc(); ++ } ++ ~SizedScope() { ++ if (_masm->pc() - _start > _size) Unimplemented(); ++ while (_masm->pc() - _start < _size) _masm->nop(); ++ } ++}; ++ ++#ifdef ASSERT ++inline bool 
AbstractAssembler::pd_check_instruction_mark() { return true; } ++#endif ++ ++/** ++ * class SkipIfEqual: ++ * ++ * Instantiating this class will result in assembly code being output that will ++ * jump around any code emitted between the creation of the instance and it's ++ * automatic destruction at the end of a scope block, depending on the value of ++ * the flag passed to the constructor, which will be checked at run-time. ++ */ ++class SkipIfEqual { ++ private: ++ MacroAssembler* _masm; ++ Label _label; ++ ++ public: ++ SkipIfEqual(MacroAssembler*, const bool* flag_addr, bool value); ++ ~SkipIfEqual(); ++}; ++ ++struct tableswitch { ++ Register _reg; ++ int _insn_index; jint _first_key; jint _last_key; ++ Label _after; ++ Label _branches; ++}; ++ ++#endif // CPU_SW64_VM_MACROASSEMBLER_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/macroAssembler_sw64.inline.hpp b/src/hotspot/cpu/sw64/macroAssembler_sw64.inline.hpp +new file mode 100644 +index 0000000000..1b6d78b16a +--- /dev/null ++++ b/src/hotspot/cpu/sw64/macroAssembler_sw64.inline.hpp +@@ -0,0 +1,35 @@ ++/* ++ * Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_MACROASSEMBLER_SW64_INLINE_HPP ++#define CPU_SW64_VM_MACROASSEMBLER_SW64_INLINE_HPP ++ ++#include "asm/assembler.hpp" ++ ++#ifndef PRODUCT ++ ++#endif // ndef PRODUCT ++ ++#endif // CPU_SW64_VM_MACROASSEMBLER_SW64_INLINE_HPP +diff --git a/src/hotspot/cpu/sw64/macroAssembler_sw64_log.cpp b/src/hotspot/cpu/sw64/macroAssembler_sw64_log.cpp +new file mode 100644 +index 0000000000..2ce5a62378 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/macroAssembler_sw64_log.cpp +@@ -0,0 +1,262 @@ ++/* Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) ++ * Copyright (c) 2016, Intel Corporation. ++ * Intel Math Library (LIBM) Source Code ++ * ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "macroAssembler_sw64.hpp" ++ ++// Algorithm idea is taken from x86 hotspot intrinsic and adapted for AARCH64. ++// ++// For mathematical background please refer to the following literature: ++// ++// Tang, Ping-Tak Peter. ++// Table-driven implementation of the logarithm function ++// in IEEE floating-point arithmetic. ++// ACM Transactions on Mathematical Software (TOMS) 16, no. 4, 1990: 378-400. ++ ++/******************************************************************************/ ++// ALGORITHM DESCRIPTION - LOG() ++// --------------------- ++// ++// x=2^k * mx, mx in [1,2) ++// ++// Get B~1/mx based on the output of frecpe instruction (B0) ++// B = int((B0*2^7+0.5))/2^7 ++// ++// Reduced argument: r=B*mx-1.0 (computed accurately in high and low parts) ++// ++// Result: k*log(2) - log(B) + p(r) if |x-1| >= small value (2^-6) and ++// p(r) is a degree 7 polynomial ++// -log(B) read from data table (high, low parts) ++// Result is formed from high and low parts ++// ++// Special cases: ++// 1. log(NaN) = quiet NaN ++// 2. log(+INF) = that INF ++// 3. log(0) = -INF ++// 4. log(1) = +0 ++// 5. log(x) = NaN if x < -0, including -INF ++// ++/******************************************************************************/ ++ ++// Table with p(r) polynomial coefficients ++// and table representation of logarithm values (hi and low parts) ++__attribute__ ((aligned(64))) juint _L_tbl[] = ++{ ++ // coefficients of p(r) polynomial: ++ // _coeff[] ++ 0x00000000UL, 0xbfd00000UL, // C1_0 = -0.25 ++ 0x92492492UL, 0x3fc24924UL, // C1_1 = 0.14285714285714285 ++ 0x55555555UL, 0x3fd55555UL, // C2_0 = 0.3333333333333333 ++ 0x3d6fb175UL, 0xbfc5555eUL, // C2_1 = -0.16666772842235003 ++ 0x00000000UL, 0xbfe00000UL, // C3_0 = -0.5 ++ 0x9999999aUL, 0x3fc99999UL, // C3_1 = 0.2 ++ // _log2[] ++ 0xfefa3800UL, 0x3fa62e42UL, // C4_0 = 0.043321698784993146 ++ 0x93c76730UL, 0x3ceef357UL, // C4_1 = 3.436201886692732e-15 ++ // _L_tbl[] with logarithm values (hi and low parts) ++ 0xfefa3800UL, 0x3fe62e42UL, 0x93c76730UL, 0x3d2ef357UL, 0xaa241800UL, ++ 0x3fe5ee82UL, 0x0cda46beUL, 0x3d220238UL, 0x5c364800UL, 0x3fe5af40UL, ++ 0xac10c9fbUL, 0x3d2dfa63UL, 0x26bb8c00UL, 0x3fe5707aUL, 0xff3303ddUL, ++ 0x3d09980bUL, 0x26867800UL, 0x3fe5322eUL, 0x5d257531UL, 0x3d05ccc4UL, ++ 0x835a5000UL, 0x3fe4f45aUL, 0x6d93b8fbUL, 0xbd2e6c51UL, 0x6f970c00UL, ++ 0x3fe4b6fdUL, 0xed4c541cUL, 0x3cef7115UL, 0x27e8a400UL, 0x3fe47a15UL, ++ 0xf94d60aaUL, 0xbd22cb6aUL, 0xf2f92400UL, 0x3fe43d9fUL, 0x481051f7UL, ++ 0xbcfd984fUL, 0x2125cc00UL, 0x3fe4019cUL, 0x30f0c74cUL, 0xbd26ce79UL, ++ 0x0c36c000UL, 0x3fe3c608UL, 0x7cfe13c2UL, 0xbd02b736UL, 0x17197800UL, ++ 0x3fe38ae2UL, 0xbb5569a4UL, 0xbd218b7aUL, 0xad9d8c00UL, 0x3fe35028UL, ++ 0x9527e6acUL, 0x3d10b83fUL, 0x44340800UL, 0x3fe315daUL, 0xc5a0ed9cUL, ++ 0xbd274e93UL, 0x57b0e000UL, 0x3fe2dbf5UL, 0x07b9dc11UL, 0xbd17a6e5UL, ++ 0x6d0ec000UL, 0x3fe2a278UL, 0xe797882dUL, 0x3d206d2bUL, 0x1134dc00UL, ++ 0x3fe26962UL, 0x05226250UL, 0xbd0b61f1UL, 0xd8bebc00UL, 0x3fe230b0UL, ++ 0x6e48667bUL, 0x3d12fc06UL, 
0x5fc61800UL, 0x3fe1f863UL, 0xc9fe81d3UL, ++ 0xbd2a7242UL, 0x49ae6000UL, 0x3fe1c078UL, 0xed70e667UL, 0x3cccacdeUL, ++ 0x40f23c00UL, 0x3fe188eeUL, 0xf8ab4650UL, 0x3d14cc4eUL, 0xf6f29800UL, ++ 0x3fe151c3UL, 0xa293ae49UL, 0xbd2edd97UL, 0x23c75c00UL, 0x3fe11af8UL, ++ 0xbb9ddcb2UL, 0xbd258647UL, 0x8611cc00UL, 0x3fe0e489UL, 0x07801742UL, ++ 0x3d1c2998UL, 0xe2d05400UL, 0x3fe0ae76UL, 0x887e7e27UL, 0x3d1f486bUL, ++ 0x0533c400UL, 0x3fe078bfUL, 0x41edf5fdUL, 0x3d268122UL, 0xbe760400UL, ++ 0x3fe04360UL, 0xe79539e0UL, 0xbd04c45fUL, 0xe5b20800UL, 0x3fe00e5aUL, ++ 0xb1727b1cUL, 0xbd053ba3UL, 0xaf7a4800UL, 0x3fdfb358UL, 0x3c164935UL, ++ 0x3d0085faUL, 0xee031800UL, 0x3fdf4aa7UL, 0x6f014a8bUL, 0x3d12cde5UL, ++ 0x56b41000UL, 0x3fdee2a1UL, 0x5a470251UL, 0x3d2f27f4UL, 0xc3ddb000UL, ++ 0x3fde7b42UL, 0x5372bd08UL, 0xbd246550UL, 0x1a272800UL, 0x3fde148aUL, ++ 0x07322938UL, 0xbd1326b2UL, 0x484c9800UL, 0x3fddae75UL, 0x60dc616aUL, ++ 0xbd1ea42dUL, 0x46def800UL, 0x3fdd4902UL, 0xe9a767a8UL, 0x3d235bafUL, ++ 0x18064800UL, 0x3fdce42fUL, 0x3ec7a6b0UL, 0xbd0797c3UL, 0xc7455800UL, ++ 0x3fdc7ff9UL, 0xc15249aeUL, 0xbd29b6ddUL, 0x693fa000UL, 0x3fdc1c60UL, ++ 0x7fe8e180UL, 0x3d2cec80UL, 0x1b80e000UL, 0x3fdbb961UL, 0xf40a666dUL, ++ 0x3d27d85bUL, 0x04462800UL, 0x3fdb56faUL, 0x2d841995UL, 0x3d109525UL, ++ 0x5248d000UL, 0x3fdaf529UL, 0x52774458UL, 0xbd217cc5UL, 0x3c8ad800UL, ++ 0x3fda93edUL, 0xbea77a5dUL, 0x3d1e36f2UL, 0x0224f800UL, 0x3fda3344UL, ++ 0x7f9d79f5UL, 0x3d23c645UL, 0xea15f000UL, 0x3fd9d32bUL, 0x10d0c0b0UL, ++ 0xbd26279eUL, 0x43135800UL, 0x3fd973a3UL, 0xa502d9f0UL, 0xbd152313UL, ++ 0x635bf800UL, 0x3fd914a8UL, 0x2ee6307dUL, 0xbd1766b5UL, 0xa88b3000UL, ++ 0x3fd8b639UL, 0xe5e70470UL, 0xbd205ae1UL, 0x776dc800UL, 0x3fd85855UL, ++ 0x3333778aUL, 0x3d2fd56fUL, 0x3bd81800UL, 0x3fd7fafaUL, 0xc812566aUL, ++ 0xbd272090UL, 0x687cf800UL, 0x3fd79e26UL, 0x2efd1778UL, 0x3d29ec7dUL, ++ 0x76c67800UL, 0x3fd741d8UL, 0x49dc60b3UL, 0x3d2d8b09UL, 0xe6af1800UL, ++ 0x3fd6e60eUL, 0x7c222d87UL, 0x3d172165UL, 0x3e9c6800UL, 0x3fd68ac8UL, ++ 0x2756eba0UL, 0x3d20a0d3UL, 0x0b3ab000UL, 0x3fd63003UL, 0xe731ae00UL, ++ 0xbd2db623UL, 0xdf596000UL, 0x3fd5d5bdUL, 0x08a465dcUL, 0xbd0a0b2aUL, ++ 0x53c8d000UL, 0x3fd57bf7UL, 0xee5d40efUL, 0x3d1fadedUL, 0x0738a000UL, ++ 0x3fd522aeUL, 0x8164c759UL, 0x3d2ebe70UL, 0x9e173000UL, 0x3fd4c9e0UL, ++ 0x1b0ad8a4UL, 0xbd2e2089UL, 0xc271c800UL, 0x3fd4718dUL, 0x0967d675UL, ++ 0xbd2f27ceUL, 0x23d5e800UL, 0x3fd419b4UL, 0xec90e09dUL, 0x3d08e436UL, ++ 0x77333000UL, 0x3fd3c252UL, 0xb606bd5cUL, 0x3d183b54UL, 0x76be1000UL, ++ 0x3fd36b67UL, 0xb0f177c8UL, 0x3d116ecdUL, 0xe1d36000UL, 0x3fd314f1UL, ++ 0xd3213cb8UL, 0xbd28e27aUL, 0x7cdc9000UL, 0x3fd2bef0UL, 0x4a5004f4UL, ++ 0x3d2a9cfaUL, 0x1134d800UL, 0x3fd26962UL, 0xdf5bb3b6UL, 0x3d2c93c1UL, ++ 0x6d0eb800UL, 0x3fd21445UL, 0xba46baeaUL, 0x3d0a87deUL, 0x635a6800UL, ++ 0x3fd1bf99UL, 0x5147bdb7UL, 0x3d2ca6edUL, 0xcbacf800UL, 0x3fd16b5cUL, ++ 0xf7a51681UL, 0x3d2b9acdUL, 0x8227e800UL, 0x3fd1178eUL, 0x63a5f01cUL, ++ 0xbd2c210eUL, 0x67616000UL, 0x3fd0c42dUL, 0x163ceae9UL, 0x3d27188bUL, ++ 0x604d5800UL, 0x3fd07138UL, 0x16ed4e91UL, 0x3cf89cdbUL, 0x5626c800UL, ++ 0x3fd01eaeUL, 0x1485e94aUL, 0xbd16f08cUL, 0x6cb3b000UL, 0x3fcf991cUL, ++ 0xca0cdf30UL, 0x3d1bcbecUL, 0xe4dd0000UL, 0x3fcef5adUL, 0x65bb8e11UL, ++ 0xbcca2115UL, 0xffe71000UL, 0x3fce530eUL, 0x6041f430UL, 0x3cc21227UL, ++ 0xb0d49000UL, 0x3fcdb13dUL, 0xf715b035UL, 0xbd2aff2aUL, 0xf2656000UL, ++ 0x3fcd1037UL, 0x75b6f6e4UL, 0xbd084a7eUL, 0xc6f01000UL, 0x3fcc6ffbUL, ++ 0xc5962bd2UL, 0xbcf1ec72UL, 0x383be000UL, 0x3fcbd087UL, 0x595412b6UL, ++ 
0xbd2d4bc4UL, 0x575bd000UL, 0x3fcb31d8UL, 0x4eace1aaUL, 0xbd0c358dUL, ++ 0x3c8ae000UL, 0x3fca93edUL, 0x50562169UL, 0xbd287243UL, 0x07089000UL, ++ 0x3fc9f6c4UL, 0x6865817aUL, 0x3d29904dUL, 0xdcf70000UL, 0x3fc95a5aUL, ++ 0x58a0ff6fUL, 0x3d07f228UL, 0xeb390000UL, 0x3fc8beafUL, 0xaae92cd1UL, ++ 0xbd073d54UL, 0x6551a000UL, 0x3fc823c1UL, 0x9a631e83UL, 0x3d1e0ddbUL, ++ 0x85445000UL, 0x3fc7898dUL, 0x70914305UL, 0xbd1c6610UL, 0x8b757000UL, ++ 0x3fc6f012UL, 0xe59c21e1UL, 0xbd25118dUL, 0xbe8c1000UL, 0x3fc6574eUL, ++ 0x2c3c2e78UL, 0x3d19cf8bUL, 0x6b544000UL, 0x3fc5bf40UL, 0xeb68981cUL, ++ 0xbd127023UL, 0xe4a1b000UL, 0x3fc527e5UL, 0xe5697dc7UL, 0x3d2633e8UL, ++ 0x8333b000UL, 0x3fc4913dUL, 0x54fdb678UL, 0x3d258379UL, 0xa5993000UL, ++ 0x3fc3fb45UL, 0x7e6a354dUL, 0xbd2cd1d8UL, 0xb0159000UL, 0x3fc365fcUL, ++ 0x234b7289UL, 0x3cc62fa8UL, 0x0c868000UL, 0x3fc2d161UL, 0xcb81b4a1UL, ++ 0x3d039d6cUL, 0x2a49c000UL, 0x3fc23d71UL, 0x8fd3df5cUL, 0x3d100d23UL, ++ 0x7e23f000UL, 0x3fc1aa2bUL, 0x44389934UL, 0x3d2ca78eUL, 0x8227e000UL, ++ 0x3fc1178eUL, 0xce2d07f2UL, 0x3d21ef78UL, 0xb59e4000UL, 0x3fc08598UL, ++ 0x7009902cUL, 0xbd27e5ddUL, 0x39dbe000UL, 0x3fbfe891UL, 0x4fa10afdUL, ++ 0xbd2534d6UL, 0x830a2000UL, 0x3fbec739UL, 0xafe645e0UL, 0xbd2dc068UL, ++ 0x63844000UL, 0x3fbda727UL, 0x1fa71733UL, 0x3d1a8940UL, 0x01bc4000UL, ++ 0x3fbc8858UL, 0xc65aacd3UL, 0x3d2646d1UL, 0x8dad6000UL, 0x3fbb6ac8UL, ++ 0x2bf768e5UL, 0xbd139080UL, 0x40b1c000UL, 0x3fba4e76UL, 0xb94407c8UL, ++ 0xbd0e42b6UL, 0x5d594000UL, 0x3fb9335eUL, 0x3abd47daUL, 0x3d23115cUL, ++ 0x2f40e000UL, 0x3fb8197eUL, 0xf96ffdf7UL, 0x3d0f80dcUL, 0x0aeac000UL, ++ 0x3fb700d3UL, 0xa99ded32UL, 0x3cec1e8dUL, 0x4d97a000UL, 0x3fb5e95aUL, ++ 0x3c5d1d1eUL, 0xbd2c6906UL, 0x5d208000UL, 0x3fb4d311UL, 0x82f4e1efUL, ++ 0xbcf53a25UL, 0xa7d1e000UL, 0x3fb3bdf5UL, 0xa5db4ed7UL, 0x3d2cc85eUL, ++ 0xa4472000UL, 0x3fb2aa04UL, 0xae9c697dUL, 0xbd20b6e8UL, 0xd1466000UL, ++ 0x3fb1973bUL, 0x560d9e9bUL, 0xbd25325dUL, 0xb59e4000UL, 0x3fb08598UL, ++ 0x7009902cUL, 0xbd17e5ddUL, 0xc006c000UL, 0x3faeea31UL, 0x4fc93b7bUL, ++ 0xbd0e113eUL, 0xcdddc000UL, 0x3faccb73UL, 0x47d82807UL, 0xbd1a68f2UL, ++ 0xd0fb0000UL, 0x3faaaef2UL, 0x353bb42eUL, 0x3d20fc1aUL, 0x149fc000UL, ++ 0x3fa894aaUL, 0xd05a267dUL, 0xbd197995UL, 0xf2d4c000UL, 0x3fa67c94UL, ++ 0xec19afa2UL, 0xbd029efbUL, 0xd42e0000UL, 0x3fa466aeUL, 0x75bdfd28UL, ++ 0xbd2c1673UL, 0x2f8d0000UL, 0x3fa252f3UL, 0xe021b67bUL, 0x3d283e9aUL, ++ 0x89e74000UL, 0x3fa0415dUL, 0x5cf1d753UL, 0x3d0111c0UL, 0xec148000UL, ++ 0x3f9c63d2UL, 0x3f9eb2f3UL, 0x3d2578c6UL, 0x28c90000UL, 0x3f984925UL, ++ 0x325a0c34UL, 0xbd2aa0baUL, 0x25980000UL, 0x3f9432a9UL, 0x928637feUL, ++ 0x3d098139UL, 0x58938000UL, 0x3f902056UL, 0x06e2f7d2UL, 0xbd23dc5bUL, ++ 0xa3890000UL, 0x3f882448UL, 0xda74f640UL, 0xbd275577UL, 0x75890000UL, ++ 0x3f801015UL, 0x999d2be8UL, 0xbd10c76bUL, 0x59580000UL, 0x3f700805UL, ++ 0xcb31c67bUL, 0x3d2166afUL, 0x00000000UL, 0x00000000UL, 0x00000000UL, ++ 0x80000000UL ++}; ++ ++// BEGIN dlog PSEUDO CODE: ++// double dlog(double X) { ++// // p(r) polynomial coefficients initialized from _L_tbl table ++// double C1_0 = _L_tbl[0]; ++// double C1_1 = _L_tbl[1]; ++// double C2_0 = _L_tbl[2]; ++// double C2_1 = _L_tbl[3]; ++// double C3_0 = _L_tbl[4]; ++// double C3_1 = _L_tbl[5]; ++// double C4_0 = _L_tbl[6]; ++// double C4_1 = _L_tbl[7]; ++// // NOTE: operations with coefficients above are mostly vectorized in assembly ++// // Check corner cases first ++// if (X == 1.0d || AS_LONG_BITS(X) + 0x0010000000000000 <= 0x0010000000000000) { ++// // NOTE: AS_LONG_BITS(X) + 
0x0010000000000000 <= 0x0010000000000000 means ++// // that X < 0 or X >= 0x7FF0000000000000 (0x7FF* is NaN or INF) ++// if (X < 0 || X is NaN) return NaN; ++// if (X == 1.0d) return 0.0d; ++// if (X == 0.0d) return -INFINITY; ++// if (X is INFINITY) return INFINITY; ++// } ++// // double representation is 2^exponent * mantissa ++// // split X into two multipliers: 2^exponent and 1.0 * mantissa ++// // pseudo function: zeroExponent(X) return value of X with exponent == 0 ++// float vtmp5 = 1/(float)(zeroExponent(X)); // reciprocal estimate ++// // pseudo function: HI16(X) returns high 16 bits of double value ++// int hiWord = HI16(X); ++// double vtmp1 = (double) 0x77F0 << 48 | mantissa(X); ++// hiWord -= 16; ++// if (AS_LONG_BITS(hiWord) > 0x8000) { ++// // SMALL_VALUE branch ++// vtmp0 = vtmp1 = vtmp0 * AS_DOUBLE_BITS(0x47F0000000000000); ++// hiWord = HI16(vtmp1); ++// vtmp0 = AS_DOUBLE_BITS(AS_LONG_BITS(vtmp0) |= 0x3FF0000000000000); ++// vtmp5 = (double) (1/(float)vtmp0); ++// vtmp1 <<= 12; ++// vtmp1 >>= 12; ++// } ++// // MAIN branch ++// double vtmp3 = AS_LONG_BITS(vtmp1) & 0xffffe00000000000; // hi part ++// int intB0 = AS_INT_BITS(vtmp5) + 0x8000; ++// double vtmp0 = AS_DOUBLE_BITS(0xffffe00000000000 & (intB0<<29)); ++// int index = (intB0 >> 16) && 0xFF; ++// double hiTableValue = _L_tbl[8+index]; // vtmp2[0] ++// double lowTableValue = _L_tbl[16+index]; // vtmp2[1] ++// vtmp5 = AS_DOUBLE_BITS(hiWord & 0x7FF0 - 0x3FE0); // 0x3FE = 1023 << 4 ++// vtmp1 -= vtmp3; // low part ++// vtmp3 = vtmp3*vtmp0 - 1.0; ++// hiTableValue += C4_0 * vtmp5; ++// lowTableValue += C4_1 * vtmp5; ++// double r = vtmp1 * vtmp0 + vtmp3; // r = B*mx-1.0, computed in hi and low parts ++// vtmp0 = hiTableValue + r; ++// hiTableValue -= vtmp0; ++// double i2 = r*r; ++// double i3 = i2*r; ++// double p7 = C3_0*i2 + C2_0*i3 + C1_0*i2*i2 + C3_1*i3*i2 + C2_1*i3*i3 ++// + C1_1*i3*i2*i2; // degree 7 polynomial ++// return p7 + (vtmp0 + ((r + hiTableValue) + lowTableValue)); ++// } ++// ++// END dlog PSEUDO CODE ++ ++ ++// Generate log(X). X passed in register f0. Return log(X) into f0. ++// Generator parameters: 10 temporary FPU registers and temporary general ++// purpose registers ++void MacroAssembler::fast_log(FloatRegister vtmp0, FloatRegister vtmp1, ++ FloatRegister vtmp2, FloatRegister vtmp3, ++ FloatRegister vtmp4, FloatRegister vtmp5, ++ FloatRegister C1, FloatRegister C2, ++ FloatRegister C3, FloatRegister C4, ++ Register tmp1, Register tmp2, Register tmp3, ++ Register tmp4, Register tmp5) { ++ ShouldNotReachHere(); ++} +diff --git a/src/hotspot/cpu/sw64/macroAssembler_sw64_trig.cpp b/src/hotspot/cpu/sw64/macroAssembler_sw64_trig.cpp +new file mode 100644 +index 0000000000..d790586d6d +--- /dev/null ++++ b/src/hotspot/cpu/sw64/macroAssembler_sw64_trig.cpp +@@ -0,0 +1,710 @@ ++/* Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Cavium. All rights reserved. (By BELLSOFT) ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/assembler.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "macroAssembler_sw64.hpp" ++ ++// The following code is a optimized version of fdlibm sin/cos implementation ++// (C code is in share/runtime/sharedRuntimeTrig.cpp) adapted for SW64. ++ ++// Please refer to sin/cos approximation via polynomial and ++// trigonometric argument reduction techniques to the following literature: ++// ++// [1] Muller, Jean-Michel, Nicolas Brisebarre, Florent De Dinechin, ++// Claude-Pierre Jeannerod, Vincent Lefevre, Guillaume Melquiond, ++// Nathalie Revol, Damien Stehlé, and Serge Torres: ++// Handbook of floating-point arithmetic. ++// Springer Science & Business Media, 2009. ++// [2] K. C. Ng ++// Argument Reduction for Huge Arguments: Good to the Last Bit ++// July 13, 1992, SunPro ++// ++// HOW TO READ THIS CODE: ++// This code consists of several functions. Each function has following header: ++// 1) Description ++// 2) C-pseudo code with differences from fdlibm marked by comments starting ++// with "NOTE". Check unmodified fdlibm code in ++// share/runtime/SharedRuntimeTrig.cpp ++// 3) Brief textual description of changes between fdlibm and current ++// implementation along with optimization notes (if applicable) ++// 4) Assumptions, input and output ++// 5) (Optional) additional notes about intrinsic implementation ++// Each function is separated in blocks which follow the pseudo-code structure ++// ++// HIGH-LEVEL ALGORITHM DESCRIPTION: ++// - entry point: generate_dsin_dcos(...); ++// - check corner cases: NaN, INF, tiny argument. ++// - check if |x| < Pi/4. Then approximate sin/cos via polynomial (kernel_sin/kernel_cos) ++// -- else proceed to argument reduction routine (__ieee754_rem_pio2) and ++// use reduced argument to get result via kernel_sin/kernel_cos ++// ++// HIGH-LEVEL CHANGES BETWEEN INTRINSICS AND FDLIBM: ++// 1) two_over_pi table fdlibm representation is int[], while intrinsic version ++// has these int values converted to double representation to load converted ++// double values directly (see stubRoutines_aarch4::_two_over_pi) ++// 2) Several loops are unrolled and vectorized: see comments in code after ++// labels: SKIP_F_LOAD, RECOMP_FOR1_CHECK, RECOMP_FOR2 ++// 3) fdlibm npio2_hw table now has "prefix" with constants used in ++// calculation. These constants are loaded from npio2_hw table instead of ++// constructing it in code (see stubRoutines_sw64.cpp) ++// 4) Polynomial coefficients for sin and cos are moved to table sin_coef ++// and cos_coef to use the same optimization as in 3). It allows to load most of ++// required constants via single instruction ++// ++// ++// ++///* __ieee754_rem_pio2(x,y) ++// * ++// * returns the remainder of x rem pi/2 in y[0]+y[1] (i.e. 
like x div pi/2) ++// * x is input argument, y[] is hi and low parts of reduced argument (x) ++// * uses __kernel_rem_pio2() ++// */ ++// // use tables(see stubRoutines_sw64.cpp): two_over_pi and modified npio2_hw ++// ++// BEGIN __ieee754_rem_pio2 PSEUDO CODE ++// ++//static int __ieee754_rem_pio2(double x, double *y) { ++// double z,w,t,r,fn; ++// double tx[3]; ++// int e0,i,j,nx,n,ix,hx,i0; ++// ++// i0 = ((*(int*)&two24A)>>30)^1; /* high word index */ ++// hx = *(i0+(int*)&x); /* high word of x */ ++// ix = hx&0x7fffffff; ++// if(ix<0x4002d97c) { /* |x| < 3pi/4, special case with n=+-1 */ ++// if(hx>0) { ++// z = x - pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z - pio2_1t; ++// y[1] = (z-y[0])-pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z -= pio2_2; ++// y[0] = z - pio2_2t; ++// y[1] = (z-y[0])-pio2_2t; ++// } ++// return 1; ++// } else { /* negative x */ ++// z = x + pio2_1; ++// if(ix!=0x3ff921fb) { /* 33+53 bit pi is good enough */ ++// y[0] = z + pio2_1t; ++// y[1] = (z-y[0])+pio2_1t; ++// } else { /* near pi/2, use 33+33+53 bit pi */ ++// z += pio2_2; ++// y[0] = z + pio2_2t; ++// y[1] = (z-y[0])+pio2_2t; ++// } ++// return -1; ++// } ++// } ++// if(ix<=0x413921fb) { /* |x| ~<= 2^19*(pi/2), medium size */ ++// t = fabsd(x); ++// n = (int) (t*invpio2+half); ++// fn = (double)n; ++// r = t-fn*pio2_1; ++// w = fn*pio2_1t; /* 1st round good to 85 bit */ ++// // NOTE: y[0] = r-w; is moved from if/else below to be before "if" ++// y[0] = r-w; ++// if(n<32&&ix!=npio2_hw[n-1]) { ++// // y[0] = r-w; /* quick check no cancellation */ // NOTE: moved earlier ++// } else { ++// j = ix>>20; ++// // y[0] = r-w; // NOTE: moved earlier ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>16) { /* 2nd iteration needed, good to 118 */ ++// t = r; ++// w = fn*pio2_2; ++// r = t-w; ++// w = fn*pio2_2t-((t-r)-w); ++// y[0] = r-w; ++// i = j-(((*(i0+(int*)&y[0]))>>20)&0x7ff); ++// if(i>49) { /* 3rd iteration need, 151 bits acc */ ++// t = r; /* will cover all possible cases */ ++// w = fn*pio2_3; ++// r = t-w; ++// w = fn*pio2_3t-((t-r)-w); ++// y[0] = r-w; ++// } ++// } ++// } ++// y[1] = (r-y[0])-w; ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// else return n; ++// } ++// /* ++// * all other (large) arguments ++// */ ++// // NOTE: this check is removed, because it was checked in dsin/dcos ++// // if(ix>=0x7ff00000) { /* x is inf or NaN */ ++// // y[0]=y[1]=x-x; return 0; ++// // } ++// /* set z = scalbn(|x|,ilogb(x)-23) */ ++// *(1-i0+(int*)&z) = *(1-i0+(int*)&x); ++// e0 = (ix>>20)-1046; /* e0 = ilogb(z)-23; */ ++// *(i0+(int*)&z) = ix - (e0<<20); ++// ++// // NOTE: "for" loop below in unrolled. See comments in asm code ++// for(i=0;i<2;i++) { ++// tx[i] = (double)((int)(z)); ++// z = (z-tx[i])*two24A; ++// } ++// ++// tx[2] = z; ++// nx = 3; ++// ++// // NOTE: while(tx[nx-1]==zeroA) nx--; is unrolled. See comments in asm code ++// while(tx[nx-1]==zeroA) nx--; /* skip zero term */ ++// ++// n = __kernel_rem_pio2(tx,y,e0,nx,2,two_over_pi); ++// if(hx<0) {y[0] = -y[0]; y[1] = -y[1]; return -n;} ++// return n; ++//} ++// ++// END __ieee754_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic for __ieee754_rem_pio2: ++// 1. INF/NaN check for huge argument is removed in comparison with fdlibm ++// code, because this check is already done in dcos/dsin code ++// 2. Most constants are now loaded from table instead of direct initialization ++// 3. Two loops are unrolled ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// 2. 
Assume rscratch1 = 0x3fe921fb00000000 (~ PI/4) ++// 3. Assume ix = i3 ++// Input and output: ++// 1. Input: X = i0 ++// 2. Return n in i2, y[0] == y0 == f4, y[1] == y1 == f5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. See comments in code about their usage ++void MacroAssembler::generate__ieee754_rem_pio2(address npio2_hw, ++ address two_over_pi, address pio2) { ++ ShouldNotReachHere(); ++} ++ ++///* ++// * __kernel_rem_pio2(x,y,e0,nx,prec,ipio2) ++// * double x[],y[]; int e0,nx,prec; int ipio2[]; ++// * ++// * __kernel_rem_pio2 return the last three digits of N with ++// * y = x - N*pi/2 ++// * so that |y| < pi/2. ++// * ++// * The method is to compute the integer (mod 8) and fraction parts of ++// * (2/pi)*x without doing the full multiplication. In general we ++// * skip the part of the product that are known to be a huge integer ( ++// * more accurately, = 0 mod 8 ). Thus the number of operations are ++// * independent of the exponent of the input. ++// * ++// * NOTE: 2/pi int representation is converted to double ++// * // (2/pi) is represented by an array of 24-bit integers in ipio2[]. ++// * ++// * Input parameters: ++// * x[] The input value (must be positive) is broken into nx ++// * pieces of 24-bit integers in double precision format. ++// * x[i] will be the i-th 24 bit of x. The scaled exponent ++// * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 ++// * match x's up to 24 bits. ++// * ++// * Example of breaking a double positive z into x[0]+x[1]+x[2]: ++// * e0 = ilogb(z)-23 ++// * z = scalbn(z,-e0) ++// * for i = 0,1,2 ++// * x[i] = floor(z) ++// * z = (z-x[i])*2**24 ++// * ++// * ++// * y[] ouput result in an array of double precision numbers. ++// * The dimension of y[] is: ++// * 24-bit precision 1 ++// * 53-bit precision 2 ++// * 64-bit precision 2 ++// * 113-bit precision 3 ++// * The actual value is the sum of them. Thus for 113-bit ++// * precsion, one may have to do something like: ++// * ++// * long double t,w,r_head, r_tail; ++// * t = (long double)y[2] + (long double)y[1]; ++// * w = (long double)y[0]; ++// * r_head = t+w; ++// * r_tail = w - (r_head - t); ++// * ++// * e0 The exponent of x[0] ++// * ++// * nx dimension of x[] ++// * ++// * prec an interger indicating the precision: ++// * 0 24 bits (single) ++// * 1 53 bits (double) ++// * 2 64 bits (extended) ++// * 3 113 bits (quad) ++// * ++// * NOTE: ipio2[] array below is converted to double representation ++// * //ipio2[] ++// * // integer array, contains the (24*i)-th to (24*i+23)-th ++// * // bit of 2/pi after binary point. The corresponding ++// * // floating value is ++// * ++// * ipio2[i] * 2^(-24(i+1)). ++// * ++// * Here is the description of some local variables: ++// * ++// * jk jk+1 is the initial number of terms of ipio2[] needed ++// * in the computation. The recommended value is 2,3,4, ++// * 6 for single, double, extended,and quad. ++// * ++// * jz local integer variable indicating the number of ++// * terms of ipio2[] used. ++// * ++// * jx nx - 1 ++// * ++// * jv index for pointing to the suitable ipio2[] for the ++// * computation. In general, we want ++// * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 ++// * is an integer. Thus ++// * e0-3-24*jv >= 0 or (e0-3)/24 >= jv ++// * Hence jv = max(0,(e0-3)/24). ++// * ++// * jp jp+1 is the number of terms in PIo2[] needed, jp = jk. ++// * ++// * q[] double array with integral value, representing the ++// * 24-bits chunk of the product of x and 2/pi. 
++// * ++// * q0 the corresponding exponent of q[0]. Note that the ++// * exponent for q[i] would be q0-24*i. ++// * ++// * PIo2[] double precision array, obtained by cutting pi/2 ++// * into 24 bits chunks. ++// * ++// * f[] ipio2[] in floating point ++// * ++// * iq[] integer array by breaking up q[] in 24-bits chunk. ++// * ++// * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] ++// * ++// * ih integer. If >0 it indicates q[] is >= 0.5, hence ++// * it also indicates the *sign* of the result. ++// * ++// */ ++// ++// Use PIo2 table(see stubRoutines_sw64.cpp) ++// ++// BEGIN __kernel_rem_pio2 PSEUDO CODE ++// ++//static int __kernel_rem_pio2(double *x, double *y, int e0, int nx, int prec, /* NOTE: converted to double */ const double *ipio2 // const int *ipio2) { ++// int jz,jx,jv,jp,jk,carry,n,iq[20],i,j,k,m,q0,ih; ++// double z,fw,f[20],fq[20],q[20]; ++// ++// /* initialize jk*/ ++// // jk = init_jk[prec]; // NOTE: prec==2 for double. jk is always 4. ++// jp = jk; // NOTE: always 4 ++// ++// /* determine jx,jv,q0, note that 3>q0 */ ++// jx = nx-1; ++// jv = (e0-3)/24; if(jv<0) jv=0; ++// q0 = e0-24*(jv+1); ++// ++// /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ ++// j = jv-jx; m = jx+jk; ++// ++// // NOTE: split into two for-loops: one with zeroB and one with ipio2[j]. It ++// // allows the use of wider loads/stores ++// for(i=0;i<=m;i++,j++) f[i] = (j<0)? zeroB : /* NOTE: converted to double */ ipio2[j]; //(double) ipio2[j]; ++// ++// // NOTE: unrolled and vectorized "for". See comments in asm code ++// /* compute q[0],q[1],...q[jk] */ ++// for (i=0;i<=jk;i++) { ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; q[i] = fw; ++// } ++// ++// jz = jk; ++//recompute: ++// /* distill q[] into iq[] reversingly */ ++// for(i=0,j=jz,z=q[jz];j>0;i++,j--) { ++// fw = (double)((int)(twon24* z)); ++// iq[i] = (int)(z-two24B*fw); ++// z = q[j-1]+fw; ++// } ++// ++// /* compute n */ ++// z = scalbnA(z,q0); /* actual value of z */ ++// z -= 8.0*floor(z*0.125); /* trim off integer >= 8 */ ++// n = (int) z; ++// z -= (double)n; ++// ih = 0; ++// if(q0>0) { /* need iq[jz-1] to determine n */ ++// i = (iq[jz-1]>>(24-q0)); n += i; ++// iq[jz-1] -= i<<(24-q0); ++// ih = iq[jz-1]>>(23-q0); ++// } ++// else if(q0==0) ih = iq[jz-1]>>23; ++// else if(z>=0.5) ih=2; ++// ++// if(ih>0) { /* q > 0.5 */ ++// n += 1; carry = 0; ++// for(i=0;i0) { /* rare case: chance is 1 in 12 */ ++// switch(q0) { ++// case 1: ++// iq[jz-1] &= 0x7fffff; break; ++// case 2: ++// iq[jz-1] &= 0x3fffff; break; ++// } ++// } ++// if(ih==2) { ++// z = one - z; ++// if(carry!=0) z -= scalbnA(one,q0); ++// } ++// } ++// ++// /* check if recomputation is needed */ ++// if(z==zeroB) { ++// j = 0; ++// for (i=jz-1;i>=jk;i--) j |= iq[i]; ++// if(j==0) { /* need recomputation */ ++// for(k=1;iq[jk-k]==0;k++); /* k = no. 
of terms needed */ ++// ++// for(i=jz+1;i<=jz+k;i++) { /* add q[jz+1] to q[jz+k] */ ++// f[jx+i] = /* NOTE: converted to double */ ipio2[jv+i]; //(double) ipio2[jv+i]; ++// for(j=0,fw=0.0;j<=jx;j++) fw += x[j]*f[jx+i-j]; ++// q[i] = fw; ++// } ++// jz += k; ++// goto recompute; ++// } ++// } ++// ++// /* chop off zero terms */ ++// if(z==0.0) { ++// jz -= 1; q0 -= 24; ++// while(iq[jz]==0) { jz--; q0-=24;} ++// } else { /* break z into 24-bit if necessary */ ++// z = scalbnA(z,-q0); ++// if(z>=two24B) { ++// fw = (double)((int)(twon24*z)); ++// iq[jz] = (int)(z-two24B*fw); ++// jz += 1; q0 += 24; ++// iq[jz] = (int) fw; ++// } else iq[jz] = (int) z ; ++// } ++// ++// /* convert integer "bit" chunk to floating-point value */ ++// fw = scalbnA(one,q0); ++// for(i=jz;i>=0;i--) { ++// q[i] = fw*(double)iq[i]; fw*=twon24; ++// } ++// ++// /* compute PIo2[0,...,jp]*q[jz,...,0] */ ++// for(i=jz;i>=0;i--) { ++// for(fw=0.0,k=0;k<=jp&&k<=jz-i;k++) fw += PIo2[k]*q[i+k]; ++// fq[jz-i] = fw; ++// } ++// ++// // NOTE: switch below is eliminated, because prec is always 2 for doubles ++// /* compress fq[] into y[] */ ++// //switch(prec) { ++// //case 0: ++// // fw = 0.0; ++// // for (i=jz;i>=0;i--) fw += fq[i]; ++// // y[0] = (ih==0)? fw: -fw; ++// // break; ++// //case 1: ++// //case 2: ++// fw = 0.0; ++// for (i=jz;i>=0;i--) fw += fq[i]; ++// y[0] = (ih==0)? fw: -fw; ++// fw = fq[0]-fw; ++// for (i=1;i<=jz;i++) fw += fq[i]; ++// y[1] = (ih==0)? fw: -fw; ++// // break; ++// //case 3: /* painful */ ++// // for (i=jz;i>0;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (i=jz;i>1;i--) { ++// // fw = fq[i-1]+fq[i]; ++// // fq[i] += fq[i-1]-fw; ++// // fq[i-1] = fw; ++// // } ++// // for (fw=0.0,i=jz;i>=2;i--) fw += fq[i]; ++// // if(ih==0) { ++// // y[0] = fq[0]; y[1] = fq[1]; y[2] = fw; ++// // } else { ++// // y[0] = -fq[0]; y[1] = -fq[1]; y[2] = -fw; ++// // } ++// //} ++// return n&7; ++//} ++// ++// END __kernel_rem_pio2 PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. One loop is unrolled and vectorized (see comments in code) ++// 2. One loop is split into 2 loops (see comments in code) ++// 3. Non-double code is removed(last switch). Sevaral variables became ++// constants because of that (see comments in code) ++// 4. Use of jx, which is nx-1 instead of nx ++// Assumptions: ++// 1. Assume |X| >= PI/4 ++// Input and output: ++// 1. Input: X = i0, jx == nx - 1 == i6, e0 == rscratch1 ++// 2. Return n in i2, y[0] == y0 == f4, y[1] == y1 == f5 ++// NOTE: general purpose register names match local variable names in C code ++// NOTE: fpu registers are actively reused. See comments in code about their usage ++void MacroAssembler::generate__kernel_rem_pio2(address two_over_pi, address pio2) { ++ ShouldNotReachHere(); ++} ++ ++///* __kernel_sin( x, y, iy) ++// * kernel sin function on [-pi/4, pi/4], pi/4 ~ 0.7854 ++// * Input x is assumed to be bounded by ~pi/4 in magnitude. ++// * Input y is the tail of x. ++// * Input iy indicates whether y is 0. (if iy=0, y assume to be 0). ++// * ++// * Algorithm ++// * 1. Since sin(-x) = -sin(x), we need only to consider positive x. ++// * 2. if x < 2^-27 (hx<0x3e400000 0), return x with inexact if x!=0. ++// * 3. sin(x) is approximated by a polynomial of degree 13 on ++// * [0,pi/4] ++// * 3 13 ++// * sin(x) ~ x + S1*x + ... + S6*x ++// * where ++// * ++// * |sin(x) 2 4 6 8 10 12 | -58 ++// * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 ++// * | x | ++// * ++// * 4. 
sin(x+y) = sin(x) + sin'(x')*y ++// * ~ sin(x) + (1-x*x/2)*y ++// * For better accuracy, let ++// * 3 2 2 2 2 ++// * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) ++// * then 3 2 ++// * sin(x) = x + (S1*x + (x *(r-y/2)+y)) ++// */ ++//static const double ++//S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ ++//S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ ++//S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ ++//S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ ++//S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ ++//S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ ++// ++// NOTE: S1..S6 were moved into a table: StubRoutines::sw64::_dsin_coef ++// ++// BEGIN __kernel_sin PSEUDO CODE ++// ++//static double __kernel_sin(double x, double y, bool iy) ++//{ ++// double z,r,v; ++// ++// // NOTE: not needed. moved to dsin/dcos ++// //int ix; ++// //ix = high(x)&0x7fffffff; /* high word of x */ ++// ++// // NOTE: moved to dsin/dcos ++// //if(ix<0x3e400000) /* |x| < 2**-27 */ ++// // {if((int)x==0) return x;} /* generate inexact */ ++// ++// z = x*x; ++// v = z*x; ++// r = S2+z*(S3+z*(S4+z*(S5+z*S6))); ++// if(iy==0) return x+v*(S1+z*r); ++// else return x-((z*(half*y-v*r)-y)-v*S1); ++//} ++// ++// END __kernel_sin PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos ++// 2. Constants are now loaded from table dsin_coef ++// 3. C code parameter "int iy" was modified to "bool iyIsOne", because ++// iy is always 0 or 1. Also, iyIsOne branch was moved into ++// generation phase instead of taking it during code execution ++// Input ans output: ++// 1. Input for generated function: X argument = x ++// 2. Input for generator: x = register to read argument from, iyIsOne ++// = flag to use low argument low part or not, dsin_coef = coefficients ++// table address ++// 3. Return sin(x) value in f0 ++void MacroAssembler::generate_kernel_sin(FloatRegister x, bool iyIsOne, ++ address dsin_coef) { ++ ShouldNotReachHere(); ++} ++ ++///* ++// * __kernel_cos( x, y ) ++// * kernel cos function on [-pi/4, pi/4], pi/4 ~ 0.785398164 ++// * Input x is assumed to be bounded by ~pi/4 in magnitude. ++// * Input y is the tail of x. ++// * ++// * Algorithm ++// * 1. Since cos(-x) = cos(x), we need only to consider positive x. ++// * 2. if x < 2^-27 (hx<0x3e400000 0), return 1 with inexact if x!=0. ++// * 3. cos(x) is approximated by a polynomial of degree 14 on ++// * [0,pi/4] ++// * 4 14 ++// * cos(x) ~ 1 - x*x/2 + C1*x + ... + C6*x ++// * where the remez error is ++// * ++// * | 2 4 6 8 10 12 14 | -58 ++// * |cos(x)-(1-.5*x +C1*x +C2*x +C3*x +C4*x +C5*x +C6*x )| <= 2 ++// * | | ++// * ++// * 4 6 8 10 12 14 ++// * 4. let r = C1*x +C2*x +C3*x +C4*x +C5*x +C6*x , then ++// * cos(x) = 1 - x*x/2 + r ++// * since cos(x+y) ~ cos(x) - sin(x)*y ++// * ~ cos(x) - x*y, ++// * a correction term is necessary in cos(x) and hence ++// * cos(x+y) = 1 - (x*x/2 - (r - x*y)) ++// * For better accuracy when x > 0.3, let qx = |x|/4 with ++// * the last 32 bits mask off, and if x > 0.78125, let qx = 0.28125. ++// * Then ++// * cos(x+y) = (1-qx) - ((x*x/2-qx) - (r-x*y)). ++// * Note that 1-qx and (x*x/2-qx) is EXACT here, and the ++// * magnitude of the latter is at least a quarter of x*x/2, ++// * thus, reducing the rounding error in the subtraction. 
++// */ ++// ++//static const double ++//C1 = 4.16666666666666019037e-02, /* 0x3FA55555, 0x5555554C */ ++//C2 = -1.38888888888741095749e-03, /* 0xBF56C16C, 0x16C15177 */ ++//C3 = 2.48015872894767294178e-05, /* 0x3EFA01A0, 0x19CB1590 */ ++//C4 = -2.75573143513906633035e-07, /* 0xBE927E4F, 0x809C52AD */ ++//C5 = 2.08757232129817482790e-09, /* 0x3E21EE9E, 0xBDB4B1C4 */ ++//C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ ++// ++// NOTE: C1..C6 were moved into a table: StubRoutines::sw64::_dcos_coef ++// ++// BEGIN __kernel_cos PSEUDO CODE ++// ++//static double __kernel_cos(double x, double y) ++//{ ++// double a,h,z,r,qx=0; ++// ++// // NOTE: ix is already initialized in dsin/dcos. Reuse value from register ++// //int ix; ++// //ix = high(x)&0x7fffffff; /* ix = |x|'s high word*/ ++// ++// // NOTE: moved to dsin/dcos ++// //if(ix<0x3e400000) { /* if x < 2**27 */ ++// // if(((int)x)==0) return one; /* generate inexact */ ++// //} ++// ++// z = x*x; ++// r = z*(C1+z*(C2+z*(C3+z*(C4+z*(C5+z*C6))))); ++// if(ix < 0x3FD33333) /* if |x| < 0.3 */ ++// return one - (0.5*z - (z*r - x*y)); ++// else { ++// if(ix > 0x3fe90000) { /* x > 0.78125 */ ++// qx = 0.28125; ++// } else { ++// set_high(&qx, ix-0x00200000); /* x/4 */ ++// set_low(&qx, 0); ++// } ++// h = 0.5*z-qx; ++// a = one-qx; ++// return a - (h - (z*r-x*y)); ++// } ++//} ++// ++// END __kernel_cos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Removed |x| < 2**-27 check, because if was done earlier in dsin/dcos ++// 2. Constants are now loaded from table dcos_coef ++// Input and output: ++// 1. Input for generated function: X argument = x ++// 2. Input for generator: x = register to read argument from, dcos_coef ++// = coefficients table address ++// 2. Return cos(x) value in f0 ++void MacroAssembler::generate_kernel_cos(FloatRegister x, address dcos_coef) { ++ ShouldNotReachHere(); ++} ++ ++// generate_dsin_dcos creates stub for dsin and dcos ++// Generation is done via single call because dsin and dcos code is almost the ++// same(see C code below). These functions work as follows: ++// 1) handle corner cases: |x| ~< pi/4, x is NaN or INF, |x| < 2**-27 ++// 2) perform argument reduction if required ++// 3) call kernel_sin or kernel_cos which approximate sin/cos via polynomial ++// ++// BEGIN dsin/dcos PSEUDO CODE ++// ++//dsin_dcos(jdouble x, bool isCos) { ++// double y[2],z=0.0; ++// int n, ix; ++// ++// /* High word of x. */ ++// ix = high(x); ++// ++// /* |x| ~< pi/4 */ ++// ix &= 0x7fffffff; ++// if(ix <= 0x3fe921fb) return isCos ? __kernel_cos : __kernel_sin(x,z,0); ++// ++// /* sin/cos(Inf or NaN) is NaN */ ++// else if (ix>=0x7ff00000) return x-x; ++// else if (ix<0x3e400000) { /* if ix < 2**27 */ ++// if(((int)x)==0) return isCos ? one : x; /* generate inexact */ ++// } ++// /* argument reduction needed */ ++// else { ++// n = __ieee754_rem_pio2(x,y); ++// switch(n&3) { ++// case 0: return isCos ? __kernel_cos(y[0],y[1]) : __kernel_sin(y[0],y[1], true); ++// case 1: return isCos ? -__kernel_sin(y[0],y[1],true) : __kernel_cos(y[0],y[1]); ++// case 2: return isCos ? -__kernel_cos(y[0],y[1]) : -__kernel_sin(y[0],y[1], true); ++// default: ++// return isCos ? __kernel_sin(y[0],y[1],1) : -__kernel_cos(y[0],y[1]); ++// } ++// } ++//} ++// END dsin/dcos PSEUDO CODE ++// ++// Changes between fdlibm and intrinsic: ++// 1. Moved ix < 2**27 from kernel_sin/kernel_cos into dsin/dcos ++// 2. Final switch use equivalent bit checks(tbz/tbnz) ++// Input ans output: ++// 1. 
Input for generated function: X = i0 ++// 2. Input for generator: isCos = generate sin or cos, npio2_hw = address ++// of npio2_hw table, two_over_pi = address of two_over_pi table, ++// pio2 = address if pio2 table, dsin_coef = address if dsin_coef table, ++// dcos_coef = address of dcos_coef table ++// 3. Return result in f0 ++// NOTE: general purpose register names match local variable names in C code ++void MacroAssembler::generate_dsin_dcos(bool isCos, address npio2_hw, ++ address two_over_pi, address pio2, address dsin_coef, address dcos_coef) { ++ ShouldNotReachHere(); ++} +diff --git a/src/hotspot/cpu/sw64/methodHandles_sw64.cpp b/src/hotspot/cpu/sw64/methodHandles_sw64.cpp +new file mode 100644 +index 0000000000..fee8abd9c5 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/methodHandles_sw64.cpp +@@ -0,0 +1,616 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/flags/flagSetting.hpp" ++#include "runtime/frame.inline.hpp" ++ ++#define __ _masm-> ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) {SCOPEMARK_NAME(MethodHandles::load_klass_from_Class, _masm) ++ if (VerifyMethodHandles) ++ verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++ "MH argument is a Class"); ++ __ ldptr(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++} ++ ++#ifdef ASSERT ++static int check_nonzero(const char* xname, int x) { ++ assert(x != 0, "%s should be nonzero", xname); ++ return x; ++} ++#define NONZERO(x) check_nonzero(#x, x) ++#else //ASSERT ++#define NONZERO(x) (x) ++#endif //ASSERT ++ ++#ifdef ASSERT ++void MethodHandles::verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message) {SCOPEMARK_NAME(MethodHandles::verify_klass, _masm) ++ InstanceKlass** klass_addr = SystemDictionary::well_known_klass_addr(klass_id); ++ Klass* klass = SystemDictionary::well_known_klass(klass_id); ++ Register temp = rdi; ++ Register temp2 = noreg; ++ temp2 = rscratch3; // used by MacroAssembler::cmpptr ++ Label L_ok, L_bad; ++ BLOCK_COMMENT("verify_klass {"); ++ __ verify_oop(obj); ++ __ jcc(Assembler::zero, L_bad, obj); ++ __ push(temp); if (temp2 != noreg) __ push(temp2); ++#define UNPUSH { if (temp2 != noreg) __ pop(temp2); __ pop(temp); } ++ __ load_klass(temp, obj); ++ __ cmpptr(temp, ExternalAddress((address) klass_addr)); ++ __ jcc(Assembler::equal, L_ok); ++ int super_check_offset = klass->super_check_offset(); //long-> int may be a problem? need modify? 
jzy ++ __ ldptr(temp, Address(temp, super_check_offset)); ++ __ cmpptr(temp, ExternalAddress((address) klass_addr)); ++ __ jcc(Assembler::equal, L_ok); ++ UNPUSH; ++ __ BIND(L_bad); ++ __ stop(error_message); ++ __ BIND(L_ok); ++ UNPUSH; ++ BLOCK_COMMENT("} verify_klass"); ++} ++ ++void MethodHandles::verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) {SCOPEMARK_NAME(MethodHandles::verify_ref_kind, _masm) ++ Label L; ++ BLOCK_COMMENT("verify_ref_kind {"); ++ __ ldwu(temp, Address(member_reg, NONZERO(java_lang_invoke_MemberName::flags_offset_in_bytes()))); ++ __ srll(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_SHIFT, temp); ++ __ andw(temp, java_lang_invoke_MemberName::MN_REFERENCE_KIND_MASK, temp); ++ __ cmpw(temp, ref_kind); ++ __ jcc(Assembler::equal, L); ++ { char* buf = NEW_C_HEAP_ARRAY(char, 100, mtInternal); ++ jio_snprintf(buf, 100, "verify_ref_kind expected %x", ref_kind); ++ if (ref_kind == JVM_REF_invokeVirtual || ++ ref_kind == JVM_REF_invokeSpecial) ++ // could do this for all ref_kinds, but would explode assembly code size ++ trace_method_handle(_masm, buf); ++ __ stop(buf); ++ } ++ BLOCK_COMMENT("} verify_ref_kind"); ++ __ BIND(L); ++} ++ ++#endif //ASSERT ++ ++void MethodHandles::jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry) {SCOPEMARK_NAME(MethodHandles::jump_from_method_handle, _masm) ++ assert(method == rmethod, "interpreter calling convention"); ++ ++ Label L_no_such_method; ++ __ jcc(Assembler::zero, L_no_such_method, rmethod); ++ ++ __ verify_method_ptr(method); ++ ++ if (!for_compiler_entry && JvmtiExport::can_post_interpreter_events()) { ++ Label run_compiled_code; ++ // JVMTI events, such as single-stepping, are implemented partly by avoiding running ++ // compiled code in threads for which the event is enabled. Check here for ++ // interp_only_mode if these events CAN be enabled. ++ //Register rthread = rthread; ++ // interp_only is an int, on little endian it is sufficient to test the byte only ++ // Is a cmpl faster? ++ __ cmpb(Address(rthread, JavaThread::interp_only_mode_offset()), 0); ++ __ jcc(Assembler::zero, run_compiled_code); ++ __ jmp(Address(method, Method::interpreter_entry_offset())); ++ __ BIND(run_compiled_code); ++ } ++ ++ const ByteSize entry_offset = for_compiler_entry ? Method::from_compiled_offset() : ++ Method::from_interpreted_offset(); ++ __ jmp(Address(method, entry_offset)); ++ ++ __ bind(L_no_such_method); ++ __ jump(RuntimeAddress(StubRoutines::throw_AbstractMethodError_entry())); ++} ++ ++void MethodHandles::jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry) { ++ BLOCK_COMMENT("jump_to_lambda_form {"); ++ // This is the initial entry point of a lazy method handle. ++ // After type checking, it picks up the invoker from the LambdaForm. 
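The shift-and-mask that verify_ref_kind performs on MemberName.flags reads more easily in plain C++; in this sketch the shift and mask values are illustrative placeholders (only REF_invokeVirtual == 5 is taken from the JVM specification), not the values the patch loads from java_lang_invoke_MemberName:

#include <cassert>
#include <cstdint>

// The stub does the same extraction with srll followed by andw.
static int ref_kind_of(uint32_t member_name_flags,
                       unsigned shift /* MN_REFERENCE_KIND_SHIFT */,
                       uint32_t mask  /* MN_REFERENCE_KIND_MASK  */) {
  return (member_name_flags >> shift) & mask;
}

int main() {
  const unsigned kShift = 24;      // placeholder, not authoritative
  const uint32_t kMask  = 0x0F;    // placeholder, not authoritative
  const int kInvokeVirtual = 5;    // JVMS MethodHandle kind REF_invokeVirtual
  uint32_t flags = (uint32_t)kInvokeVirtual << kShift;   // pretend MemberName.flags
  assert(ref_kind_of(flags, kShift, kMask) == kInvokeVirtual);
  return 0;
}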
++ assert_different_registers(recv, method_temp, temp2, rscratch3); ++ assert(recv != noreg, "required register"); ++ assert(method_temp == rmethod, "required register for loading method"); ++ ++ //NOT_PRODUCT({ FlagSetting fs(TraceMethodHandles, true); trace_method_handle(_masm, "LZMH"); }); ++ ++ // Load the invoker, as MH -> MH.form -> LF.vmentry ++ __ verify_oop(recv); ++ __ load_heap_oop(method_temp, Address(recv, NONZERO(java_lang_invoke_MethodHandle::form_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_LambdaForm::vmentry_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ load_heap_oop(method_temp, Address(method_temp, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())), temp2); ++ __ verify_oop(method_temp); ++ __ access_load_at(T_ADDRESS, IN_HEAP, method_temp, ++ Address(method_temp, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())), ++ noreg, noreg); ++ ++ if (VerifyMethodHandles && !for_compiler_entry) { ++ // make sure recv is already on stack ++ __ ldptr(temp2, Address(method_temp, Method::const_offset())); ++ __ load_sized_value(temp2, ++ Address(temp2, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), /*is_signed*/ false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ Label L; ++ __ ldl(rscratch3, __ argument_address(temp2, -1)); ++ __ cmpoop(recv, rscratch3); ++ __ jcc(Assembler::equal, L); ++ __ ldptr(V0, __ argument_address(temp2, -1)); ++ __ stop("receiver not on stack"); ++ __ BIND(L); ++ } ++ ++ jump_from_method_handle(_masm, method_temp, temp2, for_compiler_entry); ++ BLOCK_COMMENT("} jump_to_lambda_form"); ++} ++ ++ ++// Code generation ++address MethodHandles::generate_method_handle_interpreter_entry(MacroAssembler* _masm, ++ vmIntrinsics::ID iid) {SCOPEMARK_NAME(MethodHandles::generate_method_handle_interpreter_entry, _masm) ++ const bool not_for_compiler_entry = false; // this is the interpreter entry ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ if (iid == vmIntrinsics::_invokeGeneric || ++ iid == vmIntrinsics::_compiledLambdaForm) { ++ // Perhaps surprisingly, the symbolic references visible to Java are not directly used. ++ // They are linked to Java-generated adapters via MethodHandleNatives.linkMethod. ++ // They all allow an appendix argument. 
++ __ stop("empty stubs make SG sick"); ++ return NULL; ++ } ++ ++ // rsi/r13: sender SP (must preserve; see prepare_to_jump_from_interpreted) ++ // rbx: Method* ++ // rdx: argument locator (parameter slot count, added to rsp) ++ // rcx: used as temp to hold mh or receiver ++ // rax, rdi: garbage temps, blown away ++ Register rdx_argp = rdx; // argument list ptr, live on error paths ++ Register rax_temp = rax; ++ Register rcx_mh = rcx; // MH receiver; dies quickly and is recycled ++ Register rbx_method = rbx; // eventual target of this invocation ++ //Register rcx = c_rarg3; ++ // here's where control starts out: ++ __ align(CodeEntryAlignment); ++ address entry_point = __ pc(); ++ ++ if (VerifyMethodHandles) { ++ assert(Method::intrinsic_id_size_in_bytes() == 2, "assuming Method::_intrinsic_id is u2"); ++ ++ Label L; ++ BLOCK_COMMENT("verify_intrinsic_id {"); ++ __ movw(rscratch3, (int)iid); ++ __ cmpw(Address(rbx_method, Method::intrinsic_id_offset_in_bytes()), rscratch3); ++ __ jcc(Assembler::equal, L); ++ if (iid == vmIntrinsics::_linkToVirtual || ++ iid == vmIntrinsics::_linkToSpecial) { ++ // could do this for all kinds, but would explode assembly code size ++ trace_method_handle(_masm, "bad Method*::intrinsic_id"); ++ } ++ __ stop("bad Method*::intrinsic_id"); ++ __ bind(L); ++ BLOCK_COMMENT("} verify_intrinsic_id"); ++ } ++ ++ // First task: Find out how big the argument list is. ++ Address rdx_first_arg_addr; ++ int ref_kind = signature_polymorphic_intrinsic_ref_kind(iid); ++ assert(ref_kind != 0 || iid == vmIntrinsics::_invokeBasic, "must be _invokeBasic or a linkTo intrinsic"); ++ if (ref_kind == 0 || MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ __ ldptr(rdx_argp, Address(rbx_method, Method::const_offset())); ++ __ load_sized_value(rdx_argp, ++ Address(rdx_argp, ConstMethod::size_of_parameters_offset()), ++ sizeof(u2), /*is_signed*/ false); ++ // assert(sizeof(u2) == sizeof(Method::_size_of_parameters), ""); ++ rdx_first_arg_addr = __ argument_address(rdx_argp, -1); ++ } else { ++ DEBUG_ONLY(rdx_argp = noreg); ++ } ++ ++ if (!is_signature_polymorphic_static(iid)) { ++ __ ldptr(rcx_mh, rdx_first_arg_addr); ++ DEBUG_ONLY(rdx_argp = noreg); ++ } ++ ++ // rdx_first_arg_addr is live! ++ ++ trace_method_handle_interpreter_entry(_masm, iid); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ generate_method_handle_dispatch(_masm, iid, rcx_mh, noreg, not_for_compiler_entry); ++ ++ } else { ++ // Adjust argument list by popping the trailing MemberName argument. ++ Register rcx_recv = noreg; ++ if (MethodHandles::ref_kind_has_receiver(ref_kind)) { ++ // Load the receiver (not the MH; the actual MemberName's receiver) up from the interpreter stack. 
++ __ ldptr(rcx_recv = rcx, rdx_first_arg_addr); ++ } ++ DEBUG_ONLY(rdx_argp = noreg); ++ Register rbx_member = rbx_method; // MemberName ptr; incoming method ptr is dead now ++ //TODO:__ stop("check:generate_method_handle_interpreter_entry jzy"); ++ //__ pop(rax_temp); // return address ++ __ pop(rbx_member); // extract last argument ++ //__ push(rax_temp); // re-push return address ++ generate_method_handle_dispatch(_masm, iid, rcx_recv, rbx_member, not_for_compiler_entry); ++ } ++ ++ return entry_point; ++} ++ ++void MethodHandles::generate_method_handle_dispatch(MacroAssembler* _masm, ++ vmIntrinsics::ID iid, ++ Register receiver_reg, ++ Register member_reg, ++ bool for_compiler_entry) {SCOPEMARK_NAME(MethodHandles::generate_method_handle_dispatch, _masm) ++ assert(is_signature_polymorphic(iid), "expected invoke iid"); ++ Register rbx_method = rbx; // eventual target of this invocation ++ // temps used in this code are not used in *either* compiled or interpreted calling sequences ++ ++ Register temp1 = rscratch1; ++ Register temp2 = rscratch2; ++ Register temp3 = rax; ++ if (for_compiler_entry) { ++ assert(receiver_reg == (iid == vmIntrinsics::_linkToStatic ? noreg : j_rarg0), "only valid assignment"); ++ assert_different_registers(temp1, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp2, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ assert_different_registers(temp3, j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5); ++ } ++ else { ++ assert_different_registers(temp1, temp2, temp3, saved_last_sp_register()); // don't trash lastSP ++ } ++ assert_different_registers(temp1, temp2, temp3, receiver_reg); ++ assert_different_registers(temp1, temp2, temp3, member_reg); ++ ++ if (iid == vmIntrinsics::_invokeBasic) { ++ // indirect through MH.form.vmentry.vmtarget ++ jump_to_lambda_form(_masm, receiver_reg, rbx_method, temp1, for_compiler_entry); ++ ++ } else { ++ // The method is a member invoker used by direct method handles. ++ if (VerifyMethodHandles) { ++ // make sure the trailing argument really is a MemberName (caller responsibility) ++ verify_klass(_masm, member_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MemberName), ++ "MemberName required for invokeVirtual etc."); ++ } ++ //TODO:__ stop("generate_method_handle_dispatch check:jzy"); ++ Address member_clazz( member_reg, NONZERO(java_lang_invoke_MemberName::clazz_offset_in_bytes())); ++ Address member_vmindex( member_reg, NONZERO(java_lang_invoke_MemberName::vmindex_offset_in_bytes())); ++ Address member_vmtarget( member_reg, NONZERO(java_lang_invoke_MemberName::method_offset_in_bytes())); ++ Address vmtarget_method( rbx_method, NONZERO(java_lang_invoke_ResolvedMethodName::vmtarget_offset_in_bytes())); ++ ++ Register temp1_recv_klass = temp1; ++ if (iid != vmIntrinsics::_linkToStatic) { ++ __ verify_oop(receiver_reg); ++ if (iid == vmIntrinsics::_linkToSpecial) { ++ // Don't actually load the klass; just null-check the receiver. ++ __ null_check(receiver_reg); ++ } else { ++ // load receiver klass itself ++ __ null_check(receiver_reg, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ BLOCK_COMMENT("check_receiver {"); ++ // The receiver for the MemberName must be in receiver_reg. ++ // Check the receiver against the MemberName.clazz ++ if (VerifyMethodHandles && iid == vmIntrinsics::_linkToSpecial) { ++ // Did not load it above... 
++ __ load_klass(temp1_recv_klass, receiver_reg); ++ __ verify_klass_ptr(temp1_recv_klass); ++ } ++ if (VerifyMethodHandles && iid != vmIntrinsics::_linkToInterface) { ++ Label L_ok; ++ Register temp2_defc = temp2; ++ __ load_heap_oop(temp2_defc, member_clazz, temp3); ++ load_klass_from_Class(_masm, temp2_defc); ++ __ verify_klass_ptr(temp2_defc); ++ __ check_klass_subtype(temp1_recv_klass, temp2_defc, temp3, L_ok); ++ // If we get here, the type check failed! ++ __ stop("receiver class disagrees with MemberName.clazz"); ++ __ BIND(L_ok); ++ } ++ BLOCK_COMMENT("} check_receiver"); ++ } ++ if (iid == vmIntrinsics::_linkToSpecial || ++ iid == vmIntrinsics::_linkToStatic) { ++ DEBUG_ONLY(temp1_recv_klass = noreg); // these guys didn't load the recv_klass ++ } ++ ++ // Live registers at this point: ++ // member_reg - MemberName that was the trailing argument ++ // temp1_recv_klass - klass of stacked receiver, if needed ++ // rsi/r13 - interpreter linkage (if interpreted) ++ // rcx, rdx, rsi, rdi, r8 - compiler arguments (if compiled) ++ ++ Label L_incompatible_class_change_error; ++ switch (iid) { ++ case vmIntrinsics::_linkToSpecial: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeSpecial, member_reg, temp3); ++ } ++ __ load_heap_oop(rbx_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rbx_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToStatic: ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeStatic, member_reg, temp3); ++ } ++ __ load_heap_oop(rbx_method, member_vmtarget); ++ __ access_load_at(T_ADDRESS, IN_HEAP, rbx_method, vmtarget_method, noreg, noreg); ++ break; ++ ++ case vmIntrinsics::_linkToVirtual: ++ { ++ // same as TemplateTable::invokevirtual, ++ // minus the CP setup and profiling: ++ ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeVirtual, member_reg, temp3); ++ } ++ ++ // pick out the vtable index from the MemberName, and then we can discard it: ++ Register temp2_index = temp2; ++ __ access_load_at(T_ADDRESS, IN_HEAP, temp2_index, member_vmindex, noreg, noreg); ++ ++ if (VerifyMethodHandles) { ++ Label L_index_ok; ++ __ cmpw(temp2_index, 0); ++ __ jcc(Assembler::greaterEqual, L_index_ok); ++ __ stop("no virtual index"); ++ __ BIND(L_index_ok); ++ } ++ ++ // Note: The verifier invariants allow us to ignore MemberName.clazz and vmtarget ++ // at this point. And VerifyMethodHandles has already checked clazz, if needed. 
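Conceptually, the linkToVirtual case above boils down to indexing the receiver klass's dispatch table with the index recovered from the MemberName; a toy model of that lookup (not HotSpot's actual Klass or vtable layout, just the shape of lookup_virtual_method):

#include <cassert>
#include <vector>

struct Method { int id; };                       // stand-in for Method*
struct Klass  { std::vector<Method*> vtable; };  // stand-in for the receiver klass

// Moral equivalent of __ lookup_virtual_method(recv_klass, index, result).
static Method* lookup_virtual_method(const Klass* recv_klass, int vtable_index) {
  return recv_klass->vtable[vtable_index];
}

int main() {
  Method m0{0}, m1{1};
  Klass k{{&m0, &m1}};
  assert(lookup_virtual_method(&k, 1)->id == 1);  // dispatch selects the indexed slot
  return 0;
}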
++ ++ // get target Method* & entry point ++ __ lookup_virtual_method(temp1_recv_klass, temp2_index, rbx_method); ++ break; ++ } ++ ++ case vmIntrinsics::_linkToInterface: ++ { ++ // same as TemplateTable::invokeinterface ++ // (minus the CP setup and profiling, with different argument motion) ++ if (VerifyMethodHandles) { ++ verify_ref_kind(_masm, JVM_REF_invokeInterface, member_reg, temp3); ++ } ++ ++ BarrierSetAssembler* bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ ++ Register temp3_intf = temp3; ++ __ load_heap_oop(temp3_intf, member_clazz); ++ load_klass_from_Class(_masm, temp3_intf); ++ __ verify_klass_ptr(temp3_intf); ++ ++ Register rbx_index = rbx_method; ++ __ access_load_at(T_ADDRESS, IN_HEAP, rbx_index, member_vmindex, noreg, noreg); ++ if (VerifyMethodHandles) { ++ Label L; ++ __ cmpw(rbx_index, 0); ++ __ jcc(Assembler::greaterEqual, L); ++ __ stop("invalid vtable index for MH.invokeInterface"); ++ __ BIND(L); ++ } ++ ++ // given intf, index, and recv klass, dispatch to the implementation method ++ __ lookup_interface_method(temp1_recv_klass, temp3_intf, ++ // note: next two args must be the same: ++ rbx_index, rbx_method, ++ temp2, ++ L_incompatible_class_change_error); ++ break; ++ } ++ ++ default: ++ fatal("unexpected intrinsic %d: %s", iid, vmIntrinsics::name_at(iid)); ++ break; ++ } ++ ++ // Live at this point: ++ // rbx_method ++ // rsi/r13 (if interpreted) ++ ++ // After figuring out which concrete method to call, jump into it. ++ // Note that this works in the interpreter with no data motion. ++ // But the compiled version will require that rcx_recv be shifted out. ++ __ verify_method_ptr(rbx_method); ++ jump_from_method_handle(_masm, rbx_method, temp1, for_compiler_entry); ++ ++ if (iid == vmIntrinsics::_linkToInterface) { ++ __ bind(L_incompatible_class_change_error); ++ __ jump(RuntimeAddress(StubRoutines::throw_IncompatibleClassChangeError_entry())); ++ } ++ } ++} ++ ++#ifndef PRODUCT ++void trace_method_handle_stub(const char* adaptername, ++ oop mh, ++ intptr_t* saved_regs, ++ intptr_t* entry_sp) { ++ // called as a leaf from native code: do not block the JVM! ++ bool has_mh = (strstr(adaptername, "/static") == NULL && ++ strstr(adaptername, "linkTo") == NULL); // static linkers don't have MH ++ const char* mh_reg_name = has_mh ? "rcx_mh" : "rcx"; ++ tty->print_cr("MH %s %s=" PTR_FORMAT " sp=" PTR_FORMAT, ++ adaptername, mh_reg_name, ++ p2i(mh), p2i(entry_sp)); ++ ++ if (Verbose) { ++ tty->print_cr("Registers:"); ++ const int saved_regs_count = RegisterImpl::number_of_registers; ++ for (int i = 0; i < saved_regs_count; i++) { ++ Register r = as_Register(i); ++ // The registers are stored in reverse order on the stack (by pusha). ++ tty->print("%3s=" PTR_FORMAT, r->name(), saved_regs[((saved_regs_count - 1) - i)]); ++ if ((i + 1) % 4 == 0) { ++ tty->cr(); ++ } else { ++ tty->print(", "); ++ } ++ } ++ tty->cr(); ++ ++ { ++ // dumping last frame with frame::describe ++ ++ JavaThread* p = JavaThread::active(); ++ ++ ResourceMark rm; ++ PRESERVE_EXCEPTION_MARK; // may not be needed by safer and unexpensive here ++ FrameValues values; ++ ++ // Note: We want to allow trace_method_handle from any call site. ++ // While trace_method_handle creates a frame, it may be entered ++ // without a PC on the stack top (e.g. not just after a call). ++ // Walking that frame could lead to failures due to that invalid PC. 
++ // => carefully detect that frame when doing the stack walking ++ ++ // Current C frame ++ frame cur_frame = os::current_frame(); ++ ++ // Robust search of trace_calling_frame (independant of inlining). ++ // Assumes saved_regs comes from a pusha in the trace_calling_frame. ++ assert(cur_frame.sp() < saved_regs, "registers not saved on stack ?"); ++ frame trace_calling_frame = os::get_sender_for_C_frame(&cur_frame); ++ while (trace_calling_frame.fp() < saved_regs) { ++ trace_calling_frame = os::get_sender_for_C_frame(&trace_calling_frame); ++ } ++ ++ // safely create a frame and call frame::describe ++ intptr_t *dump_sp = trace_calling_frame.sender_sp(); ++ intptr_t *dump_fp = trace_calling_frame.link(); ++ ++ bool walkable = has_mh; // whether the traced frame shoud be walkable ++ ++ if (walkable) { ++ // The previous definition of walkable may have to be refined ++ // if new call sites cause the next frame constructor to start ++ // failing. Alternatively, frame constructors could be ++ // modified to support the current or future non walkable ++ // frames (but this is more intrusive and is not considered as ++ // part of this RFE, which will instead use a simpler output). ++ frame dump_frame = frame(dump_sp, dump_fp); ++ dump_frame.describe(values, 1); ++ } else { ++ // Stack may not be walkable (invalid PC above FP): ++ // Add descriptions without building a Java frame to avoid issues ++ values.describe(-1, dump_fp, "fp for #1 "); ++ values.describe(-1, dump_sp, "sp for #1"); ++ } ++ values.describe(-1, entry_sp, "raw top of stack"); ++ ++ tty->print_cr("Stack layout:"); ++ values.print(p); ++ } ++ if (has_mh && oopDesc::is_oop(mh)) { ++ mh->print(); ++ if (java_lang_invoke_MethodHandle::is_instance(mh)) { ++ if (java_lang_invoke_MethodHandle::form_offset_in_bytes() != 0) ++ java_lang_invoke_MethodHandle::form(mh)->print(); ++ } ++} ++ } ++} ++ ++// The stub wraps the arguments in a struct on the stack to avoid ++// dealing with the different calling conventions for passing 6 ++// arguments. ++struct MethodHandleStubArguments { ++ const char* adaptername; ++ oopDesc* mh; ++ intptr_t* saved_regs; ++ intptr_t* entry_sp; ++}; ++void trace_method_handle_stub_wrapper(MethodHandleStubArguments* args) { ++ trace_method_handle_stub(args->adaptername, ++ args->mh, ++ args->saved_regs, ++ args->entry_sp); ++} ++ ++void MethodHandles::trace_method_handle(MacroAssembler* _masm, const char* adaptername) {SCOPEMARK_NAME(MethodHandles::trace_method_handle, _masm) ++ if (!TraceMethodHandles) return;Unimplemented(); ++ BLOCK_COMMENT(err_msg("trace_method_handle %s {", adaptername)); ++ Register rbx = R0; //? jzy ++ __ stop("check: trace_method_handle jzy"); ++ __ enter(); ++ __ andptr(esp, -16, esp); // align stack if needed for FPU state ++ __ pushad(); ++ __ movl(r12_heapbase, esp); // for retreiving saved_regs ++ // Note: saved_regs must be in the entered frame for the ++ // robust stack walking implemented in trace_method_handle_stub. ++ ++ // save FP result, valid at some call sites (adapter_opt_return_float, ...) ++ __ increment(esp, -2 * wordSize); ++ __ store_double(FSF, Address(esp, 0)); ++ ++ // Incoming state: ++ // rcx: method handle ++ // ++ // To avoid calling convention issues, build a record on the stack ++ // and pass the pointer to that instead. ++ Register rbp = rfp; ++ Register rcx = R0; //? 
jzy ++ __ push(rbp); // entry_sp (with extra align space) ++ __ push(rbx); // pusha saved_regs ++ __ push(rcx); // mh ++ __ push(rcx); // slot for adaptername ++ __ mov_immediate64(rscratch1, (intptr_t) adaptername); ++ __ stptr(rscratch1, Address(esp, 0)); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, trace_method_handle_stub_wrapper), esp); ++ __ increment(esp, sizeof(MethodHandleStubArguments)); ++ ++ __ load_double(FSF, Address(esp, 0)); ++ __ increment(esp, 2 * wordSize); ++ ++ __ popad(); ++ __ leave(); ++ BLOCK_COMMENT("} trace_method_handle"); ++} ++#endif //PRODUCT +diff --git a/src/hotspot/cpu/sw64/methodHandles_sw64.hpp b/src/hotspot/cpu/sw64/methodHandles_sw64.hpp +new file mode 100644 +index 0000000000..78e1a5545c +--- /dev/null ++++ b/src/hotspot/cpu/sw64/methodHandles_sw64.hpp +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2010, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// Platform-specific definitions for method handles. ++// These definitions are inlined into class MethodHandles. ++ ++// Adapters ++enum /* platform_dependent_constants */ { ++ adapter_code_size = 32000 DEBUG_ONLY(+ 150000) ++}; ++ ++public: ++ ++ static void load_klass_from_Class(MacroAssembler* _masm, Register klass_reg); ++ ++ static void verify_klass(MacroAssembler* _masm, ++ Register obj, SystemDictionary::WKID klass_id, ++ const char* error_message = "wrong klass") NOT_DEBUG_RETURN; ++ ++ static void verify_method_handle(MacroAssembler* _masm, Register mh_reg) { ++ verify_klass(_masm, mh_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_invoke_MethodHandle), ++ "reference is a MH"); ++ } ++ ++ static void verify_ref_kind(MacroAssembler* _masm, int ref_kind, Register member_reg, Register temp) NOT_DEBUG_RETURN; ++ ++ // Similar to InterpreterMacroAssembler::jump_from_interpreted. ++ // Takes care of special dispatch from single stepping too. ++ static void jump_from_method_handle(MacroAssembler* _masm, Register method, Register temp, ++ bool for_compiler_entry); ++ ++ static void jump_to_lambda_form(MacroAssembler* _masm, ++ Register recv, Register method_temp, ++ Register temp2, ++ bool for_compiler_entry); ++ ++ static Register saved_last_sp_register() { ++ // Should be in sharedRuntime, not here. 
++ return i29; ++ } +diff --git a/src/hotspot/cpu/sw64/nativeInst_sw64.cpp b/src/hotspot/cpu/sw64/nativeInst_sw64.cpp +new file mode 100755 +index 0000000000..8cf3a62cde +--- /dev/null ++++ b/src/hotspot/cpu/sw64/nativeInst_sw64.cpp +@@ -0,0 +1,773 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "jvm.h" ++#include "asm/macroAssembler.hpp" ++#include "classfile/javaClasses.inline.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "memory/allocation.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/flags/flagSetting.hpp" ++#include "runtime/frame.inline.hpp" ++#include "utilities/preserveException.hpp" ++ ++#include ++ ++#define __ _masm-> ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#define STOP(error) stop(error) ++#else ++#define BLOCK_COMMENT(str) __ block_comment(str) ++#define STOP(error) block_comment(error); __ stop(error) ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++int NativeCall::instruction_size = 5 * BytesPerInstWord; ++int NativeCall::return_address_offset = 5 * BytesPerInstWord; ++int NativeJump::instruction_size = 5 * BytesPerInstWord; ++int NativeJump::next_instruction_offset = 5 * BytesPerInstWord; ++ ++void NativeInstruction::imm48_split(long imm48, int16_t &msb_l, int16_t &lsb_h, int16_t &lsb_l) { ++ int32_t lsb32 = (int32_t) ((intptr_t) imm48); ++ int32_t msb32 = (int32_t) (((intptr_t) imm48 - lsb32) >> 32); ++ ++ msb_l = (int16_t) msb32; ++ lsb_h = (lsb32 - (int16_t) lsb32) >> 16; ++ lsb_l = (int16_t) lsb32; ++ guarantee((msb_l >= 0x0 && msb_l < 0x7fff) || (msb_l == 0x7fff && lsb32 >= 0x0 && lsb32 < 0x7fff8000), "wrong number in li48 "); ++ if (lsb32 >= 0x7fff8000) ++ msb_l = msb_l + 1; ++} ++ ++//void MethodHandles::load_klass_from_Class(MacroAssembler* _masm, Register klass_reg) { ++// if (VerifyMethodHandles) ++// verify_klass(_masm, klass_reg, SystemDictionary::WK_KLASS_ENUM_NAME(java_lang_Class), ++// "MH argument is a Class"); ++// __ ldptr(klass_reg, Address(klass_reg, java_lang_Class::klass_offset_in_bytes())); ++//} ++ ++ ++/** ++ * x86 ++ * NativeInstruction::set_ptr_at(data_offset, x) ++ * sw64 ++ * NativeInstruction::set_address(address dest) ++ * note ++ * x86 call/jmp 64bits destination embedded following the opcodes ++ * sw64 call/jmp 48bits destination split in the disp in the ldi/sll/ldih/ldi sequence ++ */ 
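The li48 encoding used throughout this file can be checked with a host-side round trip: the split below mirrors imm48_split (including the carry fix-up near 0x7fff8000), and the reassembly matches what NativeMovConstReg::data() computes. The sample address is arbitrary, avoids the carry corner case, and assumes an LP64 long, as the patch itself does:

#include <cassert>
#include <cstdint>

// Host-side copy of NativeInstruction::imm48_split for illustration.
static void imm48_split(long imm48, int16_t& msb_l, int16_t& lsb_h, int16_t& lsb_l) {
  int32_t lsb32 = (int32_t)((intptr_t)imm48);
  int32_t msb32 = (int32_t)(((intptr_t)imm48 - lsb32) >> 32);
  msb_l = (int16_t)msb32;
  lsb_h = (int16_t)((lsb32 - (int16_t)lsb32) >> 16);
  lsb_l = (int16_t)lsb32;
  if (lsb32 >= 0x7fff8000) msb_l = msb_l + 1;   // carry into the high half
}

int main() {
  long addr = 0x00007f12345687f0L;              // arbitrary 48-bit-looking value
  int16_t msb_l, lsb_h, lsb_l;
  imm48_split(addr, msb_l, lsb_h, lsb_l);
  // Reassembly as done by NativeMovConstReg::data() for the li48 pattern.
  long rebuilt = ((long)msb_l << 32) + ((long)lsb_h << 16) + (long)lsb_l;
  assert(rebuilt == addr);
  return 0;
}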
++void NativeInstruction::set_address(address dest) { ++// Unimplemented(); ++ if (SafePatch) { ++ if (is_op(int_at(0), Assembler::op_ldi) && ++ is_op(int_at(4), Assembler::op_br) && ++ is_op(int_at(16), Assembler::op_ldl)) { ++ set_long_at(8, (long) dest); ++ } else if (is_op(int_at(0), Assembler::op_br) && ++ is_op(int_at(12), Assembler::op_ldl) && ++ is_op(int_at(16), Assembler::op_ldi)) { ++ set_long_at(4, (long) dest); ++ } else { ++ tty->print_cr("\nError!\nset_address: 0x%lx", addr_at(0)); ++ Disassembler::decode(addr_at(0) - 10 * 4, addr_at(0) + 10 * 4, tty); ++ fatal("not a call "); ++ } ++ } else { ++ OrderAccess::fence(); ++ int16_t msb_l, lsb_h, lsb_l; ++ NativeInstruction::imm48_split((long) dest, msb_l, lsb_h, lsb_l); ++ /* li48 or li64 */ ++ if (is_op(int_at(0), Assembler::op_ldi) && is_op(int_at(4), Assembler::op_slll_l)) { ++ int first_word = int_at(0); ++ set_int_at(0, 0x13FFFFFF); /* .1: br .1 */ ++ set_int_at(8, (int_at(8) & 0xffff0000) | (lsb_h & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (lsb_l & 0xffff)); ++ set_int_at(0, (first_word & 0xffff0000) | (msb_l & 0xffff)); ++ ++ // ICache::invalidate_range(addr_at(0), 16); ++ } else if (is_op(int_at(0), Assembler::op_ldih) && is_op(int_at(8), Assembler::op_slll_l)) { ++ Unimplemented(); ++ } else { ++ fatal("not a call "); ++ } ++ } ++} ++ ++void NativeInstruction::set_long_at(int offset, long i) { ++ address addr = addr_at(offset); ++ *(long *) addr = i; ++} ++ ++void NativeInstruction::wrote(int offset) { ++ //ICache::invalidate_word(addr_at(offset)); ++} ++ ++void NativeLoadGot::report_and_fail() const { ++ tty->print_cr("Addr: " INTPTR_FORMAT, p2i(instruction_address())); ++ fatal("not a indirect rip mov to rbx"); ++} ++ ++void NativeLoadGot::verify() const { ++ if (has_rex) { ++ int rex = ubyte_at(0); ++ if (rex != rex_prefix) { ++ report_and_fail(); ++ } ++ } ++ ++ int inst = ubyte_at(rex_size); ++ if (inst != instruction_code) { ++ report_and_fail(); ++ } ++ int modrm = ubyte_at(rex_size + 1); ++ if (modrm != modrm_rbx_code && modrm != modrm_rax_code) { ++ report_and_fail(); ++ } ++} ++ ++intptr_t NativeLoadGot::data() const { ++ Unimplemented(); ++ return *(intptr_t *) got_address(); ++} ++ ++address NativePltCall::destination() const { ++ ShouldNotReachHere(); ++ NativeGotJump* jump = nativeGotJump_at(plt_jump()); ++ return jump->destination(); ++} ++ ++address NativePltCall::plt_entry() const { ++ ShouldNotReachHere(); ++ return return_address() + displacement(); ++ } ++ ++address NativePltCall::plt_jump() const { ++ ShouldNotReachHere(); ++ address entry = plt_entry(); ++ // Virtual PLT code has move instruction first ++ if (((NativeGotJump*)entry)->is_GotJump()) { ++ return entry; ++ } else { ++ return nativeLoadGot_at(entry)->next_instruction_address(); ++ } ++} ++ ++address NativePltCall::plt_load_got() const { ++ ShouldNotReachHere(); ++ address entry = plt_entry(); ++ if (!((NativeGotJump*)entry)->is_GotJump()) { ++ // Virtual PLT code has move instruction first ++ return entry; ++ } else { ++ // Static PLT code has move instruction second (from c2i stub) ++ return nativeGotJump_at(entry)->next_instruction_address(); ++ } ++} ++ ++address NativePltCall::plt_c2i_stub() const { ++ ShouldNotReachHere(); ++ address entry = plt_load_got(); ++ // This method should be called only for static calls which has C2I stub. 
++ NativeLoadGot* load = nativeLoadGot_at(entry); ++ return entry; ++} ++ ++address NativePltCall::plt_resolve_call() const { ++ ShouldNotReachHere(); ++ NativeGotJump* jump = nativeGotJump_at(plt_jump()); ++ address entry = jump->next_instruction_address(); ++ if (((NativeGotJump*)entry)->is_GotJump()) { ++ return entry; ++ } else { ++ // c2i stub 2 instructions ++ entry = nativeLoadGot_at(entry)->next_instruction_address(); ++ return nativeGotJump_at(entry)->next_instruction_address(); ++} ++} ++ ++void NativePltCall::reset_to_plt_resolve_call() { ++ set_destination_mt_safe(plt_resolve_call()); ++} ++ ++void NativePltCall::set_destination_mt_safe(address dest) { ++ ShouldNotReachHere(); ++ // rewriting the value in the GOT, it should always be aligned ++ NativeGotJump* jump = nativeGotJump_at(plt_jump()); ++ address* got = (address *) jump->got_address(); ++ *got = dest; ++} ++ ++void NativePltCall::set_stub_to_clean() { ++ ShouldNotReachHere(); ++ NativeLoadGot* method_loader = nativeLoadGot_at(plt_c2i_stub()); ++ NativeGotJump* jump = nativeGotJump_at(method_loader->next_instruction_address()); ++ method_loader->set_data(0); ++ jump->set_jump_destination((address)-1); ++} ++ ++void NativePltCall::verify() const { ++ ShouldNotReachHere(); ++ // Make sure code pattern is actually a call rip+off32 instruction. ++ int inst = ubyte_at(0); ++ if (inst != instruction_code) { ++ tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", p2i(instruction_address()), ++ inst); ++ fatal("not a call rip+off32"); ++ } ++} ++ ++address NativeGotJump::destination() const { ++ ShouldNotReachHere(); ++ address *got_entry = (address *) got_address(); ++ return *got_entry; ++} ++ ++void NativeGotJump::verify() const { ++ ShouldNotReachHere(); ++ int inst = ubyte_at(0); ++ if (inst != instruction_code) { ++ tty->print_cr("Addr: " INTPTR_FORMAT " Code: 0x%x", p2i(instruction_address()), ++ inst); ++ fatal("not a indirect rip jump"); ++ } ++} ++ ++void NativeCall::verify() { ++ NativeMovConstReg* mov = nativeMovConstReg_at(addr_at(0)); ++ NativeInstruction* call = nativeInstruction_at(addr_at(0) + NativeCall::instruction_size - 4); ++ ++ if (mov->is_mov_ptr() && call->is_call_reg()) return; ++ ++ fatal("not a call instruction"); ++} ++ ++address NativeCall::destination() const { ++ if (SafePatch) { ++ if (is_op(int_at(0), Assembler::op_ldi) && ++ is_op(int_at(4), Assembler::op_br) && ++ is_op(int_at(16), Assembler::op_ldl)) { ++ return (address) long_at(8); ++ } else if (is_op(int_at(0), Assembler::op_br) && ++ is_op(int_at(12), Assembler::op_ldl) && ++ is_op(int_at(16), Assembler::op_ldi)) { ++ return (address) long_at(4); ++ } else { ++ tty->print_cr("\nError!\ndestination: 0x%lx", addr_at(0)); ++ Disassembler::decode(addr_at(0) - 10 * 4, addr_at(0) + 10 * 4, tty); ++ fatal("not a call "); ++ } ++ } else { ++ NativeMovConstReg *mov = nativeMovConstReg_at(addr_at(0)); ++ return (address) mov->data(); ++ } ++} ++ ++void NativeCall::print() { ++ tty->print_cr(PTR_FORMAT ": call " PTR_FORMAT, ++ p2i(instruction_address()), p2i(destination())); ++} ++ ++// Inserts a native call instruction at a given pc ++void NativeCall::insert(address code_pos, address entry) { ++ NativeCall *call = nativeCall_at(code_pos); ++ CodeBuffer cb(call->addr_at(0), instruction_size); ++ MacroAssembler masm(&cb); ++#define __ masm. 
++ if (SafePatch) { ++ if (__ offset() % 8 == 0) { ++ __ nop(); ++ __ br(T12, 2); ++ __ emit_int64((long) entry); ++ __ ldl(T12, 0, T12); ++ } else { ++ __ br(T12, 2); ++ __ emit_int64((long) entry); ++ __ ldl(T12, 0, T12); ++ __ nop(); ++ } ++ } else { ++ __ prepare_patch_li48(T12, (long) entry); ++ } ++ __ call(T12); ++#undef __ ++ ++ // ICache::invalidate_range(call->addr_at(0), instruction_size); ++} ++ ++// MT-safe patching of a call instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. ++void NativeCall::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++ assert(Patching_lock->is_locked() || ++ SafepointSynchronize::is_at_safepoint(), "concurrent code patching"); ++ assert (instr_addr != NULL, "illegal address for code patching"); ++ ++ NativeCall* n_call = nativeCall_at (instr_addr); // checking that it is a call ++ if (os::is_MP()) { ++ guarantee((intptr_t)instr_addr % BytesPerWord == 0, "must be aligned"); ++ } ++ ++ // First patch dummy jmp in place ++ unsigned char patch[4]; ++ assert(sizeof(patch)==sizeof(jint), "sanity check"); ++ patch[0] = 0xEB; // jmp rel8 ++ patch[1] = 0xFE; // jmp to self ++ patch[2] = 0xEB; ++ patch[3] = 0xFE; ++ ++ // First patch dummy jmp in place ++ *(jint*)instr_addr = *(jint *)patch; ++ ++ // Invalidate. Opteron requires a flush after every write. ++ n_call->wrote(0); ++ ++ // Patch 4th byte ++ instr_addr[4] = code_buffer[4]; ++ ++ n_call->wrote(4); ++ ++ // Patch bytes 0-3 ++ *(jint*)instr_addr = *(jint *)code_buffer; ++ ++ n_call->wrote(0); ++ ++#ifdef ASSERT ++ // verify patching ++ for ( int i = 0; i < instruction_size; i++) { ++ address ptr = (address)((intptr_t)code_buffer + i); ++ int a_byte = (*ptr) & 0xFF; ++ assert(*((address)((intptr_t)instr_addr + i)) == a_byte, "mt safe patching failed"); ++ } ++#endif ++ ++} ++ ++ ++// Similar to replace_mt_safe, but just changes the destination. The ++// important thing is that free-running threads are able to execute this ++// call instruction at all times. If the displacement field is aligned ++// we can simply rely on atomicity of 32-bit writes to make sure other threads ++// will see no intermediate states. Otherwise, the first two bytes of the ++// call are guaranteed to be aligned, and can be atomically patched to a ++// self-loop to guard the instruction while we change the other bytes. ++ ++// We cannot rely on locks here, since the free-running threads must run at ++// full speed. ++// ++// Used in the runtime linkage of calls; see class CompiledIC. ++// (Cf. 4506997 and 4479829, where threads witnessed garbage displacements.) 
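In SafePatch mode the call destination is a raw 64-bit literal sitting next to the code, so set_address() can swap it with a single aligned 8-byte store; in the li48 mode, set_address()/set_data() first park concurrent executors on the branch-to-self word 0x13FFFFFF (the value wait_until_not_spinng() spins on) before rewriting the immediates. A host-side sketch of that ordering over a plain array, with placeholder instruction words and std::atomic_thread_fence standing in for OrderAccess::fence():

#include <atomic>
#include <cassert>
#include <cstdint>

// insn[0..3] model the ldi/sll/ldih/ldi sequence; only the 16-bit immediates change.
static void patch_li48(int32_t insn[4], int16_t msb_l, int16_t lsb_h, int16_t lsb_l) {
  std::atomic_thread_fence(std::memory_order_seq_cst);     // stand-in for OrderAccess::fence()
  int32_t first_word = insn[0];
  insn[0] = 0x13FFFFFF;                                    // ".1: br .1" (executors spin on this positive word)
  insn[2] = (insn[2] & 0xffff0000) | (lsb_h & 0xffff);     // new bits 31..16 of the low half
  insn[3] = (insn[3] & 0xffff0000) | (lsb_l & 0xffff);     // new bits 15..0 of the low half
  insn[0] = (first_word & 0xffff0000) | (msb_l & 0xffff);  // republish word 0 with bits 47..32
}

int main() {
  // Placeholder words, not real SW64 encodings; only the low 16 bits are rewritten.
  int32_t insn[4] = { 0x11110000, 0x22220000, 0x33330000, 0x44440000 };
  patch_li48(insn, 0x1234, 0x5678, 0x0abc);
  assert((insn[0] & 0xffff) == 0x1234 &&
         (insn[2] & 0xffff) == 0x5678 &&
         (insn[3] & 0xffff) == 0x0abc);
  return 0;
}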
++void NativeCall::set_destination_mt_safe(address dest) {//Unimplemented(); ++ set_destination(dest); ++} ++ ++ ++void NativeMovConstReg::verify() { ++ if (is_op(int_at(0), Assembler::op_ldih) && ++ is_op(int_at(4), Assembler::op_ldi) && ++ is_op(int_at(8), Assembler::op_slll_l) && ++ is_op(int_at(12), Assembler::op_ldih) && ++ is_op(int_at(16), Assembler::op_ldi)) { ++ return; ++ } ++ ++ if (is_op(int_at(0), Assembler::op_ldi) && ++ is_op(int_at(4), Assembler::op_slll_l) && ++ is_op(int_at(8), Assembler::op_ldih) && ++ is_op(int_at(12), Assembler::op_ldi)) { ++ return; ++ } ++ ++ if (is_op(int_at(0), Assembler::op_ldi) && ++ is_op(int_at(4), Assembler::op_br) && ++ is_op(int_at(16), Assembler::op_ldl)) { ++ return; ++ } ++ if (is_op(int_at(0), Assembler::op_br) && ++ is_op(int_at(12), Assembler::op_ldl) && ++ is_op(int_at(16), Assembler::op_ldi)) { ++ return; ++ } ++ if (!nativeInstruction_at(addr_at(0))->is_mov_ptr()) { ++ print(); ++ fatal("not a mov reg64, ptr"); ++ } ++} ++ ++ ++void NativeMovConstReg::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, " INTPTR_FORMAT, ++ p2i(instruction_address()), data()); ++} ++ ++//------------------------------------------------------------------- ++ ++int NativeMovRegMem::instruction_start() const { ++ Unimplemented(); ++ int off = 0; ++ u_char instr_0 = ubyte_at(off); ++ return off; ++} ++ ++int NativeMovRegMem::patch_offset() const { ++ int off = data_offset + instruction_start(); ++ u_char mod_rm = *(u_char*)(instruction_address() + 1); ++ // nnnn(r12|rsp) isn't coded as simple mod/rm since that is ++ // the encoding to use an SIB byte. Which will have the nnnn ++ // field off by one byte ++ if ((mod_rm & 7) == 0x4) { ++ off++; ++ } ++ return off; ++} ++ ++void NativeMovRegMem::verify() {Unimplemented(); ++ // make sure code pattern is actually a mov [reg+offset], reg instruction ++ u_char test_byte = *(u_char*)instruction_address(); ++ switch (test_byte) { ++ case instruction_code_reg2memb: // 0x88 movb a, r ++ case instruction_code_reg2mem: // 0x89 movl a, r (can be movq in 64bit) ++ case instruction_code_mem2regb: // 0x8a movb r, a ++ case instruction_code_mem2reg: // 0x8b movl r, a (can be movq in 64bit) ++ break; ++ ++ case instruction_code_mem2reg_movslq: // 0x63 movsql r, a ++ case instruction_code_mem2reg_movzxb: // 0xb6 movzbl r, a (movzxb) ++ case instruction_code_mem2reg_movzxw: // 0xb7 movzwl r, a (movzxw) ++ case instruction_code_mem2reg_movsxb: // 0xbe movsbl r, a (movsxb) ++ case instruction_code_mem2reg_movsxw: // 0xbf movswl r, a (movsxw) ++ break; ++ ++ case instruction_code_float_s: // 0xd9 fld_s a ++ case instruction_code_float_d: // 0xdd fld_d a ++ case instruction_code_xmm_load: // 0x10 movsd xmm, a ++ case instruction_code_xmm_store: // 0x11 movsd a, xmm ++ case instruction_code_xmm_lpd: // 0x12 movlpd xmm, a ++ break; ++ ++ case instruction_code_lea: // 0x8d lea r, a ++ break; ++ ++ default: ++ fatal ("not a mov [reg+offs], reg instruction"); ++ } ++} ++ ++ ++void NativeMovRegMem::print() { ++ tty->print_cr(PTR_FORMAT ": mov reg, [reg + %x]", p2i(instruction_address()), offset()); ++} ++ ++//------------------------------------------------------------------- ++ ++void NativeLoadAddress::verify() { ++ // make sure code pattern is actually a mov [reg+offset], reg instruction ++ /*u_char test_byte = *(u_char*)instruction_address(); ++ ++ if ( (test_byte == instruction_prefix_wide || ++ test_byte == instruction_prefix_wide_extended) ) { ++ test_byte = *(u_char*)(instruction_address() + 1); ++ } ++ ++ if ( ! 
((test_byte == lea_instruction_code) ++ LP64_ONLY(|| (test_byte == mov64_instruction_code) ))) { ++ fatal ("not a lea reg, [reg+offs] instruction"); ++ }*/ ++} ++ ++ ++void NativeLoadAddress::print() { ++ tty->print_cr(PTR_FORMAT ": lea [reg + %x], reg", p2i(instruction_address()), offset()); ++} ++ ++//-------------------------------------------------------------------------------- ++ ++void NativeJump::verify() { ++ NativeMovConstReg* mov = nativeMovConstReg_at(addr_at(0)); ++ // -4 because not include jmp instruction ++ NativeInstruction* jmp = nativeInstruction_at(addr_at(0) + NativeJump::instruction_size - 4); ++ ++ if (mov->is_mov_ptr() && jmp->is_jump_reg()) return; ++ ++ fatal("not a jump instruction"); ++} ++ ++bool NativeInstruction::is_sigill_zombie_not_entrant() { ++ return int_at(0) == NativeIllegalInstruction::instruction_code; ++} ++void NativeJump::insert(address code_pos, address entry) { ++ Unimplemented(); ++// intptr_t disp = (intptr_t)entry - ((intptr_t)code_pos + 1 + 4); ++// ++// guarantee(disp == (intptr_t)(int32_t)disp, "must be 32-bit offset"); ++// ++// *code_pos = instruction_code; ++// *((int32_t*)(code_pos + 1)) = (int32_t)disp; ++// ++// ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeJump::check_verified_entry_alignment(address entry, address verified_entry) { ++ //Unimplemented(); ++ // Patching to not_entrant can happen while activations of the method are ++ // in use. The patching in that instance must happen only when certain ++ // alignment restrictions are true. These guarantees check those ++ // conditions. ++ ++ const int linesize = 64; ++ ++ // Must be wordSize aligned ++ guarantee(((uintptr_t) verified_entry & (wordSize -1)) == 0, ++ "illegal address for code patching 2"); ++ // First 5 bytes must be within the same cache line - 4827828 ++ guarantee((uintptr_t) verified_entry / linesize == ++ ((uintptr_t) verified_entry + 4) / linesize, ++ "illegal address for code patching 3"); ++} ++ ++// manual implementation of stl ++// ++// 00000001200009c0 : ++// 0: 10 01 11 42 addq a0,a1,a0 ++// 4: 00 00 50 ae stq a2,0(a0) ++// 8: 01 00 fa 0b ret zero,(ra),0x1 ++// c: 5f 07 ff 43 nop(excb) ++// ++typedef void (* atomic_store64_ptr)(long *addr, int offset, long data64); ++ ++static int *buf; ++static atomic_store64_ptr get_atomic_store64_func() { ++ static atomic_store64_ptr p = NULL; ++ if (p != NULL) ++ return p; ++ ++ buf = (int *)mmap(NULL, 64, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS, ++ -1, 0); ++ buf[0] = 0x42110110; ++ buf[1] = 0xae500000; /* stq $a2, 0($a0) */ ++ buf[2] = 0x0bfa0001; ++ buf[3] = 0x43ff075f; /* nop */ ++ ++ p = (atomic_store64_ptr)buf; ++ return p; ++} ++ ++// MT safe inserting of a jump over an unknown instruction sequence (used by nmethod::makeZombie) ++// The problem: jmp is a 5-byte instruction. Atomical write can be only with 4 bytes. ++// First patches the first word atomically to be a jump to itself. ++// Then patches the last byte and then atomically patches the first word (4-bytes), ++// thus inserting the desired jump ++// This code is mt-safe with the following conditions: entry point is 4 byte aligned, ++// entry point is in same cache line as unverified entry point, and the instruction being ++// patched is >= 5 byte (size of patch). ++// ++// In C2 the 5+ byte sized instruction is enforced by code in MachPrologNode::emit. ++// In C1 the restriction is enforced by CodeEmitter::method_entry ++// In JVMCI, the restriction is enforced by HotSpotFrameContext.enter(...) 
++// ++void NativeJump::patch_verified_entry(address entry, address verified_entry, address dest) { ++ ++ // ensure 100% atomicity. ++ // The destination is fixed and can be cached in JavaThread. ++ ++ guarantee(dest == SharedRuntime::get_handle_wrong_method_stub(), "expected fixed destination of patch"); ++ NativeIllegalInstruction::insert(verified_entry); ++ ++// guarantee(!os::is_MP() || (((long)verified_entry % BytesPerWord) == 0), "destination must be aligned for SD"); ++// bool is_aligned = !os::is_MP() || (((long)verified_entry % BytesPerWord) == 0); ++// ++// if (is_aligned) { ++// int code_buffer[4]; ++// ++// CodeBuffer cb((address)code_buffer, instruction_size); ++// MacroAssembler masm(&cb); ++//#define __ masm. ++// __ ldl(T12, Address(rthread, in_bytes(JavaThread::handle_wrong_method_stub_offset()))); ++// __ jmp(T12); ++// __ nop(); ++// __ nop(); ++// ++// atomic_store64_ptr func = get_atomic_store64_func(); ++// (*func)((long *)verified_entry, 0, *(long *)&code_buffer[0]); ++// } else { ++//// if (Assembler::reachable_from_branch_at(verified_entry, dest)) { //for SW8A ++//// ptrdiff_t disp = dest - verified_entry - 4; ++//// guarantee(disp < 1 << 27 && disp > - (1 << 27), "branch overflow"); ++//// unsigned int insn = (0x1D << 26) | ((disp >> 2) & 0x3ffffff); ++////// *(unsigned int*)verified_entry = insn; ++//// } else { ++// // We use an illegal instruction for marking a method as ++// // not_entrant or zombie ++// NativeIllegalInstruction::insert(verified_entry); ++// } ++} ++ ++//address NativeFarJump::jump_destination() const { ++// NativeMovConstReg* mov = nativeMovConstReg_at(addr_at(0)); ++// return (address)mov->data(); ++//} ++// ++//void NativeFarJump::verify() { ++// if (is_far_jump()) { ++// NativeMovConstReg* mov = nativeMovConstReg_at(addr_at(0)); ++// NativeInstruction* jmp = nativeInstruction_at(mov->next_instruction_address()); ++// if (jmp->is_jump_reg()) return; ++// } ++// fatal("not a jump instruction"); ++//} ++ ++void NativePopReg::insert(address code_pos, Register reg) { ++ Unimplemented(); ++ assert(reg->encoding() < 8, "no space for REX"); ++ assert(NativePopReg::instruction_size == sizeof(char), "right address unit for update"); ++ *code_pos = (u_char)(instruction_code | reg->encoding()); ++ // ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++ ++void NativeIllegalInstruction::insert(address code_pos) { ++ // Unimplemented(); ++ assert(NativeIllegalInstruction::instruction_size == sizeof(int), "right address unit for update"); ++ *(juint*)code_pos = instruction_code; ++// ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++void NativeGeneralJump::verify() { ++ Unimplemented(); ++} ++ ++ ++void NativeGeneralJump::insert_unconditional(address code_pos, address entry) { ++ Unimplemented(); ++ intptr_t disp = (intptr_t)entry - ((intptr_t)code_pos + 1 + 4); ++ ++ guarantee(disp == (intptr_t)(int32_t)disp, "must be 32-bit offset"); ++ ++ *code_pos = unconditional_long_jump; ++ *((int32_t *)(code_pos+1)) = (int32_t) disp; ++ //ICache::invalidate_range(code_pos, instruction_size); ++} ++ ++ ++// MT-safe patching of a long jump instruction. ++// First patches first word of instruction to two jmp's that jmps to them ++// selfs (spinlock). Then patches the last byte, and then atomicly replaces ++// the jmp's with the first 4 byte of the new instruction. 
++void NativeGeneralJump::replace_mt_safe(address instr_addr, address code_buffer) { ++ Unimplemented(); ++} ++ ++void NativeGeneralJump::set_jump_destination(address dest) { ++ Unimplemented(); ++} ++ ++ ++address NativeGeneralJump::jump_destination() const { ++ Unimplemented(); ++ return NULL; ++} ++ ++intptr_t NativeMovConstReg::data() { ++ // wait_until_not_spinng(); ++ if (nativeInstruction_at(addr_at(0)) -> is_mov_ptr()) { ++ if (is_op(int_at(0), Assembler::op_ldi) && ++ is_op(int_at(4), Assembler::op_br) && ++ is_op(int_at(16), Assembler::op_ldl)) { ++ return (intptr_t) long_at(8); ++ } ++ if (is_op(int_at(0), Assembler::op_br) && ++ is_op(int_at(12), Assembler::op_ldl) && ++ is_op(int_at(16), Assembler::op_ldi)) { ++ return (intptr_t) long_at(4); ++ } ++ if (is_op(int_at(4), Assembler::op_slll_l)){ ++ /* li48 */ ++ int16_t msb_l = int_at(0)&0xffff; ++ int16_t lsb_h = int_at(8)&0xffff; ++ int16_t lsb_l = int_at(12)&0xffff; ++ ++ // -1 should be 0xffff ffff ffff ffff, so we can not use low 48 bits ++ return (((intptr_t) (msb_l) << 32) + ((intptr_t) (lsb_h) << 16) + (intptr_t) (lsb_l)); ++ } ++ else { ++ int16_t high = int_at(0)&0xffff; ++ int16_t low = int_at(4)&0xffff; ++ ++ // -1 should be 0xffff ffff ffff ffff, so we can not use low 48 bits ++ return ( ((intptr_t) (high) << 16) + (intptr_t) (low)); ++ } ++ } ++ ++ Unimplemented(); ++ return (intptr_t )NULL; ++} ++ ++void NativeMovConstReg::set_data(intptr_t x) { ++ if (is_mov_ptr()) { ++ OrderAccess::fence(); ++ //decide which type of data need be relocated li48 or 32 ++ if (is_op(int_at(4), Assembler::op_slll_l)) { ++ int16_t msb_l, lsb_h, lsb_l; ++ NativeInstruction::imm48_split((long)x, msb_l, lsb_h, lsb_l); ++ ++ int first_word = int_at(0); ++ set_int_at(0, 0x13FFFFFF); /* .1: br .1 */ ++ set_int_at(8, (int_at(8) & 0xffff0000) | (lsb_h & 0xffff)); ++ set_int_at(12, (int_at(12) & 0xffff0000) | (lsb_l & 0xffff)); ++ set_int_at(0, (first_word & 0xffff0000) | (msb_l & 0xffff)); ++ ++ // ICache::invalidate_range(addr_at(0), 16); ++ } ++ else if (is_op(int_at(8), Assembler::op_zapnot_l)) { ++ int16_t high = (x - (int16_t)(x))>>16; ++ int16_t low = (int16_t)(x); ++ int first_word = int_at(0); ++ set_int_at(0, 0x13FFFFFF); /* .1: br .1 */ ++ set_int_at(4, (int_at(4) & 0xffff0000) | (low & 0xffff)); ++ set_int_at(0, (first_word & 0xffff0000) | (high & 0xffff)); ++ ++ // ICache::invalidate_range(addr_at(0), 12); ++ } ++ } else { ++ fatal("not a call "); ++ } ++} ++ ++address NativeJump::jump_destination() { ++ NativeMovConstReg* mov = nativeMovConstReg_at(addr_at(0)); ++ address dest = (address) mov->data(); ++ // We use jump to self as the unresolved address which the inline ++ // cache code (and relocs) know about ++ ++ // return -1 if jump to self ++ dest = (dest == (address) this) ? (address) -1 : dest; ++ return dest; ++} +diff --git a/src/hotspot/cpu/sw64/nativeInst_sw64.hpp b/src/hotspot/cpu/sw64/nativeInst_sw64.hpp +new file mode 100755 +index 0000000000..ecadf9bd48 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/nativeInst_sw64.hpp +@@ -0,0 +1,795 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_NATIVEINST_SW64_HPP ++#define CPU_SW64_VM_NATIVEINST_SW64_HPP ++ ++#include "asm/assembler.hpp" ++#include "runtime/icache.hpp" ++#include "runtime/os.hpp" ++#include "runtime/safepointMechanism.hpp" ++ ++// We have interfaces for the following instructions: ++// - NativeInstruction ++// - - NativeCall ++// - - NativeMovConstReg ++// - - NativeMovConstRegPatching ++// - - NativeMovRegMem ++// - - NativeMovRegMemPatching ++// - - NativeJump ++// - - NativeFarJump ++// - - NativeIllegalOpCode ++// - - NativeGeneralJump ++// - - NativeReturn ++// - - NativeReturnX (return with argument) ++// - - NativePushConst ++// - - NativeTstRegMem ++ ++// The base class for different kinds of native instruction abstractions. ++// Provides the primitive operations to manipulate code relative to this. ++ ++class NativeInstruction { ++ friend class Relocation; ++ friend class MacroAssembler; ++ ++ public: ++ enum Sw64_specific_constants { ++ nop_instruction_code = 0, ++ nop_instruction_size = BytesPerInstWord ++ }; ++ ++ bool is_nop() { Unimplemented(); return ubyte_at(0) == nop_instruction_code; } ++ inline bool is_call(); ++ inline bool is_call_reg(); ++ inline bool is_illegal(); ++ inline bool is_return(); ++ inline bool is_jump(); ++ inline bool is_jump_reg(); ++ inline bool is_far_jump(); ++ inline bool is_cond_jump(); ++ inline bool is_safepoint_poll(); ++ inline bool is_mov_ptr(); ++ void wait_until_not_spinng() { ++ while (*((volatile int*)this) > 0);// wait until the first inst is not spin any more. spin is 13ffffff(>0), ldi and ldih is fxxxxxxx < 0 ++ } ++ ++ //We use an illegal instruction for marking a method as not_entrant or zombie. 
++ bool is_sigill_zombie_not_entrant(); ++ ++protected: ++ address addr_at(int offset) const { return address(this) + offset; } ++ ++ s_char sbyte_at(int offset) const { return *(s_char*) addr_at(offset); } ++ u_char ubyte_at(int offset) const { return *(u_char*) addr_at(offset); } ++ ++ jint int_at(int offset) const { return *(jint*) addr_at(offset); } ++ ++ intptr_t ptr_at(int offset) const { return *(intptr_t*) addr_at(offset); } ++ ++ oop oop_at (int offset) const { return *(oop*) addr_at(offset); } ++ ++ void set_char_at(int offset, char c) { *addr_at(offset) = (u_char)c; wrote(offset); } ++ void set_int_at(int offset, jint i) { *(jint*)addr_at(offset) = i; wrote(offset); } ++ void set_ptr_at (int offset, intptr_t ptr) { *(intptr_t*) addr_at(offset) = ptr; wrote(offset); } ++ void set_oop_at (int offset, oop o) { *(oop*) addr_at(offset) = o; wrote(offset); } ++ ++ static void imm48_split(long imm48, int16_t &msb_l, int16_t &lsb_h, int16_t &lsb_l); ++ void set_address(address dest); ++ void set_long_at(int offset, long i); ++ jlong long_at(int offset) const { return *(jlong*)addr_at(offset); } ++ ++ static bool is_op (int insn, Assembler::ops_mem op) { return Assembler::sw2_op(insn) == (int)op; } ++ static bool is_op (int insn, Assembler::ops_opr op) { return Assembler::sw2_arith_op(insn) == (int)op; } ++ static bool is_op (int insn, Assembler::ops_oprl op) { return Assembler::sw2_arith_op(insn) == (int)op; } ++ static bool is_op (int insn, Assembler::ops_extra op) { return Assembler::sw2_mfc_op(insn) == (int)op; } ++ static bool is_op (int insn, Assembler::ops_bra op) { return Assembler::sw2_op(insn) == (int)op; } ++ static bool is_op (int insn, Assembler::ops_fp op) { return Assembler::sw2_op(insn) == (int)op; } ++ ++ // This doesn't really do anything on Intel, but it is the place where ++ // cache invalidation belongs, generically: ++ void wrote(int offset); ++ ++ public: ++ ++ // unit test stuff ++ static void test() {} // override for testing ++ ++ inline friend NativeInstruction* nativeInstruction_at(address address); ++}; ++ ++inline NativeInstruction* nativeInstruction_at(address address) { ++ NativeInstruction* inst = (NativeInstruction*)address; ++#ifdef ASSERT ++ //inst->verify(); ++#endif ++ return inst; ++} ++ ++class NativePltCall: public NativeInstruction { ++public: ++ enum Sw64_specific_constants { ++ instruction_code = 0xE8, ++ instruction_size = 5, ++ instruction_offset = 0, ++ displacement_offset = 1, ++ return_address_offset = 5 ++ }; ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { return addr_at(return_address_offset); } ++ address displacement_address() const { return addr_at(displacement_offset); } ++ int displacement() const { Unimplemented(); return (jint) int_at(displacement_offset); } ++ address return_address() const { return addr_at(return_address_offset); } ++ address destination() const; ++ address plt_entry() const; ++ address plt_jump() const; ++ address plt_load_got() const; ++ address plt_resolve_call() const; ++ address plt_c2i_stub() const; ++ void set_stub_to_clean(); ++ ++ void reset_to_plt_resolve_call(); ++ void set_destination_mt_safe(address dest); ++ ++ void verify() const; ++}; ++ ++inline NativePltCall* nativePltCall_at(address address) { ++ NativePltCall* call = (NativePltCall*) address; ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativePltCall* nativePltCall_before(address addr) { ++ address at = addr - 
NativePltCall::instruction_size; ++ return nativePltCall_at(at); ++} ++ ++// An interface for mov ptr to reg: ++// ldi ++// sll ++// ldih ++// ldi ++class NativeMovConstReg: public NativeInstruction { ++public: ++ enum Sw64_specific_constants { ++ instruction_size = 4 * BytesPerInstWord, ++ instruction_offset = 0, ++ next_instruction_offset = instruction_size, ++ }; ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ intptr_t data(); ++ void set_data(intptr_t x); ++ ++ void verify(); ++ void print(); ++ ++ // unit test stuff ++ static void test() {} ++ ++ // Creation ++ inline friend NativeMovConstReg* nativeMovConstReg_at(address address); ++ inline friend NativeMovConstReg* nativeMovConstReg_before(address address); ++}; ++ ++inline NativeMovConstReg* nativeMovConstReg_at(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeMovConstReg* nativeMovConstReg_before(address address) { ++ NativeMovConstReg* test = (NativeMovConstReg*)(address - NativeMovConstReg::instruction_size - NativeMovConstReg::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++inline NativeCall* nativeCall_at(address address); ++ ++class NativeCall: public NativeInstruction { ++ public: ++ enum Sw64_specific_constants { ++ // instruction_size = 5 * BytesPerInstWord, ++ instruction_offset = 0, ++ // return_address_offset = instruction_size ++ }; ++ static int instruction_size; //member variables can be reassigned in the templateTable_sw64.cpp_sw64.cpp when SafePatch is true. ++ static int return_address_offset; ++ ++ enum { cache_line_size = BytesPerWord }; // conservative estimate! ++ ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { return addr_at(return_address_offset); } ++ address return_address() const { return addr_at(return_address_offset); } ++ address destination() const; ++ void set_destination(address dest) { ++ /*NativeMovConstReg* mov = nativeMovConstReg_at(addr_at(0)); ++ mov->set_data((intptr_t)dest);*/ ++ set_address(dest); ++ } ++ void set_destination_mt_safe(address dest); ++ ++ void verify_alignment() { } ++ void verify(); ++ void print(); ++ ++ // Creation ++ inline friend NativeCall* nativeCall_at(address address); ++ inline friend NativeCall* nativeCall_before(address return_address); ++ ++ static bool is_call_at(address instr) { ++ return nativeInstruction_at(instr)->is_call(); ++ } ++ ++ static bool is_call_before(address return_address) { ++ return is_call_at(return_address - NativeCall::return_address_offset); ++ } ++ ++// static bool is_call_to(address instr, address target) { ++// return nativeInstruction_at(instr)->is_call() && ++// nativeCall_at(instr)->destination() == target; ++// } ++ ++#if INCLUDE_AOT ++ static bool is_far_call(address instr, address target) { ++ intptr_t disp = target - (instr + sizeof(int32_t)); ++ return !Assembler::is_simm32(disp); ++ } ++#endif ++ ++ // MT-safe patching of a call instruction. 
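++  // A typical use, sketched here for illustration only (new_entry and return_addr
++  // are hypothetical names; in HotSpot this kind of patching is normally done
++  // while holding the patching lock):
++  //
++  //   NativeCall* call = nativeCall_before(return_addr);  // return_addr = pc after the call
++  //   if (call->destination() != new_entry) {
++  //     call->set_destination_mt_safe(new_entry);
++  //   }
++  //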
++ static void insert(address code_pos, address entry); ++ ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++}; ++ ++inline NativeCall* nativeCall_at(address address) { ++ NativeCall* call = (NativeCall*)(address - NativeCall::instruction_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++inline NativeCall* nativeCall_before(address return_address) { ++ NativeCall* call = (NativeCall*)(return_address - NativeCall::return_address_offset); ++#ifdef ASSERT ++ call->verify(); ++#endif ++ return call; ++} ++ ++//class NativeCallReg: public NativeInstruction { ++// public: ++// enum Sw64_specific_constants { ++// instruction_size = BytesPerInstWord ++// }; ++// ++// int next_instruction_offset() const { ++// return instruction_size; ++// } ++//}; ++ ++ ++class NativeMovConstRegPatching: public NativeMovConstReg { ++ private: ++ friend NativeMovConstRegPatching* nativeMovConstRegPatching_at(address address) { ++ Unimplemented(); ++ NativeMovConstRegPatching* test = (NativeMovConstRegPatching*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++// An interface for accessing/manipulating native moves of the form: ++// mov[b/w/l/q] [reg + offset], reg (instruction_code_reg2mem) ++// mov[b/w/l/q] reg, [reg+offset] (instruction_code_mem2reg ++// mov[s/z]x[w/b/q] [reg + offset], reg ++// fld_s [reg+offset] ++// fld_d [reg+offset] ++// fstp_s [reg + offset] ++// fstp_d [reg + offset] ++// mov_literal64 scratch, ; mov[b/w/l/q] 0(scratch),reg | mov[b/w/l/q] reg,0(scratch) ++// ++// Warning: These routines must be able to handle any instruction sequences ++// that are generated as a result of the load/store byte,word,long ++// macros. For example: The load_unsigned_byte instruction generates ++// an xor reg,reg inst prior to generating the movb instruction. This ++// class must skip the xor instruction. 
++ ++class NativeMovRegMem: public NativeInstruction { ++ public: ++ enum Sw64_specific_constants { ++ //instruction_prefix_wide_lo = Assembler::REX, ++ //instruction_prefix_wide_hi = Assembler::REX_WRXB, ++ instruction_code_xor = 0x33, ++ instruction_extended_prefix = 0x0F, ++ instruction_code_mem2reg_movslq = 0x63, ++ instruction_code_mem2reg_movzxb = 0xB6, ++ instruction_code_mem2reg_movsxb = 0xBE, ++ instruction_code_mem2reg_movzxw = 0xB7, ++ instruction_code_mem2reg_movsxw = 0xBF, ++ instruction_operandsize_prefix = 0x66, ++ instruction_code_reg2mem = 0x89, ++ instruction_code_mem2reg = 0x8b, ++ instruction_code_reg2memb = 0x88, ++ instruction_code_mem2regb = 0x8a, ++ instruction_code_float_s = 0xd9, ++ instruction_code_float_d = 0xdd, ++ instruction_code_long_volatile = 0xdf, ++ instruction_code_xmm_ss_prefix = 0xf3, ++ instruction_code_xmm_sd_prefix = 0xf2, ++ instruction_code_xmm_code = 0x0f, ++ instruction_code_xmm_load = 0x10, ++ instruction_code_xmm_store = 0x11, ++ instruction_code_xmm_lpd = 0x12, ++ ++ instruction_code_lea = 0x8d, ++ ++ //instruction_VEX_prefix_2bytes = Assembler::VEX_2bytes, ++ //instruction_VEX_prefix_3bytes = Assembler::VEX_3bytes, ++ //instruction_EVEX_prefix_4bytes = Assembler::EVEX_4bytes, ++ ++ instruction_offset = 0, ++ data_offset = 2, ++ next_instruction_offset = 4 ++ }; ++ ++ // helper ++ int instruction_start() const; ++ ++ address instruction_address() const { ++ Unimplemented(); ++ return addr_at(instruction_start()); ++ } ++ ++ int num_bytes_to_end_of_patch() const { ++ Unimplemented(); ++ return patch_offset() + sizeof(jint); ++ } ++ ++ int offset() const { ++ Unimplemented(); ++ return int_at(patch_offset()); ++ } ++ ++ void set_offset(int x) { ++ Unimplemented(); ++ set_int_at(patch_offset(), x); ++ } ++ ++ void add_offset_in_bytes(int add_offset) { ++ Unimplemented(); ++ int patch_off = patch_offset(); ++ set_int_at(patch_off, int_at(patch_off) + add_offset); ++ } ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ int patch_offset() const; ++ inline friend NativeMovRegMem* nativeMovRegMem_at (address address); ++}; ++ ++inline NativeMovRegMem* nativeMovRegMem_at (address address) { ++ Unimplemented(); ++ NativeMovRegMem* test = (NativeMovRegMem*)(address - NativeMovRegMem::instruction_offset); ++#ifdef ASSERT ++ test->verify(); ++#endif ++ return test; ++} ++ ++ ++// An interface for accessing/manipulating native leal instruction of form: ++// leal reg, [reg + offset] ++ ++class NativeLoadAddress: public NativeMovRegMem { ++ public: ++ enum Sw64_specific_constants { ++ }; ++ ++ void verify(); ++ void print (); ++ ++ // unit test stuff ++ static void test() {} ++ ++ private: ++ friend NativeLoadAddress* nativeLoadAddress_at (address address) { ++ Unimplemented(); ++ NativeLoadAddress* test = (NativeLoadAddress*)(address - instruction_offset); ++ #ifdef ASSERT ++ test->verify(); ++ #endif ++ return test; ++ } ++}; ++ ++// destination is rbx or rax ++// mov rbx, [rip + offset] ++class NativeLoadGot: public NativeInstruction { ++ static const bool has_rex = true; ++ static const int rex_size = 1; ++public: ++ enum Sw64_specific_constants { ++ rex_prefix = 0x48, ++ instruction_code = 0x8b, ++ modrm_rbx_code = 0x1d, ++ modrm_rax_code = 0x05, ++ instruction_length = 6 + rex_size, ++ offset_offset = 2 + rex_size ++ }; ++ ++ address instruction_address() const { return addr_at(0); } ++ address rip_offset_address() const { return addr_at(offset_offset); } ++ int rip_offset() const { return 
int_at(offset_offset); } ++ address return_address() const { return addr_at(instruction_length); } ++ address got_address() const { return return_address() + rip_offset(); } ++ address next_instruction_address() const { return return_address(); } ++ intptr_t data() const; ++ void set_data(intptr_t data) { ++ Unimplemented(); ++ intptr_t *addr = (intptr_t *) got_address(); ++ *addr = data; ++ } ++ ++ void verify() const; ++private: ++ void report_and_fail() const; ++}; ++ ++inline NativeLoadGot* nativeLoadGot_at(address addr) { ++ Unimplemented(); ++ NativeLoadGot* load = (NativeLoadGot*) addr; ++#ifdef ASSERT ++ load->verify(); ++#endif ++ return load; ++} ++ ++class NativeJump: public NativeInstruction { ++ public: ++ enum Sw64_specific_constants { ++ // instruction_size = 5 * BytesPerInstWord, ++ instruction_offset = 0, ++ // next_instruction_offset = instruction_size ++ }; ++ static int instruction_size; //member variables can be reassigned in the templateTable_sw64.cpp when SafePatch is true. ++ static int next_instruction_offset; ++ address instruction_address() const { return addr_at(instruction_offset); } ++ address next_instruction_address() const { return addr_at(next_instruction_offset); } ++ address jump_destination(); ++ ++ void set_jump_destination(address dest) { ++ // NativeMovConstReg* mov = nativeMovConstReg_at(addr_at(0)); ++ // mov->set_data((intptr_t)dest); ++ set_address(dest); ++ } ++ ++ // Creation ++ inline friend NativeJump* nativeJump_at(address address); ++ ++ void verify(); ++ ++ // Unit testing stuff ++ static void test() {} ++ ++ // Insertion of native jump instruction ++ static void insert(address code_pos, address entry); ++ // MT-safe insertion of native jump at verified method entry ++ static void check_verified_entry_alignment(address entry, address verified_entry); ++ static void patch_verified_entry(address entry, address verified_entry, address dest); ++}; ++ ++inline NativeJump* nativeJump_at(address address) { ++ NativeJump* jump = (NativeJump*)(address - NativeJump::instruction_offset); ++#ifdef ASSERT ++ jump->verify(); ++#endif ++ return jump; ++} ++ ++//// far jump reg ++//class NativeFarJump: public NativeInstruction { ++// public: ++// address jump_destination() const; ++// ++// // Creation ++// inline friend NativeFarJump* nativeFarJump_at(address address); ++// ++// void verify(); ++// ++// // Unit testing stuff ++// static void test() {} ++// ++//}; ++ ++//inline NativeFarJump* nativeFarJump_at(address address) { ++// NativeFarJump* jump = (NativeFarJump*)(address); ++//#ifdef ASSERT ++// jump->verify(); ++//#endif ++// return jump; ++//} ++ ++// Handles all kinds of jump on Intel. 
Long/far, conditional/unconditional ++class NativeGeneralJump: public NativeInstruction { ++public: ++ enum Sw64_specific_constants { ++ instruction_offset = 0, ++ unconditional_long_jump = 0xe9, ++ unconditional_short_jump = 0xeb, ++ instruction_size = 5 ++ }; ++ ++ address instruction_address() const { Unimplemented(); return addr_at(0); } ++ address jump_destination() const; ++ ++ void set_jump_destination(address dest); ++ ++ // Creation ++ inline friend NativeGeneralJump* nativeGeneralJump_at(address address); ++ ++ // Insertion of native general jump instruction ++ static void insert_unconditional(address code_pos, address entry); ++ static void replace_mt_safe(address instr_addr, address code_buffer); ++ ++ void verify(); ++}; ++ ++inline NativeGeneralJump* nativeGeneralJump_at(address address) { ++ Unimplemented(); ++ NativeGeneralJump* jump = (NativeGeneralJump*)(address); ++ debug_only(jump->verify();) ++ return jump; ++} ++ ++class NativeGotJump: public NativeInstruction { ++public: ++ enum Sw64_specific_constants { ++ instruction_code = 0xff, ++ instruction_offset = 0, ++ instruction_size = 6, ++ rip_offset = 2 ++ }; ++ ++ void verify() const; ++ address instruction_address() const { Unimplemented(); return addr_at(instruction_offset); } ++ address destination() const; ++ address return_address() const { Unimplemented(); return addr_at(instruction_size); } ++ int got_offset() const { return (jint) int_at(rip_offset); } ++ address got_address() const { return return_address() + got_offset(); } ++ address next_instruction_address() const { return addr_at(instruction_size); } ++ bool is_GotJump() const { return ubyte_at(0) == instruction_code; } ++ ++ void set_jump_destination(address dest) { ++ Unimplemented(); ++ address *got_entry = (address *) got_address(); ++ *got_entry = dest; ++ } ++}; ++ ++inline NativeGotJump* nativeGotJump_at(address addr) { ++ Unimplemented(); ++ NativeGotJump* jump = (NativeGotJump*)(addr); ++ debug_only(jump->verify()); ++ return jump; ++} ++ ++class NativePopReg : public NativeInstruction { ++ public: ++ enum Sw64_specific_constants { ++ instruction_code = 0x58, ++ instruction_size = 1, ++ instruction_offset = 0, ++ data_offset = 1, ++ next_instruction_offset = 1 ++ }; ++ ++ // Insert a pop instruction ++ static void insert(address code_pos, Register reg); ++}; ++ ++ ++class NativeIllegalInstruction: public NativeInstruction { ++public: ++ enum Sw64_specific_constants { ++ instruction_code = 0x0000DEAD, // Special instruction ++ instruction_size = 4, //TODO:not check jzy ++ instruction_offset = 0, ++ next_instruction_offset = 4 //TODO:not check jzy ++ }; ++ ++ // Insert illegal opcode as specific address ++ static void insert(address code_pos); ++}; ++ ++// return instruction that does not pop values of the stack ++class NativeReturn: public NativeInstruction { ++ public: ++ enum Sw64_specific_constants { ++ instruction_size = BytesPerInstWord ++ }; ++}; ++ ++// Simple test vs memory ++class NativeTstRegMem: public NativeInstruction { ++ public: ++ enum Sw64_specific_constants { ++ }; ++}; ++ ++//class NativeCondJump; ++//inline NativeCondJump* nativeCondJump_at(address address); ++//class NativeCondJump: public NativeInstruction { ++// public: ++// enum Sw64_specific_constants { ++// instruction_size = 16, ++// instruction_offset = 12, ++// next_instruction_offset = 20 ++// }; ++// ++// ++// address instruction_address() const { Unimplemented(); return addr_at(0); } ++// address next_instruction_address() const { Unimplemented(); return 
addr_at(next_instruction_offset); } ++// ++// // Creation ++// inline friend NativeCondJump* nativeCondJump_at(address address); ++// ++// address jump_destination() const { ++// Unimplemented(); ++// return ::nativeCondJump_at(addr_at(12))->jump_destination(); ++// } ++// ++// void set_jump_destination(address dest) { ++// Unimplemented(); ++// ::nativeCondJump_at(addr_at(12))->set_jump_destination(dest); ++// } ++// ++//}; ++// ++//inline NativeCondJump* nativeCondJump_at(address address) { ++// Unimplemented(); ++// NativeCondJump* jump = (NativeCondJump*)(address); ++// return jump; ++//} ++ ++inline bool NativeInstruction::is_illegal() { Unimplemented(); return (short)int_at(0) == (short)NativeIllegalInstruction::instruction_code; } ++ ++inline bool NativeInstruction::is_call() { ++ if (SafePatch) { ++ return is_op(int_at(20), Assembler::op_call) && ++ ((is_op(int_at(0), Assembler::op_ldi) && ++ is_op(int_at(4), Assembler::op_br) && ++ is_op(int_at(16), Assembler::op_ldl)) || ++ (is_op(int_at(0), Assembler::op_br) && ++ is_op(int_at(12), Assembler::op_ldl) && ++ is_op(int_at(16), Assembler::op_ldi))); ++ } else { ++ if (is_op(int_at(0), Assembler::op_ldi) && ++ is_op(int_at(4), Assembler::op_slll_l) && ++ is_op(int_at(8), Assembler::op_ldih) && ++ is_op(int_at(12), Assembler::op_ldi) && ++ is_op(int_at(16), Assembler::op_call)) ++ return true; ++ } ++ ++ if (is_op(int_at(0), Assembler::op_ldih) && ++ is_op(int_at(4), Assembler::op_ldi) && ++ is_op(int_at(8), Assembler::op_slll_l) && ++ is_op(int_at(12), Assembler::op_ldih) && ++ is_op(int_at(16), Assembler::op_ldi) && ++ is_op(int_at(16), Assembler::op_call)) ++ return true; ++ ++ // Unimplemented(); ++ return false; ++} ++inline bool NativeInstruction::is_call_reg() { ++ return is_op(int_at(0), Assembler::op_call); ++} ++inline bool NativeInstruction::is_return() { ++ return is_op(int_at(NativeMovConstReg::instruction_size), Assembler::op_ret); ++} ++inline bool NativeInstruction::is_jump() { ++ if (SafePatch) { ++ return is_op(int_at(20), Assembler::op_jmp) && ++ ((is_op(int_at(0), Assembler::op_ldi) && ++ is_op(int_at(4), Assembler::op_br) && ++ is_op(int_at(16), Assembler::op_ldl)) || ++ (is_op(int_at(0), Assembler::op_br) && ++ is_op(int_at(12), Assembler::op_ldl) && ++ is_op(int_at(16), Assembler::op_ldi))); ++ } else { ++ if (is_op(int_at(0), Assembler::op_ldi) && ++ is_op(int_at(4), Assembler::op_slll_l) && ++ is_op(int_at(8), Assembler::op_ldih) && ++ is_op(int_at(12), Assembler::op_ldi)) ++ return true; ++ } ++ ++ if (is_op(int_at(0), Assembler::op_ldih) && ++ is_op(int_at(4), Assembler::op_ldi) && ++ is_op(int_at(8), Assembler::op_slll_l) && ++ is_op(int_at(12), Assembler::op_ldih) && ++ is_op(int_at(16), Assembler::op_ldi)) ++ return true; ++ ++ // Unimplemented(); ++ return false; ++} ++inline bool NativeInstruction::is_jump_reg() { ++ return is_op(int_at(0), Assembler::op_jmp); ++} ++inline bool NativeInstruction::is_safepoint_poll() { ++ //Unimplemented(); ++ //refer to relocInfo::poll_return_type in sw64.ad ++ int x = int_at(0); ++ int op = Assembler::sw2_op(x); ++ if (op != Assembler::op_ldw) return false; ++ ++ Register ra = Assembler::sw2_ra(x); ++ if (ra != rscratch3) return false; //TODO:refactor jzy ++ ++ int mdisp = Assembler::sw2_mdisp(x); ++ if (mdisp != 0) return false; ++ ++ return true; ++} ++ ++inline bool NativeInstruction::is_mov_ptr() { ++ //wait_until_not_spinng(); ++ if ((is_op(int_at(0), Assembler::op_ldi) && ++ is_op(int_at(4), Assembler::op_slll_l) && ++ is_op(int_at(8), Assembler::op_ldih) && ++ 
is_op(int_at(12), Assembler::op_ldi)) || ++ (is_op(int_at(0), Assembler::op_ldih) && ++ is_op(int_at(4), Assembler::op_ldi) && ++ is_op(int_at(8), Assembler::op_zapnot_l)) || ++ (is_op(int_at(0), Assembler::op_ldi) && ++ is_op(int_at(4), Assembler::op_br) && ++ is_op(int_at(16), Assembler::op_ldl))|| ++ (is_op(int_at(0), Assembler::op_br) && ++ is_op(int_at(12), Assembler::op_ldl) && ++ is_op(int_at(16), Assembler::op_ldi) )){ ++ return true; ++ } ++ return false; ++} ++ ++#endif // CPU_SW64_VM_NATIVEINST_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/registerMap_sw64.hpp b/src/hotspot/cpu/sw64/registerMap_sw64.hpp +new file mode 100644 +index 0000000000..d70e05dd64 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/registerMap_sw64.hpp +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 1998, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_REGISTERMAP_SW64_HPP ++#define CPU_SW64_VM_REGISTERMAP_SW64_HPP ++ ++// machine-dependent implemention for register maps ++ friend class frame; ++ ++ private: ++ // This is the hook for finding a register in an "well-known" location, ++ // such as a register block of a predetermined format. ++ // Since there is none, we just return NULL. ++ // See registerMap_sparc.hpp for an example of grabbing registers ++ // from register save areas of a standard layout. ++ address pd_location(VMReg reg) const {return NULL;} ++ ++ // no PD state to clear or copy: ++ void pd_clear() {} ++ void pd_initialize() {} ++ void pd_initialize_from(const RegisterMap* map) {} ++ ++#endif // CPU_SW64_VM_REGISTERMAP_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/register_definitions_sw64.cpp b/src/hotspot/cpu/sw64/register_definitions_sw64.cpp +new file mode 100644 +index 0000000000..a0e731efb5 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/register_definitions_sw64.cpp +@@ -0,0 +1,224 @@ ++/* ++ * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "asm/register.hpp" ++#include "register_sw64.hpp" ++# include "interp_masm_sw64.hpp" ++ ++REGISTER_DEFINITION(Register, noreg); ++ ++REGISTER_DEFINITION(Register, i0); ++REGISTER_DEFINITION(Register, i1); ++REGISTER_DEFINITION(Register, i2); ++REGISTER_DEFINITION(Register, i3); ++REGISTER_DEFINITION(Register, i4); ++REGISTER_DEFINITION(Register, i5); ++REGISTER_DEFINITION(Register, i6); ++REGISTER_DEFINITION(Register, i7); ++REGISTER_DEFINITION(Register, i8); ++REGISTER_DEFINITION(Register, i9); ++REGISTER_DEFINITION(Register, i10); ++REGISTER_DEFINITION(Register, i11); ++REGISTER_DEFINITION(Register, i12); ++REGISTER_DEFINITION(Register, i13); ++REGISTER_DEFINITION(Register, i14); ++REGISTER_DEFINITION(Register, i15); ++REGISTER_DEFINITION(Register, i16); ++REGISTER_DEFINITION(Register, i17); ++REGISTER_DEFINITION(Register, i18); ++REGISTER_DEFINITION(Register, i19); ++REGISTER_DEFINITION(Register, i20); ++REGISTER_DEFINITION(Register, i21); ++REGISTER_DEFINITION(Register, i22); ++REGISTER_DEFINITION(Register, i23); ++REGISTER_DEFINITION(Register, i24); ++REGISTER_DEFINITION(Register, i25); ++REGISTER_DEFINITION(Register, i26); ++REGISTER_DEFINITION(Register, i27); ++REGISTER_DEFINITION(Register, i28); ++REGISTER_DEFINITION(Register, i29); ++REGISTER_DEFINITION(Register, i30); ++REGISTER_DEFINITION(Register, i31); ++REGISTER_DEFINITION(Register, sp); ++ ++REGISTER_DEFINITION(FloatRegister, fnoreg); ++REGISTER_DEFINITION(FloatRegister, f0); ++REGISTER_DEFINITION(FloatRegister, f1); ++REGISTER_DEFINITION(FloatRegister, f2); ++REGISTER_DEFINITION(FloatRegister, f3); ++REGISTER_DEFINITION(FloatRegister, f4); ++REGISTER_DEFINITION(FloatRegister, f5); ++REGISTER_DEFINITION(FloatRegister, f6); ++REGISTER_DEFINITION(FloatRegister, f7); ++REGISTER_DEFINITION(FloatRegister, f8); ++REGISTER_DEFINITION(FloatRegister, f9); ++REGISTER_DEFINITION(FloatRegister, f10); ++REGISTER_DEFINITION(FloatRegister, f11); ++REGISTER_DEFINITION(FloatRegister, f12); ++REGISTER_DEFINITION(FloatRegister, f13); ++REGISTER_DEFINITION(FloatRegister, f14); ++REGISTER_DEFINITION(FloatRegister, f15); ++REGISTER_DEFINITION(FloatRegister, f16); ++REGISTER_DEFINITION(FloatRegister, f17); ++REGISTER_DEFINITION(FloatRegister, f18); ++REGISTER_DEFINITION(FloatRegister, f19); ++REGISTER_DEFINITION(FloatRegister, f20); ++REGISTER_DEFINITION(FloatRegister, f21); ++REGISTER_DEFINITION(FloatRegister, f22); ++REGISTER_DEFINITION(FloatRegister, f23); ++REGISTER_DEFINITION(FloatRegister, f24); ++REGISTER_DEFINITION(FloatRegister, f25); ++REGISTER_DEFINITION(FloatRegister, f26); ++REGISTER_DEFINITION(FloatRegister, f27); ++REGISTER_DEFINITION(FloatRegister, f28); ++REGISTER_DEFINITION(FloatRegister, f29); ++REGISTER_DEFINITION(FloatRegister, f30); ++REGISTER_DEFINITION(FloatRegister, f31); ++ ++REGISTER_DEFINITION(Register, A0); ++REGISTER_DEFINITION(Register, A1); 
++REGISTER_DEFINITION(Register, A2); ++REGISTER_DEFINITION(Register, A3); ++REGISTER_DEFINITION(Register, A4); ++REGISTER_DEFINITION(Register, A5); ++ ++REGISTER_DEFINITION(FloatRegister, F16); ++REGISTER_DEFINITION(FloatRegister, F17); ++REGISTER_DEFINITION(FloatRegister, F18); ++REGISTER_DEFINITION(FloatRegister, F19); ++REGISTER_DEFINITION(FloatRegister, F20); ++REGISTER_DEFINITION(FloatRegister, F21); ++ ++REGISTER_DEFINITION(Register, zr); ++REGISTER_DEFINITION(Register, c_rarg0); ++REGISTER_DEFINITION(Register, c_rarg1); ++REGISTER_DEFINITION(Register, c_rarg2); ++REGISTER_DEFINITION(Register, c_rarg3); ++REGISTER_DEFINITION(Register, c_rarg4); ++REGISTER_DEFINITION(Register, c_rarg5); ++ ++REGISTER_DEFINITION(FloatRegister, c_farg0); ++REGISTER_DEFINITION(FloatRegister, c_farg1); ++REGISTER_DEFINITION(FloatRegister, c_farg2); ++REGISTER_DEFINITION(FloatRegister, c_farg3); ++REGISTER_DEFINITION(FloatRegister, c_farg4); ++REGISTER_DEFINITION(FloatRegister, c_farg5); ++ ++REGISTER_DEFINITION(Register, j_rarg0);//A1 ++REGISTER_DEFINITION(Register, j_rarg1); ++REGISTER_DEFINITION(Register, j_rarg2); ++REGISTER_DEFINITION(Register, j_rarg3); ++REGISTER_DEFINITION(Register, j_rarg4);//A5 ++REGISTER_DEFINITION(Register, j_rarg5);//A0 ++ ++REGISTER_DEFINITION(FloatRegister, j_farg0);//F16 ++REGISTER_DEFINITION(FloatRegister, j_farg1); ++REGISTER_DEFINITION(FloatRegister, j_farg2); ++REGISTER_DEFINITION(FloatRegister, j_farg3); ++REGISTER_DEFINITION(FloatRegister, j_farg4); ++REGISTER_DEFINITION(FloatRegister, j_farg5);//F21 ++ ++REGISTER_DEFINITION(Register, rscratch1); //t5 ++REGISTER_DEFINITION(Register, rscratch2); //t6 ++ ++REGISTER_DEFINITION(Register, rscratch3); //t11 ++REGISTER_DEFINITION(Register, rscratch4); //at ++ ++REGISTER_DEFINITION(Register, rscratch1_GP); //GP ++REGISTER_DEFINITION(Register, rscratch2_AT); //AT ++REGISTER_DEFINITION(Register, rdispatch); //t8 ++REGISTER_DEFINITION(Register, rnext); //t10, jdk8 use s1 ++REGISTER_DEFINITION(Register, rmonitors); //t11 ++REGISTER_DEFINITION(Register, pv); //t12 ++//REGISTER_DEFINITION(Register, rcpool); //t12, ok?? ++ ++REGISTER_DEFINITION(Register, rbcp); //s0, consist with jdk8 ++REGISTER_DEFINITION(Register, rlocals); //s1, jdk8 use s5 ++REGISTER_DEFINITION(Register, rthread); //s2, consist with jdk8 ++REGISTER_DEFINITION(Register, rmethod); //s3, consist with jdk8 ++REGISTER_DEFINITION(Register, rsender); //s4, consist with jdk8 ++REGISTER_DEFINITION(Register, r12_heapbase); //s5, jdk8 use t5 ++REGISTER_DEFINITION(Register, rcc); //gp ++ ++REGISTER_DEFINITION(Register, RA); ++REGISTER_DEFINITION(Register, esp); ++REGISTER_DEFINITION(Register, lr); ++REGISTER_DEFINITION(Register, rfp); ++ ++REGISTER_DEFINITION(Register, FSR); //v0, First Stack Register ++REGISTER_DEFINITION(Register, SSR); //t4, Second Stack Register ++ ++REGISTER_DEFINITION(FloatRegister, FSF); //f0, First Stack Float ++REGISTER_DEFINITION(FloatRegister, SSF); //f1, Second Stack Float ++REGISTER_DEFINITION(FloatRegister, FTF); //f14, Float temp?? 
++REGISTER_DEFINITION(FloatRegister, FcmpRES);//f29, TODO:need delete jzy ++REGISTER_DEFINITION(FloatRegister, fcc);//f29 ++REGISTER_DEFINITION(FloatRegister, fscratch1);//f28 ++REGISTER_DEFINITION(FloatRegister, fzero);//f31 ++ ++REGISTER_DEFINITION(Register, V0); ++REGISTER_DEFINITION(Register, T0); ++REGISTER_DEFINITION(Register, T1); ++REGISTER_DEFINITION(Register, T2); ++REGISTER_DEFINITION(Register, T3); ++REGISTER_DEFINITION(Register, T4); ++REGISTER_DEFINITION(Register, T5); ++REGISTER_DEFINITION(Register, T6); ++REGISTER_DEFINITION(Register, T7); ++REGISTER_DEFINITION(Register, S0); ++REGISTER_DEFINITION(Register, S1); ++REGISTER_DEFINITION(Register, S2); ++REGISTER_DEFINITION(Register, S3); ++REGISTER_DEFINITION(Register, S4); ++REGISTER_DEFINITION(Register, S5); ++REGISTER_DEFINITION(Register, T8); ++REGISTER_DEFINITION(Register, T9); ++REGISTER_DEFINITION(Register, T10); ++REGISTER_DEFINITION(Register, T11); ++REGISTER_DEFINITION(Register, T12); ++REGISTER_DEFINITION(Register, AT); ++REGISTER_DEFINITION(Register, GP); ++REGISTER_DEFINITION(Register, R0); ++ ++// x86 GPR simulation ++REGISTER_DEFINITION(Register, rax); ++REGISTER_DEFINITION(Register, rcx); ++REGISTER_DEFINITION(Register, rdx); ++REGISTER_DEFINITION(Register, rbx); ++REGISTER_DEFINITION(Register, rsi); ++REGISTER_DEFINITION(Register, rdi); ++REGISTER_DEFINITION(Register, rbp); ++REGISTER_DEFINITION(Register, rsp); ++REGISTER_DEFINITION(Register, r8); ++REGISTER_DEFINITION(Register, r9); ++REGISTER_DEFINITION(Register, r10); ++REGISTER_DEFINITION(Register, r11); ++REGISTER_DEFINITION(Register, r12); ++REGISTER_DEFINITION(Register, r13); ++REGISTER_DEFINITION(Register, r14); ++REGISTER_DEFINITION(Register, r15); +\ No newline at end of file +diff --git a/src/hotspot/cpu/sw64/register_sw64.cpp b/src/hotspot/cpu/sw64/register_sw64.cpp +new file mode 100644 +index 0000000000..b3c2870071 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/register_sw64.cpp +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "register_sw64.hpp" ++ ++const int ConcreteRegisterImpl::max_gpr = RegisterImpl::number_of_registers << 1; ++ ++const int ConcreteRegisterImpl::max_fpr ++ = ConcreteRegisterImpl::max_gpr + (FloatRegisterImpl::number_of_registers << 1); ++ ++const char* RegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "V0", "T0", "T1", "T2", "T3", "T4", "T5", "T6", "T7", ++ "S0", "S1", "S2", "S3", "S4", "S5", ++ "rfp", "A0", "A1", "A2", "A3", "A4", "A5", ++ "T8", "T9", "T10", "T11", ++ "RA", "T12", "AT", "GP", "esp", "Zero" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} ++ ++const char* FloatRegisterImpl::name() const { ++ const char* names[number_of_registers] = { ++ "f0", "f1", "f2", "f3", "f4", "f5", "f6", "f7", ++ "f8", "f9", "f10", "f11", "f12", "f13", "f14", "f15", ++ "f16", "f17", "f18", "f19", "f20", "f21", "f22", "f23", ++ "f24", "f25", "f26", "f27", "f28", "f29", "f30", "f31" ++ }; ++ return is_valid() ? names[encoding()] : "noreg"; ++} +diff --git a/src/hotspot/cpu/sw64/register_sw64.hpp b/src/hotspot/cpu/sw64/register_sw64.hpp +new file mode 100644 +index 0000000000..3fa76a6249 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/register_sw64.hpp +@@ -0,0 +1,261 @@ ++/* ++ * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_REGISTER_SW64_HPP ++#define CPU_SW64_VM_REGISTER_SW64_HPP ++ ++#include "asm/register.hpp" ++ ++class VMRegImpl; ++typedef VMRegImpl* VMReg; ++ ++// Use Register as shortcut ++class RegisterImpl; ++typedef RegisterImpl* Register; ++ ++inline Register as_Register(int encoding) { ++ return (Register)(intptr_t) encoding; ++} ++ ++class RegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ number_of_registers = 32, ++ number_of_byte_registers = 32, ++ number_of_registers_for_jvmci = 34 // Including SP and ZR. 
++ }; ++ ++ // derived registers, offsets, and addresses ++ Register successor() const { return as_Register(encoding() + 1); } ++ ++ // construction ++ inline friend Register as_Register(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ int encoding_nocheck() const { return (intptr_t)this; } ++ ++ // Return the bit which represents this register. This is intended ++ // to be ORed into a bitmask: for usage see class RegSet below. ++ unsigned long bit(bool should_set = true) const { return should_set ? 1 << encoding() : 0; } ++}; ++ ++// The integer registers of the sw64 architecture ++ ++CONSTANT_REGISTER_DECLARATION(Register, noreg, (-1)); ++ ++ ++CONSTANT_REGISTER_DECLARATION(Register, i0, (0)); ++CONSTANT_REGISTER_DECLARATION(Register, i1, (1)); ++CONSTANT_REGISTER_DECLARATION(Register, i2, (2)); ++CONSTANT_REGISTER_DECLARATION(Register, i3, (3)); ++CONSTANT_REGISTER_DECLARATION(Register, i4, (4)); ++CONSTANT_REGISTER_DECLARATION(Register, i5, (5)); ++CONSTANT_REGISTER_DECLARATION(Register, i6, (6)); ++CONSTANT_REGISTER_DECLARATION(Register, i7, (7)); ++CONSTANT_REGISTER_DECLARATION(Register, i8, (8)); ++CONSTANT_REGISTER_DECLARATION(Register, i9, (9)); ++CONSTANT_REGISTER_DECLARATION(Register, i10, (10)); ++CONSTANT_REGISTER_DECLARATION(Register, i11, (11)); ++CONSTANT_REGISTER_DECLARATION(Register, i12, (12)); ++CONSTANT_REGISTER_DECLARATION(Register, i13, (13)); ++CONSTANT_REGISTER_DECLARATION(Register, i14, (14)); ++CONSTANT_REGISTER_DECLARATION(Register, i15, (15)); ++CONSTANT_REGISTER_DECLARATION(Register, i16, (16)); ++CONSTANT_REGISTER_DECLARATION(Register, i17, (17)); ++CONSTANT_REGISTER_DECLARATION(Register, i18, (18)); ++CONSTANT_REGISTER_DECLARATION(Register, i19, (19)); ++CONSTANT_REGISTER_DECLARATION(Register, i20, (20)); ++CONSTANT_REGISTER_DECLARATION(Register, i21, (21)); ++CONSTANT_REGISTER_DECLARATION(Register, i22, (22)); ++CONSTANT_REGISTER_DECLARATION(Register, i23, (23)); ++CONSTANT_REGISTER_DECLARATION(Register, i24, (24)); ++CONSTANT_REGISTER_DECLARATION(Register, i25, (25)); ++CONSTANT_REGISTER_DECLARATION(Register, i26, (26)); ++CONSTANT_REGISTER_DECLARATION(Register, i27, (27)); ++CONSTANT_REGISTER_DECLARATION(Register, i28, (28)); ++CONSTANT_REGISTER_DECLARATION(Register, i29, (29)); ++CONSTANT_REGISTER_DECLARATION(Register, i30, (30)); ++CONSTANT_REGISTER_DECLARATION(Register, i31, (31)); ++ ++ ++// r31 is not a general purpose register, but represents either the ++// stack pointer or the zero/discard register depending on the ++// instruction. ++//CONSTANT_REGISTER_DECLARATION(Register, r31_sp, (31)); ++CONSTANT_REGISTER_DECLARATION(Register, zr, (31)); ++CONSTANT_REGISTER_DECLARATION(Register, sp, (30)); ++ ++// Used as a filler in instructions where a register field is unused. 
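++// For orientation (illustrative values only, assuming the encodings declared
++// above): RegisterImpl::bit() yields the mask bit consumed by the RegSet class
++// further down in this header, e.g.
++//
++//   i3->bit()                        == 1u << 3
++//   RegSet::of(i16, i17, i18).bits() == (1u << 16) | (1u << 17) | (1u << 18)
++//   RegSet::range(i9, i14).bits()    == 0x7e00   // bits 9..14 inclusive
++//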
++const Register dummy_reg = zr; ++ ++// Use FloatRegister as shortcut ++class FloatRegisterImpl; ++typedef FloatRegisterImpl* FloatRegister; ++ ++inline FloatRegister as_FloatRegister(int encoding) { ++ return (FloatRegister)(intptr_t) encoding; ++} ++ ++// The implementation of floating point registers for the architecture ++class FloatRegisterImpl: public AbstractRegisterImpl { ++ public: ++ enum { ++ float_arg_base = 16, ++ number_of_registers = 32 ++ }; ++ ++ // construction ++ inline friend FloatRegister as_FloatRegister(int encoding); ++ ++ VMReg as_VMReg(); ++ ++ // derived registers, offsets, and addresses ++ FloatRegister successor() const { return as_FloatRegister(encoding() + 1); } ++ ++ // accessors ++ int encoding() const { assert(is_valid(), "invalid register"); return (intptr_t)this; } ++ int encoding_nocheck() const { return (intptr_t)this; } ++ bool is_valid() const { return 0 <= (intptr_t)this && (intptr_t)this < number_of_registers; } ++ const char* name() const; ++ ++}; ++ ++// The float registers of the SW64 architecture ++CONSTANT_REGISTER_DECLARATION(FloatRegister, fnoreg , (-1)); ++ ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f0 , ( 0)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f1 , ( 1)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f2 , ( 2)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f3 , ( 3)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f4 , ( 4)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f5 , ( 5)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f6 , ( 6)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f7 , ( 7)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f8 , ( 8)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f9 , ( 9)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f10 , (10)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f11 , (11)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f12 , (12)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f13 , (13)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f14 , (14)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f15 , (15)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f16 , (16)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f17 , (17)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f18 , (18)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f19 , (19)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f20 , (20)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f21 , (21)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f22 , (22)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f23 , (23)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f24 , (24)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f25 , (25)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f26 , (26)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f27 , (27)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f28 , (28)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f29 , (29)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f30 , (30)); ++CONSTANT_REGISTER_DECLARATION(FloatRegister, f31 , (31)); ++ ++// Need to know the total number of registers of all sorts for SharedInfo. ++// Define a class that exports it. ++class ConcreteRegisterImpl : public AbstractRegisterImpl { ++ public: ++ enum { ++ // A big enough number for C2: all the registers plus flags ++ // This number must be large enough to cover REG_COUNT (defined by c2) registers. ++ // There is no requirement that any ordering here matches any ordering c2 gives ++ // it's optoregs. 
++ ++ number_of_registers = (2 * RegisterImpl::number_of_registers + ++ 4 * FloatRegisterImpl::number_of_registers + ++ 1) // flags ++ }; ++ ++ // added to make it compile ++ static const int max_gpr; ++ static const int max_fpr; ++}; ++ ++// A set of registers ++class RegSet { ++ uint32_t _bitset; ++ ++ RegSet(uint32_t bitset) : _bitset(bitset) { } ++ ++public: ++ ++ RegSet() : _bitset(0) { } ++ ++ RegSet(Register r1) : _bitset(r1->bit()) { } ++ ++ RegSet operator+(const RegSet aSet) const { ++ RegSet result(_bitset | aSet._bitset); ++ return result; ++ } ++ ++ RegSet operator-(const RegSet aSet) const { ++ RegSet result(_bitset & ~aSet._bitset); ++ return result; ++ } ++ ++ RegSet &operator+=(const RegSet aSet) { ++ *this = *this + aSet; ++ return *this; ++ } ++ ++ static RegSet of(Register r1) { ++ return RegSet(r1); ++ } ++ ++ static RegSet of(Register r1, Register r2) { ++ return of(r1) + r2; ++ } ++ ++ static RegSet of(Register r1, Register r2, Register r3) { ++ return of(r1, r2) + r3; ++ } ++ ++ static RegSet of(Register r1, Register r2, Register r3, Register r4) { ++ return of(r1, r2, r3) + r4; ++ } ++ ++ static RegSet range(Register start, Register end) { ++ uint32_t bits = ~0; ++ bits <<= start->encoding(); ++ bits <<= 31 - end->encoding(); ++ bits >>= 31 - end->encoding(); ++ ++ return RegSet(bits); ++ } ++ ++ uint32_t bits() const { return _bitset; } ++}; ++ ++#endif // CPU_SW64_VM_REGISTER_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/relocInfo_sw64.cpp b/src/hotspot/cpu/sw64/relocInfo_sw64.cpp +new file mode 100755 +index 0000000000..f0302a751f +--- /dev/null ++++ b/src/hotspot/cpu/sw64/relocInfo_sw64.cpp +@@ -0,0 +1,130 @@ ++/* ++ * Copyright (c) 1998, 2017, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ *
++ */
++
++#include "precompiled.hpp"
++#include "asm/macroAssembler.hpp"
++#include "code/relocInfo.hpp"
++#include "nativeInst_sw64.hpp"
++#include "oops/compressedOops.inline.hpp"
++#include "oops/klass.inline.hpp"
++#include "oops/oop.inline.hpp"
++#include "runtime/safepoint.hpp"
++#include "runtime/safepointMechanism.hpp"
++
++
++void Relocation::pd_set_data_value(address x, intptr_t o, bool verify_only) {
++  x += o;
++  typedef Assembler::WhichOperand WhichOperand;
++  WhichOperand which = (WhichOperand) format(); // that is, disp32 or imm, call32, narrow oop
++  assert(which == Assembler::disp32_operand ||
++         which == Assembler::narrow_oop_operand ||
++         which == Assembler::imm_operand, "format unpacks ok");
++  if (which == Assembler::imm_operand) {
++    if (verify_only) {
++      assert(nativeMovConstReg_at(addr())->data() == (long)x, "instructions must match");
++    } else {
++      nativeMovConstReg_at(addr())->set_data((intptr_t)(x));
++    }
++  } else if (which == Assembler::narrow_oop_operand) {
++    // Unimplemented();
++    // both compressed oops and compressed classes look the same
++    if (Universe::heap()->is_in_reserved((oop)x)) {
++      if (verify_only) {
++        assert((int32_t)nativeMovConstReg_at(addr())->data() == (int32_t)CompressedOops::encode((oop)x), "instructions must match");
++      } else {
++        nativeMovConstReg_at(addr())->set_data((intptr_t)(CompressedOops::encode((oop)x)));
++      }
++    } else {
++      if (verify_only) {
++        assert((int32_t)nativeMovConstReg_at(addr())->data() == (int32_t)Klass::encode_klass((Klass*)x), "instructions must match");
++      } else {
++        nativeMovConstReg_at(addr())->set_data((intptr_t)(Klass::encode_klass((Klass*)x)));
++      }
++    }
++  } else {
++    // Note: Use runtime_call_type relocations for call32_operand.
++    Unimplemented();
++    assert(0, "call32_operand not supported in SW64");
++  }
++}
++
++
++// NOTE: this call relocation is not really needed on SW64, since SW64 uses
++// absolute targets; perhaps call relocation could be dropped entirely.
++address Relocation::pd_call_destination(address orig_addr) {
++  NativeInstruction* ni = nativeInstruction_at(addr());
++  if (ni->is_call()) {
++    return nativeCall_at(addr())->destination();
++  } else if (ni->is_jump()) {
++    return nativeJump_at(addr())->jump_destination();
++  } else {
++    tty->print_cr("\nError!\ncall destination: 0x%lx", (long)addr());
++    Disassembler::decode(addr() - 10 * 4, addr() + 10 * 4, tty);
++    Unimplemented();
++    return NULL;
++  }
++}
++
++
++void Relocation::pd_set_call_destination(address x) {
++  NativeInstruction* ni = nativeInstruction_at(addr());
++  if (ni->is_call()) {
++    nativeCall_at(addr())->set_destination(x);
++  } else if (ni->is_jump()) {
++    NativeJump* nj = nativeJump_at(addr());
++
++    // Unresolved jumps are recognized by a destination of -1.
++    // However, 64-bit code can't actually produce such an address,
++    // so a jump to self is encoded instead and jump_destination()
++    // returns -1 as the signal. We must not relocate this jmp or
++    // the IC code will not see it as unresolved.
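++    // (The -1 convention mirrors NativeJump::jump_destination() in
++    // nativeInst_sw64.cpp earlier in this patch, which reports a jump whose
++    // target equals its own address as unresolved:
++    //
++    //   dest = (dest == (address) this) ? (address) -1 : dest;
++    //
++    // so an unresolved site is stored as a jump-to-self and translated back here.)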
++ ++ if (nj->jump_destination() == (address) -1) { ++ x = addr(); // jump to self ++ } ++ nj->set_jump_destination(x); ++ } else { ++ ShouldNotReachHere(); ++ } ++} ++ ++ ++address* Relocation::pd_address_in_code() { ++ Unimplemented(); ++ return (address*)addr(); ++} ++ ++ ++address Relocation::pd_get_address_from_code() { ++ NativeMovConstReg* ni = nativeMovConstReg_at(addr()); ++ return (address)ni->data(); ++} ++ ++ ++ ++void poll_Relocation::fix_relocation_after_move(const CodeBuffer* src, CodeBuffer* dest) { ++} ++ ++void metadata_Relocation::pd_fix_value(address x) { ++} +diff --git a/src/hotspot/cpu/sw64/relocInfo_sw64.hpp b/src/hotspot/cpu/sw64/relocInfo_sw64.hpp +new file mode 100644 +index 0000000000..bebf11b307 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/relocInfo_sw64.hpp +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_RELOCINFO_SW64_HPP ++#define CPU_SW64_VM_RELOCINFO_SW64_HPP ++ ++ // machine-dependent parts of class relocInfo ++ private: ++ enum { ++ // Since SW64 instructions are whole words, ++ // the two low-order offset bits can always be discarded. ++ offset_unit = 4, ++ ++ // imm_oop_operand vs. narrow_oop_operand ++ format_width = 2 ++ }; ++ ++ public: ++ ++ // This platform has no oops in the code that are not also ++ // listed in the oop section. ++ static bool mustIterateImmediateOopsInCode() { return false; } ++ ++#endif // CPU_SW64_VM_RELOCINFO_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/runtime_sw64.cpp b/src/hotspot/cpu/sw64/runtime_sw64.cpp +new file mode 100644 +index 0000000000..940a7c403a +--- /dev/null ++++ b/src/hotspot/cpu/sw64/runtime_sw64.cpp +@@ -0,0 +1,188 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#ifdef COMPILER2 ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "code/vmreg.hpp" ++#include "interpreter/interpreter.hpp" ++#include "opto/runtime.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/globalDefinitions.hpp" ++#include "vmreg_sw64.inline.hpp" ++#endif ++ ++#define __ masm-> ++ ++//-------------- generate_exception_blob ----------- ++// creates _exception_blob. ++// The exception blob is jumped to from a compiled method. ++// (see emit_exception_handler in sparc.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jump, and left with a jump. ++// ++// Arguments: ++// V0: exception oop ++// T4: exception pc ++// ++// Results: ++// A0: exception oop ++// A1: exception pc in caller or ??? ++// jumps to: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// ++// [stubGenerator_sw64.cpp] generate_forward_exception() ++// |- V0, T4 are created ++// |- T12 <= SharedRuntime::exception_handler_for_return_address ++// `- jr T12 ++// `- the caller's exception_handler ++// `- jr OptoRuntime::exception_blob ++// `- here ++// ++//void OptoRuntime::generate_exception_blob() { ++// // Capture info about frame layout ++// enum layout { ++// fp_off, ++// return_off, // slot for return address ++// framesize ++// }; ++// ++// // allocate space for the code ++// ResourceMark rm; ++// // setup code generation tools ++// CodeBuffer buffer("exception_blob", 5120, 5120); ++// MacroAssembler* masm = new MacroAssembler(&buffer); ++// ++// ++// address start = __ pc(); ++// ++// __ addiu(esp, -1 * framesize * wordSize, esp); // Prolog! ++// ++// // this frame will be treated as the original caller method. ++// // So, the return pc should be filled with the original exception pc. ++// // ref: X86's implementation ++// __ stl(T4, return_off *wordSize, esp); // return address ++// __ stl(rfp, fp_off *wordSize, esp); ++// ++// // Save callee saved registers. None for UseSSE=0, ++// // floats-only for UseSSE=1, and doubles for UseSSE=2. ++// ++// __ addiu(esp, fp_off * wordSize, rfp); ++// ++// // Store exception in Thread object. We cannot pass any arguments to the ++// // handle_exception call, since we do not want to make any assumption ++// // about the size of the frame where the exception happened in. ++// Register thread = rthread; ++// ++// __ std(V0, Address(thread, JavaThread::exception_oop_offset())); ++// __ std(T4, Address(thread, JavaThread::exception_pc_offset())); ++// ++// // This call does all the hard work. It checks if an exception handler ++// // exists in the method. 
++// // If so, it returns the handler address. ++// // If not, it prepares for stack-unwinding, restoring the callee-save ++// // registers of the frame being removed. ++// //no matching function for call to 'MacroAssembler::set_last_Java_frame(RegisterImpl*&, RegisterImpl* const&, RegisterImpl* const&, address) ++//// __ set_last_Java_frame(thread, noreg, noreg, (address)NULL); ++// ++// __ mov(AT, -(StackAlignmentInBytes)); ++// __ andr(esp, esp, AT); // Fix stack alignment as required by ABI ++// ++//#ifdef ZHJ20180909 ++// __ relocate(relocInfo::internal_pc_type); ++// { ++// // patchable_set48 (4) + sd (1) + move (1) + patchable_call_setfpec1 ++// long save_pc = (long)__ pc() + 24 + NativeCall::return_address_offset; ++// __ patchable_set48(AT, save_pc); ++// } ++//#else ++// { ++// // addl (1) + sd (1) + move(1) + patchable_call_setfpec1 ++// intptr_t patch_off = 3 * BytesPerInstWord + NativeCall::return_address_offset; ++// __ br(AT, 0); ++// __ addl(AT, patch_off, AT); ++// } ++//#endif ++// __ stl(AT, in_bytes(JavaThread::last_Java_pc_offset()), thread); ++// ++// __ move(A0, thread); ++// __ patchable_call_setfpec1((address)OptoRuntime::handle_exception_C); ++// ++// // Set an oopmap for the call site ++// OopMapSet *oop_maps = new OopMapSet(); ++// OopMap* map = new OopMap( framesize, 0 ); ++// ++// oop_maps->add_gc_map( __ offset() - 4, map); ++// ++// __ reset_last_Java_frame(thread, true); ++// ++// // Pop self-frame. ++// __ leave(); // Epilog! ++// ++// // V0: exception handler ++// ++// // We have a handler in V0, (could be deopt blob) ++// __ move(T12, V0); ++// ++// // Get the exception ++// __ ld(A0, Address(thread, JavaThread::exception_oop_offset())); ++// // Get the exception pc in case we are deoptimized ++// __ ld(A1, Address(thread, JavaThread::exception_pc_offset())); ++//#ifdef ASSERT ++// __ std(R0, Address(thread, JavaThread::exception_handler_pc_offset())); ++// __ std(R0, Address(thread, JavaThread::exception_pc_offset())); ++//#endif ++// // Clear the exception oop so GC no longer processes it as a root. ++// __ std(R0, Address(thread, JavaThread::exception_oop_offset())); ++// ++// // Fix seg fault when running: ++// // Eclipse + Plugin + Debug As ++// // This is the only condition where C2 calls SharedRuntime::generate_deopt_blob() ++// // ++// __ move(V0, A0); ++// __ move(T4, A1); ++// ++// // V0: exception oop ++// // T12: exception handler ++// // A1: exception pc ++// __ jr(T12); ++// ++// // make sure all code is generated ++// masm->flush(); ++// ++// _exception_blob = ExceptionBlob::create(&buffer, oop_maps, framesize); ++//} +diff --git a/src/hotspot/cpu/sw64/sharedRuntime_sw64.cpp b/src/hotspot/cpu/sw64/sharedRuntime_sw64.cpp +new file mode 100644 +index 0000000000..29307b90d1 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/sharedRuntime_sw64.cpp +@@ -0,0 +1,4578 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "code/debugInfoRec.hpp" ++#include "code/icBuffer.hpp" ++#include "code/nativeInst.hpp" ++#include "code/vtableStubs.hpp" ++#include "gc/shared/gcLocker.hpp" ++#include "interpreter/interpreter.hpp" ++#include "logging/log.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/vframeArray.hpp" ++#include "runtime/vm_version.hpp" ++#include "utilities/align.hpp" ++#include "utilities/formatBuffer.hpp" ++#include "utilities/macros.hpp" ++#include "vmreg_sw64.inline.hpp" ++#ifdef COMPILER1 ++#include "c1/c1_Runtime1.hpp" ++#endif ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++#if INCLUDE_JVMCI ++#include "jvmci/jvmciJavaClasses.hpp" ++#endif ++#if INCLUDE_SHENANDOAHGC ++#include "gc/shenandoah/shenandoahBarrierSet.hpp" ++#include "gc/shenandoah/shenandoahBarrierSetAssembler.hpp" ++#endif ++ ++#define __ masm-> ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++const int StackAlignmentInSlots = StackAlignmentInBytes / VMRegImpl::stack_slot_size; ++ ++class SimpleRuntimeFrame { ++ ++ public: ++ ++ // Most of the runtime stubs have this simple frame layout. ++ // This class exists to make the layout shared in one place. ++ // Offsets are for compiler stack slots, which are jints. ++ enum layout { ++ // The frame sender code expects that rbp will be in the "natural" place and ++ // will override any oopMap setting for it. We must therefore force the layout ++ // so that it agrees with the frame sender code. ++// rfp_off = frame::arg_reg_save_area_bytes/BytesPerInt,//not understand? 
jzy ++ rfp_off = 0, ++ rfp_off2, ++ return_off, return_off2, ++ framesize ++ }; ++}; ++ ++class RegisterSaver { ++public: ++ enum { FPU_regs_live = 32 }; ++ // Capture info about frame layout ++ enum layout { ++#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off, ++ DEF_LAYOUT_OFFS(for_16_bytes_aligned) ++ DEF_LAYOUT_OFFS(fpr0) ++ DEF_LAYOUT_OFFS(fpr1) ++ DEF_LAYOUT_OFFS(fpr2) ++ DEF_LAYOUT_OFFS(fpr3) ++ DEF_LAYOUT_OFFS(fpr4) ++ DEF_LAYOUT_OFFS(fpr5) ++ DEF_LAYOUT_OFFS(fpr6) ++ DEF_LAYOUT_OFFS(fpr7) ++ DEF_LAYOUT_OFFS(fpr8) ++ DEF_LAYOUT_OFFS(fpr9) ++ DEF_LAYOUT_OFFS(fpr10) ++ DEF_LAYOUT_OFFS(fpr11) ++ DEF_LAYOUT_OFFS(fpr12) ++ DEF_LAYOUT_OFFS(fpr13) ++ DEF_LAYOUT_OFFS(fpr14) ++ DEF_LAYOUT_OFFS(fpr15) ++ DEF_LAYOUT_OFFS(fpr16) ++ DEF_LAYOUT_OFFS(fpr17) ++ DEF_LAYOUT_OFFS(fpr18) ++ DEF_LAYOUT_OFFS(fpr19) ++ DEF_LAYOUT_OFFS(fpr20) ++ DEF_LAYOUT_OFFS(fpr21) ++ DEF_LAYOUT_OFFS(fpr22) ++ DEF_LAYOUT_OFFS(fpr23) ++ DEF_LAYOUT_OFFS(fpr24) ++ DEF_LAYOUT_OFFS(fpr25) ++ DEF_LAYOUT_OFFS(fpr26) ++ DEF_LAYOUT_OFFS(fpr27) ++ DEF_LAYOUT_OFFS(fpr28) ++ DEF_LAYOUT_OFFS(fpr29) ++ DEF_LAYOUT_OFFS(fpr30) ++ DEF_LAYOUT_OFFS(fpr31) ++ ++ DEF_LAYOUT_OFFS(v0) ++ DEF_LAYOUT_OFFS(t0) ++ DEF_LAYOUT_OFFS(t1) ++ DEF_LAYOUT_OFFS(t2) ++ DEF_LAYOUT_OFFS(t3) ++ DEF_LAYOUT_OFFS(t4) ++ DEF_LAYOUT_OFFS(t5) ++ DEF_LAYOUT_OFFS(t6) ++ DEF_LAYOUT_OFFS(t7) ++ DEF_LAYOUT_OFFS(s0) ++ DEF_LAYOUT_OFFS(s1) ++ DEF_LAYOUT_OFFS(s2) ++ DEF_LAYOUT_OFFS(s3) ++ DEF_LAYOUT_OFFS(s4) ++ DEF_LAYOUT_OFFS(s5) ++ // rfp move down ++ DEF_LAYOUT_OFFS(a0) ++ DEF_LAYOUT_OFFS(a1) ++ DEF_LAYOUT_OFFS(a2) ++ DEF_LAYOUT_OFFS(a3) ++ DEF_LAYOUT_OFFS(a4) ++ DEF_LAYOUT_OFFS(a5) ++ DEF_LAYOUT_OFFS(t8) ++ DEF_LAYOUT_OFFS(t9) ++ DEF_LAYOUT_OFFS(t10) ++ DEF_LAYOUT_OFFS(t11) ++ // RA move down ++ DEF_LAYOUT_OFFS(t12) ++ // no AT ++ DEF_LAYOUT_OFFS(gp) ++ // no esp ++ // no R0 ++ DEF_LAYOUT_OFFS(fp) ++ DEF_LAYOUT_OFFS(return) ++ reg_save_size ++ }; ++ ++ public: ++ static OopMap* save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors = false); ++ static void restore_live_registers(MacroAssembler* masm, bool restore_vectors = false); ++ ++ //static int raOffset(void) { return return_off / 2; } ++ //static int methodOffset(void) { return s3_off / 2; } ++ //static int v0Offset(void) { return v0_off / 2; } ++ ++ //static int fpResultOffset_todelete(void) { ShouldNotReachHere();return fpr0_off / 2; } ++ static int v0_offset_in_bytes(void) { return BytesPerInt * v0_off; } ++ static int a2_offset_in_bytes(void) { return a2_off / 2; } ++ static int rmethod_offset_in_bytes(void) { return BytesPerInt * s3_off; } ++ static int fsf_offset_in_bytes(void) { return BytesPerInt * fpr0_off; } ++ static int return_offset_in_bytes(void) { return BytesPerInt * return_off; } ++ // During deoptimization only the result registers need to be restored, ++ // all the other values have already been extracted. 
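// --- Editor's illustrative sketch, not part of the patch ---------------------
// The DEF_LAYOUT_OFFS pattern in RegisterSaver above gives every saved
// register two consecutive jint-sized slots (<reg>_off and <reg>H_off), so a
// 64-bit register occupies 8 bytes and its byte offset in the save area is
// simply <reg>_off * BytesPerInt. The register names here are just examples,
// not the full SW64 set.
#include <cstdio>

namespace regsaver_sketch {
  const int BytesPerInt = 4;

  enum layout {
#define DEF_LAYOUT_OFFS(regname) regname ## _off, regname ## H_off,
    DEF_LAYOUT_OFFS(fpr0)     // slots 0,1
    DEF_LAYOUT_OFFS(v0)       // slots 2,3
    DEF_LAYOUT_OFFS(gp)       // slots 4,5
#undef DEF_LAYOUT_OFFS
    reg_save_size             // total number of 4-byte slots
  };

  inline int byte_offset(int off) { return off * BytesPerInt; }
}

int main() {
  // v0 lives 8 bytes above the save-area base; this toy area is 24 bytes.
  std::printf("v0 at %d, total size %d\n",
              regsaver_sketch::byte_offset(regsaver_sketch::v0_off),
              regsaver_sketch::byte_offset(regsaver_sketch::reg_save_size));
  return 0;
}
// -----------------------------------------------------------------------------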
++ static void restore_result_registers(MacroAssembler* masm); ++}; ++ ++//put here becauseof RegisterSaver's layout ++static void push_CPU_state(MacroAssembler* masm) { ++ __ subptr(esp, (RegisterSaver::reg_save_size-4) * jintSize, esp); ++ ++ __ fstd(f0, RegisterSaver::fpr0_off * jintSize, esp); __ fstd(f1, RegisterSaver::fpr1_off * jintSize, esp); ++ __ fstd(f2, RegisterSaver::fpr2_off * jintSize, esp); __ fstd(f3, RegisterSaver::fpr3_off * jintSize, esp); ++ __ fstd(f4, RegisterSaver::fpr4_off * jintSize, esp); __ fstd(f5, RegisterSaver::fpr5_off * jintSize, esp); ++ __ fstd(f6, RegisterSaver::fpr6_off * jintSize, esp); __ fstd(f7, RegisterSaver::fpr7_off * jintSize, esp); ++ __ fstd(f8, RegisterSaver::fpr8_off * jintSize, esp); __ fstd(f9, RegisterSaver::fpr9_off * jintSize, esp); ++ __ fstd(f10, RegisterSaver::fpr10_off * jintSize, esp); __ fstd(f11, RegisterSaver::fpr11_off * jintSize, esp); ++ __ fstd(f12, RegisterSaver::fpr12_off * jintSize, esp); __ fstd(f13, RegisterSaver::fpr13_off * jintSize, esp); ++ __ fstd(f14, RegisterSaver::fpr14_off * jintSize, esp); __ fstd(f15, RegisterSaver::fpr15_off * jintSize, esp); ++ __ fstd(f16, RegisterSaver::fpr16_off * jintSize, esp); __ fstd(f17, RegisterSaver::fpr17_off * jintSize, esp); ++ __ fstd(f18, RegisterSaver::fpr18_off * jintSize, esp); __ fstd(f19, RegisterSaver::fpr19_off * jintSize, esp); ++ __ fstd(f20, RegisterSaver::fpr20_off * jintSize, esp); __ fstd(f21, RegisterSaver::fpr21_off * jintSize, esp); ++ __ fstd(f22, RegisterSaver::fpr22_off * jintSize, esp); __ fstd(f23, RegisterSaver::fpr23_off * jintSize, esp); ++ __ fstd(f24, RegisterSaver::fpr24_off * jintSize, esp); __ fstd(f25, RegisterSaver::fpr25_off * jintSize, esp); ++ __ fstd(f26, RegisterSaver::fpr26_off * jintSize, esp); __ fstd(f27, RegisterSaver::fpr27_off * jintSize, esp); ++ __ fstd(f28, RegisterSaver::fpr28_off * jintSize, esp); __ fstd(f29, RegisterSaver::fpr29_off * jintSize, esp); ++ __ fstd(f30, RegisterSaver::fpr30_off * jintSize, esp); ++ ++ __ stl(V0, Address(esp, RegisterSaver::v0_off * jintSize)); ++ __ stl(i1, Address(esp, RegisterSaver::t0_off * jintSize)); ++ __ stl(i2, Address(esp, RegisterSaver::t1_off * jintSize)); ++ __ stl(i3, Address(esp, RegisterSaver::t2_off * jintSize)); ++ __ stl(i4, Address(esp, RegisterSaver::t3_off * jintSize)); ++ __ stl(i5, Address(esp, RegisterSaver::t4_off * jintSize)); ++ __ stl(i6, Address(esp, RegisterSaver::t5_off * jintSize)); ++ __ stl(i7, Address(esp, RegisterSaver::t6_off * jintSize)); ++ __ stl(i8, Address(esp, RegisterSaver::t7_off * jintSize)); ++ __ stl(i9, Address(esp, RegisterSaver::s0_off * jintSize)); ++ __ stl(i10, Address(esp, RegisterSaver::s1_off * jintSize)); ++ __ stl(i11, Address(esp, RegisterSaver::s2_off * jintSize)); ++ __ stl(i12, Address(esp, RegisterSaver::s3_off * jintSize)); ++ __ stl(i13, Address(esp, RegisterSaver::s4_off * jintSize)); ++ __ stl(i14, Address(esp, RegisterSaver::s5_off * jintSize)); ++ __ stl(i16, Address(esp, RegisterSaver::a0_off * jintSize)); ++ __ stl(i17, Address(esp, RegisterSaver::a1_off * jintSize)); ++ __ stl(i18, Address(esp, RegisterSaver::a2_off * jintSize)); ++ __ stl(i19, Address(esp, RegisterSaver::a3_off * jintSize)); ++ __ stl(i20, Address(esp, RegisterSaver::a4_off * jintSize)); ++ __ stl(i21, Address(esp, RegisterSaver::a5_off * jintSize)); ++ __ stl(i22, Address(esp, RegisterSaver::t8_off * jintSize)); ++ __ stl(i23, Address(esp, RegisterSaver::t9_off * jintSize)); ++ __ stl(i24, Address(esp, RegisterSaver::t10_off * jintSize)); ++ __ stl(i25, 
Address(esp, RegisterSaver::t11_off * jintSize)); ++ __ stl(i27, Address(esp, RegisterSaver::t12_off * jintSize)); ++ ++ __ stl(GP, Address(esp, RegisterSaver::gp_off * jintSize)); ++ //__ stl(rfp, Address(esp, RegisterSaver::fp_off * jintSize)); ++ //__ stl(RA, Address(esp, RegisterSaver::return_off * jintSize)); ++} ++ ++static void pop_CPU_state(MacroAssembler* masm) { ++ __ fldd(f0, RegisterSaver::fpr0_off * jintSize, esp); __ fldd(f1, RegisterSaver::fpr1_off * jintSize, esp); ++ __ fldd(f2, RegisterSaver::fpr2_off * jintSize, esp); __ fldd(f3, RegisterSaver::fpr3_off * jintSize, esp); ++ __ fldd(f4, RegisterSaver::fpr4_off * jintSize, esp); __ fldd(f5, RegisterSaver::fpr5_off * jintSize, esp); ++ __ fldd(f6, RegisterSaver::fpr6_off * jintSize, esp); __ fldd(f7, RegisterSaver::fpr7_off * jintSize, esp); ++ __ fldd(f8, RegisterSaver::fpr8_off * jintSize, esp); __ fldd(f9, RegisterSaver::fpr9_off * jintSize, esp); ++ __ fldd(f10, RegisterSaver::fpr10_off * jintSize, esp); __ fldd(f11, RegisterSaver::fpr11_off * jintSize, esp); ++ __ fldd(f12, RegisterSaver::fpr12_off * jintSize, esp); __ fldd(f13, RegisterSaver::fpr13_off * jintSize, esp); ++ __ fldd(f14, RegisterSaver::fpr14_off * jintSize, esp); __ fldd(f15, RegisterSaver::fpr15_off * jintSize, esp); ++ __ fldd(f16, RegisterSaver::fpr16_off * jintSize, esp); __ fldd(f17, RegisterSaver::fpr17_off * jintSize, esp); ++ __ fldd(f18, RegisterSaver::fpr18_off * jintSize, esp); __ fldd(f19, RegisterSaver::fpr19_off * jintSize, esp); ++ __ fldd(f20, RegisterSaver::fpr20_off * jintSize, esp); __ fldd(f21, RegisterSaver::fpr21_off * jintSize, esp); ++ __ fldd(f22, RegisterSaver::fpr22_off * jintSize, esp); __ fldd(f23, RegisterSaver::fpr23_off * jintSize, esp); ++ __ fldd(f24, RegisterSaver::fpr24_off * jintSize, esp); __ fldd(f25, RegisterSaver::fpr25_off * jintSize, esp); ++ __ fldd(f26, RegisterSaver::fpr26_off * jintSize, esp); __ fldd(f27, RegisterSaver::fpr27_off * jintSize, esp); ++ __ fldd(f28, RegisterSaver::fpr28_off * jintSize, esp); __ fldd(f29, RegisterSaver::fpr29_off * jintSize, esp); ++ __ fldd(f30, RegisterSaver::fpr30_off * jintSize, esp); ++ ++ __ ldl(V0, Address(esp, RegisterSaver::v0_off * jintSize)); ++ __ ldl(i1, Address(esp, RegisterSaver::t0_off * jintSize)); ++ __ ldl(i2, Address(esp, RegisterSaver::t1_off * jintSize)); ++ __ ldl(i3, Address(esp, RegisterSaver::t2_off * jintSize)); ++ __ ldl(i4, Address(esp, RegisterSaver::t3_off * jintSize)); ++ __ ldl(i5, Address(esp, RegisterSaver::t4_off * jintSize)); ++ __ ldl(i6, Address(esp, RegisterSaver::t5_off * jintSize)); ++ __ ldl(i7, Address(esp, RegisterSaver::t6_off * jintSize)); ++ __ ldl(i8, Address(esp, RegisterSaver::t7_off * jintSize)); ++ __ ldl(i9, Address(esp, RegisterSaver::s0_off * jintSize)); ++ __ ldl(i10, Address(esp, RegisterSaver::s1_off * jintSize)); ++ __ ldl(i11, Address(esp, RegisterSaver::s2_off * jintSize)); ++ __ ldl(i12, Address(esp, RegisterSaver::s3_off * jintSize)); ++ __ ldl(i13, Address(esp, RegisterSaver::s4_off * jintSize)); ++ __ ldl(i14, Address(esp, RegisterSaver::s5_off * jintSize)); ++ __ ldl(i16, Address(esp, RegisterSaver::a0_off * jintSize)); ++ __ ldl(i17, Address(esp, RegisterSaver::a1_off * jintSize)); ++ __ ldl(i18, Address(esp, RegisterSaver::a2_off * jintSize)); ++ __ ldl(i19, Address(esp, RegisterSaver::a3_off * jintSize)); ++ __ ldl(i20, Address(esp, RegisterSaver::a4_off * jintSize)); ++ __ ldl(i21, Address(esp, RegisterSaver::a5_off * jintSize)); ++ __ ldl(i22, Address(esp, RegisterSaver::t8_off * jintSize)); ++ __ ldl(i23, 
Address(esp, RegisterSaver::t9_off * jintSize)); ++ __ ldl(i24, Address(esp, RegisterSaver::t10_off * jintSize)); ++ __ ldl(i25, Address(esp, RegisterSaver::t11_off * jintSize)); ++ __ ldl(i27, Address(esp, RegisterSaver::t12_off * jintSize)); ++ ++ __ ldl(GP, Address(esp, RegisterSaver::gp_off * jintSize)); ++// __ ldl(rfp, Address(esp, RegisterSaver::fp_off * jintSize)); ++// __ ldl(RA, Address(esp, RegisterSaver::return_off * jintSize)); ++ ++ __ addptr(esp, (RegisterSaver::reg_save_size-4) * jintSize, esp); ++} ++ ++ ++OopMap* RegisterSaver::save_live_registers(MacroAssembler* masm, int additional_frame_words, int* total_frame_words, bool save_vectors) {SCOPEMARK_NAME(save_live_registers, masm);//__ stop("save_live_registers"); ++/*#if COMPILER2_OR_JVMCI ++ if (save_vectors) { ++ // Save upper half of vector registers ++ int vect_words = 32 * 8 / wordSize; ++ additional_frame_words += vect_words; ++ } ++#else ++ assert(!save_vectors, "vectors are generated only by C2 and JVMCI"); ++#endif ++*/ ++ int frame_size_in_bytes = align_up(additional_frame_words*wordSize + ++ reg_save_size*BytesPerInt, 16); ++ // OopMap frame size is in compiler stack slots (jint's) not bytes or words ++ int frame_size_in_slots = frame_size_in_bytes / BytesPerInt; ++ // The caller will allocate additional_frame_words ++ int additional_frame_slots = additional_frame_words*wordSize / BytesPerInt; ++ // CodeBlob frame size is in words. ++ int frame_size_in_words = frame_size_in_bytes / wordSize; ++ *total_frame_words = frame_size_in_words; ++ ++ // save registers ++ __ enter(); ++ push_CPU_state(masm); ++ /*__ subptr(esp, reg_save_size * jintSize, esp, rscratch1_GP); ++ ++ __ fstd(f0, fpr0_off * jintSize, esp); __ fstd(f1, fpr1_off * jintSize, esp); ++ __ fstd(f2, fpr2_off * jintSize, esp); __ fstd(f3, fpr3_off * jintSize, esp); ++ __ fstd(f4, fpr4_off * jintSize, esp); __ fstd(f5, fpr5_off * jintSize, esp); ++ __ fstd(f6, fpr6_off * jintSize, esp); __ fstd(f7, fpr7_off * jintSize, esp); ++ __ fstd(f8, fpr8_off * jintSize, esp); __ fstd(f9, fpr9_off * jintSize, esp); ++ __ fstd(f10, fpr10_off * jintSize, esp); __ fstd(f11, fpr11_off * jintSize, esp); ++ __ fstd(f12, fpr12_off * jintSize, esp); __ fstd(f13, fpr13_off * jintSize, esp); ++ __ fstd(f14, fpr14_off * jintSize, esp); __ fstd(f15, fpr15_off * jintSize, esp); ++ __ fstd(f16, fpr16_off * jintSize, esp); __ fstd(f17, fpr17_off * jintSize, esp); ++ __ fstd(f18, fpr18_off * jintSize, esp); __ fstd(f19, fpr19_off * jintSize, esp); ++ __ fstd(f20, fpr20_off * jintSize, esp); __ fstd(f21, fpr21_off * jintSize, esp); ++ __ fstd(f22, fpr22_off * jintSize, esp); __ fstd(f23, fpr23_off * jintSize, esp); ++ __ fstd(f24, fpr24_off * jintSize, esp); __ fstd(f25, fpr25_off * jintSize, esp); ++ __ fstd(f26, fpr26_off * jintSize, esp); __ fstd(f27, fpr27_off * jintSize, esp); ++ __ fstd(f28, fpr28_off * jintSize, esp); __ fstd(f29, fpr29_off * jintSize, esp); ++ __ fstd(f30, fpr30_off * jintSize, esp); ++ ++ __ stl(V0, Address(esp, v0_off * jintSize)); ++ __ stl(i1, Address(esp, t0_off * jintSize)); ++ __ stl(i2, Address(esp, t1_off * jintSize)); ++ __ stl(i3, Address(esp, t2_off * jintSize)); ++ __ stl(i4, Address(esp, t3_off * jintSize)); ++ __ stl(i5, Address(esp, t4_off * jintSize)); ++ __ stl(i6, Address(esp, t5_off * jintSize)); ++ __ stl(i7, Address(esp, t6_off * jintSize)); ++ __ stl(i8, Address(esp, t7_off * jintSize)); ++ __ stl(i9, Address(esp, s0_off * jintSize)); ++ __ stl(i10, Address(esp, s1_off * jintSize)); ++ __ stl(i11, Address(esp, s2_off * jintSize)); 
++ __ stl(i12, Address(esp, s3_off * jintSize)); ++ __ stl(i13, Address(esp, s4_off * jintSize)); ++ __ stl(i14, Address(esp, s5_off * jintSize)); ++ __ stl(i16, Address(esp, a0_off * jintSize)); ++ __ stl(i17, Address(esp, a1_off * jintSize)); ++ __ stl(i18, Address(esp, a2_off * jintSize)); ++ __ stl(i19, Address(esp, a3_off * jintSize)); ++ __ stl(i20, Address(esp, a4_off * jintSize)); ++ __ stl(i21, Address(esp, a5_off * jintSize)); ++ __ stl(i22, Address(esp, t8_off * jintSize)); ++ __ stl(i23, Address(esp, t9_off * jintSize)); ++ __ stl(i24, Address(esp, t10_off * jintSize)); ++ __ stl(i25, Address(esp, t11_off * jintSize)); ++ __ stl(i27, Address(esp, t12_off * jintSize)); ++ ++ __ stl(GP, Address(esp, gp_off * jintSize)); ++ __ stl(rfp, Address(esp, fp_off * jintSize)); ++ __ stl(RA, Address(esp, return_off * jintSize));*/ ++ //__ addiu(SP, fp_off * jintSize, FP); //TODO:why add this in sw8? jzy ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap(frame_size_in_slots, 0); ++ ++#define STACK_OFFSET(x) VMRegImpl::stack2reg((x) + additional_frame_slots) ++ ++ map->set_callee_saved(STACK_OFFSET( v0_off), V0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t0_off), i1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t1_off), i2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t2_off), i3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t3_off), i4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t4_off), i5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t5_off), i6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t6_off), i7->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t7_off), i8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s0_off), i9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s1_off), i10->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s2_off), i11->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s3_off), i12->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s4_off), i13->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( s5_off), i14->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a0_off), A0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a1_off), A1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a2_off), A2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a3_off), A3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a4_off), A4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( a5_off), A5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t8_off), i22->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t9_off), i23->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t10_off), i24->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t11_off), i25->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( t12_off), i27->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( gp_off), GP->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fp_off), rfp->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( return_off), RA->as_VMReg()); ++ ++ map->set_callee_saved(STACK_OFFSET( fpr0_off), f0->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr1_off), f1->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr2_off), f2->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr3_off), f3->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr4_off), f4->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr5_off), f5->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr6_off), f6->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr7_off), f7->as_VMReg()); ++ 
map->set_callee_saved(STACK_OFFSET( fpr8_off), f8->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr9_off), f9->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr10_off), f10->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr11_off), f11->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr12_off), f12->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr13_off), f13->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr14_off), f14->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr15_off), f15->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr16_off), f16->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr17_off), f17->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr18_off), f18->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr19_off), f19->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr20_off), f20->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr21_off), f21->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr22_off), f22->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr23_off), f23->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr24_off), f24->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr25_off), f25->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr26_off), f26->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr27_off), f27->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr28_off), f28->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr29_off), f29->as_VMReg()); ++ map->set_callee_saved(STACK_OFFSET( fpr30_off), f30->as_VMReg()); ++ ++#undef STACK_OFFSET ++ return map; ++} ++ ++void RegisterSaver::restore_live_registers(MacroAssembler* masm, bool restore_vectors) {SCOPEMARK_NAME(restore_live_registers, masm);//__ stop("restore_live_registers"); ++ /*__ fldd(f0, fpr0_off * jintSize, esp); __ fldd(f1, fpr1_off * jintSize, esp); ++ __ fldd(f2, fpr2_off * jintSize, esp); __ fldd(f3, fpr3_off * jintSize, esp); ++ __ fldd(f4, fpr4_off * jintSize, esp); __ fldd(f5, fpr5_off * jintSize, esp); ++ __ fldd(f6, fpr6_off * jintSize, esp); __ fldd(f7, fpr7_off * jintSize, esp); ++ __ fldd(f8, fpr8_off * jintSize, esp); __ fldd(f9, fpr9_off * jintSize, esp); ++ __ fldd(f10, fpr10_off * jintSize, esp); __ fldd(f11, fpr11_off * jintSize, esp); ++ __ fldd(f12, fpr12_off * jintSize, esp); __ fldd(f13, fpr13_off * jintSize, esp); ++ __ fldd(f14, fpr14_off * jintSize, esp); __ fldd(f15, fpr15_off * jintSize, esp); ++ __ fldd(f16, fpr16_off * jintSize, esp); __ fldd(f17, fpr17_off * jintSize, esp); ++ __ fldd(f18, fpr18_off * jintSize, esp); __ fldd(f19, fpr19_off * jintSize, esp); ++ __ fldd(f20, fpr20_off * jintSize, esp); __ fldd(f21, fpr21_off * jintSize, esp); ++ __ fldd(f22, fpr22_off * jintSize, esp); __ fldd(f23, fpr23_off * jintSize, esp); ++ __ fldd(f24, fpr24_off * jintSize, esp); __ fldd(f25, fpr25_off * jintSize, esp); ++ __ fldd(f26, fpr26_off * jintSize, esp); __ fldd(f27, fpr27_off * jintSize, esp); ++ __ fldd(f28, fpr28_off * jintSize, esp); __ fldd(f29, fpr29_off * jintSize, esp); ++ __ fldd(f30, fpr30_off * jintSize, esp); ++ ++ __ ldl(V0, Address(esp, v0_off * jintSize)); ++ __ ldl(i1, Address(esp, t0_off * jintSize)); ++ __ ldl(i2, Address(esp, t1_off * jintSize)); ++ __ ldl(i3, Address(esp, t2_off * jintSize)); ++ __ ldl(i4, Address(esp, t3_off * jintSize)); ++ __ ldl(i5, Address(esp, t4_off * jintSize)); ++ __ ldl(i6, Address(esp, t5_off * jintSize)); ++ __ ldl(i7, Address(esp, t6_off * jintSize)); ++ __ ldl(i8, Address(esp, t7_off * jintSize)); ++ __ ldl(i9, 
Address(esp, s0_off * jintSize)); ++ __ ldl(i10, Address(esp, s1_off * jintSize)); ++ __ ldl(i11, Address(esp, s2_off * jintSize)); ++ __ ldl(i12, Address(esp, s3_off * jintSize)); ++ __ ldl(i13, Address(esp, s4_off * jintSize)); ++ __ ldl(i14, Address(esp, s5_off * jintSize)); ++ __ ldl(i16, Address(esp, a0_off * jintSize)); ++ __ ldl(i17, Address(esp, a1_off * jintSize)); ++ __ ldl(i18, Address(esp, a2_off * jintSize)); ++ __ ldl(i19, Address(esp, a3_off * jintSize)); ++ __ ldl(i20, Address(esp, a4_off * jintSize)); ++ __ ldl(i21, Address(esp, a5_off * jintSize)); ++ __ ldl(i22, Address(esp, t8_off * jintSize)); ++ __ ldl(i23, Address(esp, t9_off * jintSize)); ++ __ ldl(i24, Address(esp, t10_off * jintSize)); ++ __ ldl(i25, Address(esp, t11_off * jintSize)); ++ __ ldl(i27, Address(esp, t12_off * jintSize)); ++ ++ __ ldl(GP, Address(esp, gp_off * jintSize)); ++ __ ldl(rfp, Address(esp, fp_off * jintSize)); ++ __ ldl(RA, Address(esp, return_off * jintSize)); ++ ++ __ addptr(esp, reg_save_size * jintSize, esp, rscratch1_GP);*/ ++ ++ // Recover CPU state ++ pop_CPU_state(masm); ++ // Get the rbp described implicitly by the calling convention (no oopMap) ++ __ leave(); ++} ++ ++void RegisterSaver::restore_result_registers(MacroAssembler* masm) {//__ stop("restore_result_registers"); ++ ++ // Just restore result register. Only used by deoptimization. By ++ // now any callee save register that needs to be restored to a c2 ++ // caller of the deoptee has been extracted into the vframeArray ++ // and will be stuffed into the c2i adapter we create for later ++ // restoration so only result registers need to be restored here. ++ ++ // Restore integer result register ++ __ ldl(V0, v0_offset_in_bytes(), esp); ++ // Restore fp result register ++ __ load_double(FSF, Address(esp, fsf_offset_in_bytes())); ++ ++ // Pop all of the register save are off the stack ++ __ addptr(esp, return_offset_in_bytes(), esp); ++} ++ ++// Is vector's size (in bytes) bigger than a size saved by default? ++// 16 bytes XMM registers are saved by default using fxsave/fxrstor instructions. ++bool SharedRuntime::is_wide_vector(int size) { ++ return size > 16; ++} ++ ++size_t SharedRuntime::trampoline_size() { ++ ShouldNotCallThis(); ++ return 16; ++} ++ ++void SharedRuntime::generate_trampoline(MacroAssembler *masm, address destination) {__ stop("generate_trampoline"); ++ __ jump(RuntimeAddress(destination));//TODO:which rscratch register in C2? ++} ++ ++// The java_calling_convention describes stack locations as ideal slots on ++// a frame with no abi restrictions. Si ++// nce we must observe abi restrictions ++// (like the placement of the register window) the slots must be biased by ++// the following value. ++static int reg2offset_in(VMReg r) { ++ // Account for saved rbp and return address ++ // This should really be in_preserve_stack_slots ++ return (r->reg2stack() + 4) * VMRegImpl::stack_slot_size; ++} ++ ++static int reg2offset_out(VMReg r) { ++ return (r->reg2stack() + SharedRuntime::out_preserve_stack_slots()) * VMRegImpl::stack_slot_size; ++} ++ ++// --------------------------------------------------------------------------- ++// Read the array of BasicTypes from a signature, and compute where the ++// arguments should go. Values in the VMRegPair regs array refer to 4-byte ++// quantities. Values less than VMRegImpl::stack0 are registers, those above ++// refer to 4-byte stack slots. All stack slots are based off of the stack pointer ++// as framesizes are fixed. ++// VMRegImpl::stack0 refers to the first slot 0(sp). 
++// and VMRegImpl::stack0+1 refers to the memory word 4-byes higher. Register ++// up to RegisterImpl::number_of_registers) are the 64-bit ++// integer registers. ++ ++// Note: the INPUTS in sig_bt are in units of Java argument words, which are ++// either 32-bit or 64-bit depending on the build. The OUTPUTS are in 32-bit ++// units regardless of build. Of course for i486 there is no 64 bit build ++ ++// The Java calling convention is a "shifted" version of the C ABI. ++// By skipping the first C ABI register we can call non-static jni methods ++// with small numbers of arguments without having to shuffle the arguments ++// at all. Since we control the java ABI we ought to at least get some ++// advantage out of it. ++ ++int SharedRuntime::java_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ int total_args_passed, ++ int is_outgoing) { ++ ++ // Create the mapping between argument positions and ++ // registers. ++ static const Register INT_ArgReg[Argument::n_int_register_parameters_j] = { ++ j_rarg0, j_rarg1, j_rarg2, j_rarg3, j_rarg4, j_rarg5 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters_j] = { ++ j_farg0, j_farg1, j_farg2, j_farg3, ++ j_farg4, j_farg5, ++ }; ++ ++ ++ uint i_args = 0; ++ uint f_args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (i_args < Argument::n_int_register_parameters_j) { ++ regs[i].set1(INT_ArgReg[i_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_VOID: ++ // halves of T_LONG or T_DOUBLE ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ case T_LONG: ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ if (i_args < Argument::n_int_register_parameters_j) { ++ regs[i].set2(INT_ArgReg[i_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (f_args < Argument::n_float_register_parameters_j) { ++ regs[i].set1(FP_ArgReg[f_args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (f_args < Argument::n_float_register_parameters_j) { ++ regs[i].set2(FP_ArgReg[f_args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return align_up(stk_args, 2); ++} ++ ++// Patch the callers callsite with entry to compiled code if it exists. 
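// --- Editor's sketch, not part of the patch ---------------------------------
// The shape of java_calling_convention above, reduced to standalone C++: up to
// 6 integer arguments and 6 floating-point arguments go in registers, the rest
// fall back to 4-byte stack slots allocated in pairs, and the routine returns
// the stack-slot count rounded up to 2. The T_VOID halves of longs/doubles and
// the register names are omitted here; this only models the allocation logic.
#include <vector>

namespace calling_convention_sketch {
  enum class ArgKind { Int, Float, Long, Double };

  struct Slot { bool in_register; int index; };    // index = reg# or stack slot

  int java_calling_convention(const std::vector<ArgKind>& sig,
                              std::vector<Slot>& out) {
    const int int_regs = 6, fp_regs = 6;           // j_rarg0..5 / j_farg0..5
    int i_args = 0, f_args = 0, stk = 0;
    out.clear();
    for (ArgKind k : sig) {
      bool is_fp = (k == ArgKind::Float || k == ArgKind::Double);
      int& count = is_fp ? f_args : i_args;
      int  limit = is_fp ? fp_regs : int_regs;
      if (count < limit) {
        out.push_back({true, count++});            // next free register
      } else {
        out.push_back({false, stk});               // spill to the stack
        stk += 2;                                  // always a 2-slot unit
      }
    }
    return (stk + 1) & ~1;                         // align_up(stk, 2)
  }
}
// -----------------------------------------------------------------------------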
++static void patch_callers_callsite(MacroAssembler *masm) { ++ Label L; ++ //__ stop("patch_callers_callsite"); ++ __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), (int32_t)NULL_WORD, rscratch1_GP); ++ __ jcc(Assembler::equal, L, rscratch1_GP); ++ ++ __ enter(); ++ push_CPU_state(masm); ++ ++ // VM needs caller's callsite ++ // VM needs target method ++ // This needs to be a long call since we will relocate this adapter to ++ // the codeBuffer and it may not reach ++ ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ ++ __ movl(c_rarg0, rbx); ++ __ movl(c_rarg1, RA); ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::fixup_callers_callsite))); ++ ++ pop_CPU_state(masm); ++ __ leave(); ++ __ bind(L); ++} ++ ++ ++static void gen_c2i_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ Label& skip_fixup) {//__ stop("gen_c2i_adapter"); ++ // Before we get into the guts of the C2I adapter, see if we should be here ++ // at all. We've come from compiled code and are attempting to jump to the ++ // interpreter, which means the caller made a static call to get here ++ // (vcalls always get a compiled target if there is one). Check for a ++ // compiled target. If there is one, we need to patch the caller's call. ++ patch_callers_callsite(masm); ++ ++ __ bind(skip_fixup); ++ ++ // Since all args are passed on the stack, total_args_passed * ++ // Interpreter::stackElementSize is the space we need. ++ // Return address is in RA. ++ ++ int extraspace = (total_args_passed * Interpreter::stackElementSize); ++ ++ // stack is aligned, keep it that way ++ extraspace = align_up(extraspace, 2*wordSize); ++ ++ // set senderSP value ++ __ movl(rsender, esp); ++ ++ if (extraspace) ++ __ subptr(esp, extraspace, esp); ++ ++ // Now write the args into the outgoing interpreter space ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // offset to start parameters ++ int st_off = (total_args_passed - i - 1) * Interpreter::stackElementSize; ++ int next_off = st_off - Interpreter::stackElementSize; ++ ++ // Say 4 args: ++ // i st_off ++ // 0 32 T_LONG ++ // 1 24 T_VOID ++ // 2 16 T_OBJECT ++ // 3 8 T_BOOL ++ // - 0 return address ++ // ++ // However to make thing extra confusing. Because we can fit a long/double in ++ // a single slot on a 64 bt vm and it would be silly to break them up, the interpreter ++ // leaves one slot empty and only stores to a single slot. In this case the ++ // slot that is occupied is the T_VOID slot. See I said it was confusing. ++ ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // memory to memory use rax ++ int ld_off = r_1->reg2stack() * VMRegImpl::stack_slot_size + extraspace; ++ if (!r_2->is_valid()) { ++ // sign extend?? 
++ __ ldws(rax, Address(esp, ld_off)); ++ __ stptr(rax, Address(esp, st_off)); ++ ++ } else { ++ ++ __ ldl(rax, Address(esp, ld_off)); ++ ++ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG ++ // T_DOUBLE and T_LONG use two slots in the interpreter ++ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { ++ // ld_off == LSW, ld_off+wordSize == MSW ++ // st_off == MSW, next_off == LSW ++ __ stl(rax, Address(esp, next_off)); ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ mov_immediate64(rax, CONST64(0xdeadffffdeadaaaa)); ++ __ stptr(rax, Address(esp, st_off)); ++#endif /* ASSERT */ ++ } else { ++ __ stl(rax, Address(esp, st_off)); ++ } ++ } ++ } else if (r_1->is_Register()) { ++ Register r = r_1->as_Register(); ++ if (!r_2->is_valid()) { ++ // must be only an int (or less ) so move only 32bits to slot ++ // why not sign extend?? ++ __ stw(r, Address(esp, st_off)); ++ } else { ++ // Two VMREgs|OptoRegs can be T_OBJECT, T_ADDRESS, T_DOUBLE, T_LONG ++ // T_DOUBLE and T_LONG use two slots in the interpreter ++ if ( sig_bt[i] == T_LONG || sig_bt[i] == T_DOUBLE) { ++ // long/double in gpr ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ mov_immediate64(rax, CONST64(0xdeadffffdeadaaab)); ++ __ stptr(rax, Address(esp, st_off)); ++#endif /* ASSERT */ ++ __ stl(r, Address(esp, next_off)); ++ } else { ++ __ stl(r, Address(esp, st_off)); ++ } ++ } ++ } else { ++ assert(r_1->is_FloatRegister(), ""); ++ if (!r_2->is_valid()) { ++ // only a float use just part of the slot ++ __ store_float(r_1->as_FloatRegister(), Address(esp, st_off)); ++ } else { ++#ifdef ASSERT ++ // Overwrite the unused slot with known junk ++ __ mov_immediate64(rax, CONST64(0xdeadffffdeadaaac)); ++ __ stptr(rax, Address(esp, st_off)); ++#endif /* ASSERT */ ++ __ store_double(r_1->as_FloatRegister(), Address(esp, next_off)); ++ } ++ } ++ } ++ ++ // Schedule the branch target address early. ++ __ ldptr(rcx, Address(rbx, in_bytes(Method::interpreter_entry_offset()))); ++ __ jmp(rcx); ++} ++ ++static void range_check(MacroAssembler* masm, Register pc_reg, Register temp_reg, ++ address code_start, address code_end, ++ Label& L_ok) {SCOPEMARK_NAME(range_check, masm); ++ Label L_fail; ++ __ lea(temp_reg, ExternalAddress(code_start)); ++ __ cmpptr(pc_reg, temp_reg, temp_reg); ++ __ jcc(Assembler::belowEqual, L_fail, temp_reg); ++ __ lea(temp_reg, ExternalAddress(code_end)); ++ __ cmpptr(pc_reg, temp_reg, temp_reg); ++ __ jcc(Assembler::below, L_ok, temp_reg); ++ __ bind(L_fail); ++} ++ ++void SharedRuntime::gen_i2c_adapter(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs) {__ block_comment("gen_i2c_adapter");//__ debug_stop("gen_i2c_adapter"); ++ ++ // Note: r13 contains the senderSP on entry. We must preserve it since ++ // we may do a i2c -> c2i transition if we lose a race where compiled ++ // code goes non-entrant while we get args ready. ++ // In addition we use r13 to locate all the interpreter args as ++ // we must align the stack to 16 bytes on an i2c entry else we ++ // lose alignment we expect in all compiled code and register ++ // save code can segv when fxsave instructions find improperly ++ // aligned stack pointer. ++ ++ // Adapters can be frameless because they do not require the caller ++ // to perform additional cleanup work, such as correcting the stack pointer. 
++ // An i2c adapter is frameless because the *caller* frame, which is interpreted, ++ // routinely repairs its own stack pointer (from interpreter_frame_last_sp), ++ // even if a callee has modified the stack pointer. ++ // A c2i adapter is frameless because the *callee* frame, which is interpreted, ++ // routinely repairs its caller's stack pointer (from sender_sp, which is set ++ // up via the senderSP register). ++ // In other words, if *either* the caller or callee is interpreted, we can ++ // get the stack pointer repaired after a call. ++ // This is why c2i and i2c adapters cannot be indefinitely composed. ++ // In particular, if a c2i adapter were to somehow call an i2c adapter, ++ // both caller and callee would be compiled methods, and neither would ++ // clean up the stack pointer changes performed by the two adapters. ++ // If this happens, control eventually transfers back to the compiled ++ // caller, but with an uncorrected stack, causing delayed havoc. ++ ++ // Pick up the return address ++ __ movl(rax, RA); ++ ++ if (VerifyAdapterCalls && ++ (Interpreter::code() != NULL || StubRoutines::code1() != NULL)) { ++ // So, let's test for cascading c2i/i2c adapters right now. ++ // assert(Interpreter::contains($return_addr) || ++ // StubRoutines::contains($return_addr), ++ // "i2c adapter must return to an interpreter frame"); ++ __ block_comment("verify_i2c { "); ++ Label L_ok; ++ if (Interpreter::code() != NULL) ++ range_check(masm, rax, r11, ++ Interpreter::code()->code_start(), Interpreter::code()->code_end(), ++ L_ok); ++ if (StubRoutines::code1() != NULL) ++ range_check(masm, rax, r11, ++ StubRoutines::code1()->code_begin(), StubRoutines::code1()->code_end(), ++ L_ok); ++ if (StubRoutines::code2() != NULL) ++ range_check(masm, rax, r11, ++ StubRoutines::code2()->code_begin(), StubRoutines::code2()->code_end(), ++ L_ok); ++ const char* msg = "i2c adapter must return to an interpreter frame"; ++ __ block_comment(msg); ++ __ stop(msg); ++ __ bind(L_ok); ++ __ block_comment("} verify_i2ce "); ++ } ++ ++ // Must preserve original SP for loading incoming arguments because ++ // we need to align the outgoing SP for compiled code. ++ __ movl(r11, rsp); ++ ++ // Cut-out for having no stack args. Since up to 2 int/oop args are passed ++ // in registers, we will occasionally have no stack args. ++ int comp_words_on_stack = 0; ++ if (comp_args_on_stack) { ++ // Sig words on the stack are greater-than VMRegImpl::stack0. Those in ++ // registers are below. By subtracting stack0, we either get a negative ++ // number (all values in registers) or the maximum stack slot accessed. ++ ++ // Convert 4-byte c2 stack slots to words. ++ comp_words_on_stack = align_up(comp_args_on_stack*VMRegImpl::stack_slot_size, wordSize)>>LogBytesPerWord; ++ // Round up to miminum stack alignment, in wordSize ++ comp_words_on_stack = align_up(comp_words_on_stack, 2); ++ __ subptr(esp, comp_words_on_stack * wordSize, esp); ++ } ++ ++ // push the return address and misalign the stack that youngest frame always sees ++ // as far as the placement of the call instruction ++ //__ push(rax); //TODO:How to resolve this ? jzy ++ ++ // Put saved SP in another register ++ const Register saved_sp = rax; ++ __ movl(saved_sp, r11); ++ ++ // Will jump to the compiled code just as if compiled code was doing it. ++ // Pre-load the register-jump target early, to schedule it better. ++ __ ldptr(r11, Address(rmethod, in_bytes(Method::from_compiled_offset()))); //check jzy? 
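// --- Editor's sketch, not part of the patch ---------------------------------
// The slot arithmetic shared by the c2i packing above and the i2c shuffle that
// follows: interpreter arguments sit in stackElementSize-wide slots with
// argument word 0 farthest from SP, and a long/double occupies the pair
// (st_off, next_off) with the live half at next_off. The element size below is
// the usual 64-bit value, used here only for illustration.
#include <cstdio>

namespace adapter_offset_sketch {
  const int stackElementSize = 8;   // one interpreter stack element, in bytes

  // Offset of Java argument word i (0-based) from the argument area base.
  inline int st_off(int total_args_passed, int i) {
    return (total_args_passed - i - 1) * stackElementSize;
  }
  inline int next_off(int total_args_passed, int i) {
    return st_off(total_args_passed, i) - stackElementSize;
  }
}

int main() {
  // Signature (long, Object, boolean) is 4 Java argument words, where word 1
  // is the T_VOID half of the long. st_off: 24, 16, 8, 0; the long's payload
  // is stored at next_off(4, 0) == 16.
  for (int i = 0; i < 4; i++)
    std::printf("arg word %d: st_off=%d\n", i,
                adapter_offset_sketch::st_off(4, i));
  return 0;
}
// -----------------------------------------------------------------------------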
++ ++#if INCLUDE_JVMCI ++ if (EnableJVMCI || UseAOT) { ++ // check if this call should be routed towards a specific entry point ++ __ cmpptr(Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset())), R0); ++ Label no_alternative_target; ++ __ jcc(Assembler::equal, no_alternative_target); ++ __ ldptr(r11, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ __ stptr(R0, Address(rthread, in_bytes(JavaThread::jvmci_alternate_call_target_offset()))); ++ __ bind(no_alternative_target); ++ } ++#endif // INCLUDE_JVMCI ++ ++ // Now generate the shuffle code. Pick up all register args and move the ++ // rest through the floating point stack top. ++ for (int i = 0; i < total_args_passed; i++) { ++ if (sig_bt[i] == T_VOID) { ++ // Longs and doubles are passed in native word order, but misaligned ++ // in the 32-bit build. ++ assert(i > 0 && (sig_bt[i-1] == T_LONG || sig_bt[i-1] == T_DOUBLE), "missing half"); ++ continue; ++ } ++ ++ // Pick up 0, 1 or 2 words from SP+offset. ++ ++ assert(!regs[i].second()->is_valid() || regs[i].first()->next() == regs[i].second(), ++ "scrambled load targets?"); ++ // Load in argument order going down. ++ int ld_off = (total_args_passed - 1 - i)*Interpreter::stackElementSize; ++ // Point to interpreter value (vs. tag) ++ int next_off = ld_off - Interpreter::stackElementSize; ++ // ++ // ++ // ++ VMReg r_1 = regs[i].first(); ++ VMReg r_2 = regs[i].second(); ++ if (!r_1->is_valid()) { ++ assert(!r_2->is_valid(), ""); ++ continue; ++ } ++ if (r_1->is_stack()) { ++ // Convert stack slot to an SP offset ++ int st_off = regs[i].first()->reg2stack()*VMRegImpl::stack_slot_size ; ++ ++ // We can use r13 as a temp here because compiled code doesn't need r13 as an input ++ // and if we end up going thru a c2i because of a miss a reasonable value of r13 ++ // will be generated. ++ if (!r_2->is_valid()) { ++ // sign extend??? ++ __ ldws(r13, Address(saved_sp, ld_off)); ++ __ stptr(r13, Address(esp, st_off), rscratch2_AT); ++ } else { ++ // ++ // We are using two optoregs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE ++ // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case ++ // So we must adjust where to pick up the data to match the interpreter. ++ // ++ // Interpreter local[n] == MSW, local[n+1] == LSW however locals ++ // are accessed as negative so LSW is at LOW address ++ ++ // ld_off is MSW so get LSW ++ const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? ++ next_off : ld_off; ++ __ ldl(r13, Address(saved_sp, offset)); ++ // st_off is LSW (i.e. reg.first()) ++ __ stl(r13, Address(esp, st_off)); ++ } ++ } else if (r_1->is_Register()) { // Register argument ++ Register r = r_1->as_Register(); ++ assert(r != rax, "must be different"); ++ if (r_2->is_valid()) { ++ // ++ // We are using two VMRegs. This can be either T_OBJECT, T_ADDRESS, T_LONG, or T_DOUBLE ++ // the interpreter allocates two slots but only uses one for thr T_LONG or T_DOUBLE case ++ // So we must adjust where to pick up the data to match the interpreter. ++ ++ const int offset = (sig_bt[i]==T_LONG||sig_bt[i]==T_DOUBLE)? ++ next_off : ld_off; ++ ++ // this can be a misaligned move ++ __ ldl(r, Address(saved_sp, offset)); ++ } else { ++ // sign extend and use a full word? 
++ __ ldws(r, Address(saved_sp, ld_off)); ++ } ++ } else { ++ if (!r_2->is_valid()) { ++ __ load_float(r_1->as_FloatRegister(), Address(saved_sp, ld_off)); ++ } else { ++ __ load_double(r_1->as_FloatRegister(), Address(saved_sp, next_off)); ++ } ++ } ++ } ++ ++ // 6243940 We might end up in handle_wrong_method if ++ // the callee is deoptimized as we race thru here. If that ++ // happens we don't want to take a safepoint because the ++ // caller frame will look interpreted and arguments are now ++ // "compiled" so it is much better to make this transition ++ // invisible to the stack walking code. Unfortunately if ++ // we try and find the callee by normal means a safepoint ++ // is possible. So we stash the desired callee in the thread ++ // and the vm will find there should this case occur. ++ ++ __ stptr(rbx, Address(rthread, JavaThread::callee_target_offset())); ++ ++ // put Method* where a c2i would expect should we end up there ++ // only needed becaus eof c2 resolve stubs return Method* as a result in ++ // rax ++ __ movl(rax, rbx); //TODO:why need this? jzy ++ __ jmp(r11); ++} ++ ++// --------------------------------------------------------------- ++AdapterHandlerEntry* SharedRuntime::generate_i2c2i_adapters(MacroAssembler *masm, ++ int total_args_passed, ++ int comp_args_on_stack, ++ const BasicType *sig_bt, ++ const VMRegPair *regs, ++ AdapterFingerPrint* fingerprint) {__ block_comment("generate_i2c2i_adapters");//__ stop("generate_i2c2i_adapters"); ++ address i2c_entry = __ pc(); ++ ++ gen_i2c_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs); ++ ++ // ------------------------------------------------------------------------- ++ // Generate a C2I adapter. On entry we know rbx holds the Method* during calls ++ // to the interpreter. The args start out packed in the compiled layout. They ++ // need to be unpacked into the interpreter layout. This will almost always ++ // require some stack space. We grow the current (compiled) stack, then repack ++ // the args. We finally end in a jump to the generic interpreter entry point. ++ // On exit from the interpreter, the interpreter will restore our SP (lest the ++ // compiled code, which relys solely on SP and not RBP, get sick). ++ ++ address c2i_unverified_entry = __ pc(); ++ Label skip_fixup; ++ Label ok; ++ ++ Register holder = rax; ++ Register receiver = j_rarg0; ++ Register temp = rbx; ++ ++ { ++ __ load_klass(temp, receiver); ++ __ cmpptr(temp, Address(holder, CompiledICHolder::holder_klass_offset()), rscratch1_GP); ++ __ ldptr(rbx, Address(holder, CompiledICHolder::holder_metadata_offset())); ++ __ jcc(Assembler::equal, ok, rscratch1_GP); ++ __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ ++ __ bind(ok); ++ // Method might have been compiled since the call site was patched to ++ // interpreted if that is the case treat it as a miss so we can get ++ // the call site corrected. 
++ __ cmpptr(Address(rbx, in_bytes(Method::code_offset())), R0, rscratch1_GP); ++ __ jcc(Assembler::equal, skip_fixup, rscratch1_GP); ++ __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub()), rscratch1_GP); ++ } ++ ++ address c2i_entry = __ pc(); ++ ++ gen_c2i_adapter(masm, total_args_passed, comp_args_on_stack, sig_bt, regs, skip_fixup); ++ ++ __ flush(); ++ return AdapterHandlerLibrary::new_entry(fingerprint, i2c_entry, c2i_entry, c2i_unverified_entry); ++} ++ ++int SharedRuntime::c_calling_convention(const BasicType *sig_bt, ++ VMRegPair *regs, ++ VMRegPair *regs2, ++ int total_args_passed) {//ShouldNotReachHere(); ++ assert(regs2 == NULL, "not needed on Sw64"); ++ ++ // We return the amount of VMRegImpl stack slots we need to reserve for all ++ // the arguments NOT counting out_preserve_stack_slots. ++ static const Register INT_ArgReg[Argument::n_register_parameters] = { ++ c_rarg0, c_rarg1, c_rarg2, c_rarg3, c_rarg4, c_rarg5 ++ }; ++ static const FloatRegister FP_ArgReg[Argument::n_float_register_parameters] = { ++ c_farg0, c_farg1, c_farg2, c_farg3, ++ c_farg4, c_farg5 ++ }; ++ ++ ++ uint args = 0; ++ uint stk_args = 0; // inc by 2 each time ++ ++ for (int i = 0; i < total_args_passed; i++) { ++ switch (sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (args < Argument::n_int_register_parameters_c) { ++ regs[i].set1(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_LONG: ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ // fall through ++ case T_OBJECT: ++ case T_ARRAY: ++ case T_ADDRESS: ++ case T_METADATA: ++ if (args < Argument::n_int_register_parameters_c) { ++ regs[i].set2(INT_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_FLOAT: ++ if (args < Argument::n_float_register_parameters_c) { ++ regs[i].set1(FP_ArgReg[args++]->as_VMReg()); ++ ++ } else { ++ regs[i].set1(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_DOUBLE: ++ assert((i + 1) < total_args_passed && sig_bt[i + 1] == T_VOID, "expecting half"); ++ if (args < Argument::n_float_register_parameters_c) { ++ regs[i].set2(FP_ArgReg[args++]->as_VMReg()); ++ } else { ++ regs[i].set2(VMRegImpl::stack2reg(stk_args)); ++ stk_args += 2; ++ } ++ break; ++ case T_VOID: // Halves of longs and doubles ++ assert(i != 0 && (sig_bt[i - 1] == T_LONG || sig_bt[i - 1] == T_DOUBLE), "expecting half"); ++ regs[i].set_bad(); ++ break; ++ default: ++ ShouldNotReachHere(); ++ break; ++ } ++ } ++ ++ return stk_args; ++} ++ ++// On 64 bit we will store integer like items to the stack as ++// 64 bits items (sparc abi) even though java would only store ++// 32bits for a parameter. On 32bit it will simply be 32 bits ++// So this routine will do 32->32 on 32bit and 32->64 on 64bit ++static void move32_64(MacroAssembler* masm, VMRegPair src, VMRegPair dst, Register tmp_reg = rax) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ldws(tmp_reg, Address(rbp, reg2offset_in(src.first()))); ++ __ stl(tmp_reg, Address(rsp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ ldws(dst.first()->as_Register(), Address(rfp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ // Do we really have to sign extend??? 
++ // __ movslq(src.first()->as_Register(), src.first()->as_Register()); ++ __ stl(src.first()->as_Register(), Address(rsp, reg2offset_out(dst.first()))); ++ } else { ++ // Do we really have to sign extend??? ++ // __ movslq(dst.first()->as_Register(), src.first()->as_Register()); ++ if (dst.first() != src.first()) { ++ __ movl(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++static void move_ptr(MacroAssembler* masm, VMRegPair src, VMRegPair dst, Register tmp_reg = rax) { ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ // stack to stack ++ __ ldl(tmp_reg, Address(rbp, reg2offset_in(src.first()))); ++ __ stl(tmp_reg, Address(rsp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ __ ldl(dst.first()->as_Register(), Address(rbp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ __ stl(src.first()->as_Register(), Address(rsp, reg2offset_out(dst.first()))); ++ } else { ++ if (dst.first() != src.first()) { ++ __ movl(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } ++} ++ ++// An oop arg. Must pass a handle not the oop itself ++static void object_move(MacroAssembler* masm, ++ OopMap* map, ++ int oop_handle_offset, ++ int framesize_in_slots, ++ VMRegPair src, ++ VMRegPair dst, ++ bool is_receiver, ++ int* receiver_offset, Register tmp_reg = rax) { ++ ++ // must pass a handle. First figure out the location we use as a handle ++ ++ Register rHandle = dst.first()->is_stack() ? tmp_reg : dst.first()->as_Register(); ++ ++ // See if oop is NULL if it is we need no handle ++ ++ if (src.first()->is_stack()) { ++ ++ // Oop is already on the stack as an argument ++ int offset_in_older_frame = src.first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + framesize_in_slots)); ++ if (is_receiver) { ++ *receiver_offset = (offset_in_older_frame + framesize_in_slots) * VMRegImpl::stack_slot_size; ++ } ++ ++ __ cmpptr(Address(rbp, reg2offset_in(src.first())), R0); ++ __ lea(rHandle, Address(rbp, reg2offset_in(src.first()))); ++ // conditionally move a NULL ++ __ ldptr(rscratch3, Address(rbp, reg2offset_in(src.first()))); ++ __ cmove(Assembler::equal, rHandle, rscratch3, rHandle); ++ } else { ++ ++ // Oop is in an a register we must store it to the space we reserve ++ // on the stack for oop_handles and pass a handle if oop is non-NULL ++ ++ const Register rOop = src.first()->as_Register(); ++ int oop_slot; ++ if (rOop == j_rarg0) ++ oop_slot = 0; ++ else if (rOop == j_rarg1) ++ oop_slot = 1; ++ else if (rOop == j_rarg2) ++ oop_slot = 2; ++ else if (rOop == j_rarg3) ++ oop_slot = 3; ++ else if (rOop == j_rarg4) ++ oop_slot = 4; ++ else { ++ assert(rOop == j_rarg5, "wrong register"); ++ oop_slot = 5; ++ } ++ ++ oop_slot = oop_slot * VMRegImpl::slots_per_word + oop_handle_offset; ++ int offset = oop_slot*VMRegImpl::stack_slot_size; ++ ++ map->set_oop(VMRegImpl::stack2reg(oop_slot)); ++ // Store oop in handle area, may be NULL ++ __ stptr(rOop, Address(rsp, offset)); ++ if (is_receiver) { ++ *receiver_offset = offset; ++ } ++ ++ __ cmpptr(rOop, R0); ++ __ lea(rHandle, Address(rsp, offset)); ++ // conditionally move a NULL from the handle area where it was just stored ++ __ ldptr(rscratch3, Address(rsp, offset)); ++ __ cmove(Assembler::equal, rHandle, rscratch3, rHandle); ++ } ++ ++ // If arg is on the stack then place it otherwise it is already in correct reg. 
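// --- Editor's illustrative sketch, not part of the patch ---------------------
// What object_move above arranges at runtime: a native callee never receives a
// raw oop, it receives the address of a stack slot holding the oop (a handle),
// except that a NULL oop is passed as a NULL handle rather than a pointer to a
// NULL slot. 'oop' and 'slot' below are stand-ins, not HotSpot types.
#include <cassert>
#include <cstdint>

namespace object_move_sketch {
  using oop = uintptr_t;                  // stand-in for a Java object pointer

  // 'slot' models the reserved oop_handle-area slot the oop is spilled into,
  // so the GC can still find and update it while native code runs.
  inline oop* handleize(oop value, oop* slot) {
    *slot = value;                        // store the oop where GC can see it
    return value == 0 ? nullptr : slot;   // conditionally pass a NULL handle
  }
}

int main() {
  object_move_sketch::oop slot = 0;
  assert(object_move_sketch::handleize(0x1000, &slot) == &slot); // oop -> handle
  assert(object_move_sketch::handleize(0, &slot) == nullptr);    // NULL -> NULL
  return 0;
}
// -----------------------------------------------------------------------------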
++ if (dst.first()->is_stack()) { ++ __ stptr(rHandle, Address(rsp, reg2offset_out(dst.first()))); ++ } ++} ++ ++// A float arg may have to do float reg int reg conversion ++static void float_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst, Register tmp_reg = rax) { ++ assert(!src.second()->is_valid() && !dst.second()->is_valid(), "bad float_move"); ++ ++ // The calling conventions assures us that each VMregpair is either ++ // all really one physical register or adjacent stack slots. ++ // This greatly simplifies the cases here compared to sparc. ++ ++ if (src.first()->is_stack()) { ++ if (dst.first()->is_stack()) { ++ __ ldwu (tmp_reg, Address(rfp, reg2offset_in(src.first())));//TODO:check jzy ++ __ stptr(tmp_reg, Address(rsp, reg2offset_out(dst.first()))); ++ } else { ++ // stack to reg ++ assert(dst.first()->is_FloatRegister(), "only expect float registers as parameters"); ++ __ load_float(dst.first()->as_FloatRegister(), Address(rfp, reg2offset_in(src.first()))); ++ } ++ } else if (dst.first()->is_stack()) { ++ // reg to stack ++ assert(src.first()->is_FloatRegister(), "only expect xmm registers as parameters"); ++ __ store_float(src.first()->as_FloatRegister(), Address(esp, reg2offset_out(dst.first()))); ++ } else { ++ // reg to reg ++ // In theory these overlap but the ordering is such that this is likely a nop ++ if ( src.first() != dst.first()) { ++ __ fmovs(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } ++ } ++} ++ ++// A long move ++static void long_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst, Register tmp_reg = rax) { ++ ++ // The calling conventions assures us that each VMregpair is either ++ // all really one physical register or adjacent stack slots. ++ // This greatly simplifies the cases here compared to sparc. ++ ++ if (src.is_single_phys_reg() ) { ++ if (dst.is_single_phys_reg()) { ++ if (dst.first() != src.first()) { ++ __ movl(dst.first()->as_Register(), src.first()->as_Register()); ++ } ++ } else { ++ assert(dst.is_single_reg(), "not a stack pair"); ++ __ stl(src.first()->as_Register(), Address(rsp, reg2offset_out(dst.first()))); ++ } ++ } else if (dst.is_single_phys_reg()) { ++ assert(src.is_single_reg(), "not a stack pair"); ++ __ ldl(dst.first()->as_Register(), Address(rbp, reg2offset_out(src.first()))); ++ } else { ++ assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs"); ++ __ ldl(tmp_reg, Address(rbp, reg2offset_in(src.first()))); ++ __ stl(tmp_reg, Address(rsp, reg2offset_out(dst.first()))); ++ } ++} ++ ++// A double move ++static void double_move(MacroAssembler* masm, VMRegPair src, VMRegPair dst, Register tmp_reg = rax) { ++ ++ // The calling conventions assures us that each VMregpair is either ++ // all really one physical register or adjacent stack slots. ++ // This greatly simplifies the cases here compared to sparc. 
++ ++ if (src.is_single_phys_reg() ) { ++ if (dst.is_single_phys_reg()) { ++ // In theory these overlap but the ordering is such that this is likely a nop ++ if ( src.first() != dst.first()) { ++ __ fmovd(dst.first()->as_FloatRegister(), src.first()->as_FloatRegister()); ++ } ++ } else { ++ assert(dst.is_single_reg(), "not a stack pair"); ++ __ store_double(src.first()->as_FloatRegister(), Address(rsp, reg2offset_out(dst.first()))); ++ } ++ } else if (dst.is_single_phys_reg()) { ++ assert(src.is_single_reg(), "not a stack pair"); ++ __ load_double(dst.first()->as_FloatRegister(), Address(rbp, reg2offset_out(src.first()))); ++ } else { ++ assert(src.is_single_reg() && dst.is_single_reg(), "not stack pairs"); ++ __ ldl(tmp_reg, Address(rbp, reg2offset_in(src.first()))); ++ __ stl(tmp_reg, Address(rsp, reg2offset_out(dst.first()))); ++ } ++} ++ ++ ++void SharedRuntime::save_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {//__ stop("save_native_result"); ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ store_float(FSF, Address(rfp, -wordSize)); ++ break; ++ case T_DOUBLE: ++ __ store_double(FSF, Address(rfp, -wordSize)); ++ break; ++ case T_VOID: break; ++ default: { ++ __ stptr(V0, Address(rfp, -wordSize)); ++ } ++ } ++} ++ ++void SharedRuntime::restore_native_result(MacroAssembler *masm, BasicType ret_type, int frame_slots) {//__ stop("restore_native_result"); ++ // We always ignore the frame_slots arg and just use the space just below frame pointer ++ // which by this time is free to use ++ switch (ret_type) { ++ case T_FLOAT: ++ __ load_float(FSF, Address(rfp, -wordSize)); ++ break; ++ case T_DOUBLE: ++ __ load_double(FSF, Address(rfp, -wordSize)); ++ break; ++ case T_VOID: break; ++ default: { ++ __ ldptr(V0, Address(rfp, -wordSize)); ++ } ++ } ++} ++ ++static void save_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {//__ stop("save_args"); ++ for ( int i = first_arg ; i < arg_count ; i++ ) { ++ if (args[i].first()->is_Register()) { ++ __ push(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ subptr(esp, 2*wordSize, esp); ++ __ store_double(args[i].first()->as_FloatRegister(), Address(esp, 0)); ++ } ++ } ++} ++ ++static void restore_args(MacroAssembler *masm, int arg_count, int first_arg, VMRegPair *args) {//__ stop("restore_args"); ++ for ( int i = arg_count - 1 ; i >= first_arg ; i-- ) { ++ if (args[i].first()->is_Register()) { ++ __ pop(args[i].first()->as_Register()); ++ } else if (args[i].first()->is_FloatRegister()) { ++ __ load_double(args[i].first()->as_FloatRegister(), Address(esp, 0)); ++ __ addptr(esp, 2*wordSize, esp); ++ } ++ } ++} ++ ++ ++static void save_or_restore_arguments(MacroAssembler* masm, ++ const int stack_slots, ++ const int total_in_args, ++ const int arg_save_area, ++ OopMap* map, ++ VMRegPair* in_regs, ++ BasicType* in_sig_bt) {//__ stop("save_or_restore_arguments"); ++ // if map is non-NULL then the code should store the values, ++ // otherwise it should load them. 
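++  // The same helper handles both directions: check_needs_gc_for_critical_native
++  // below calls it first with a real OopMap to spill the incoming arguments and
++  // record any oops they contain, then again with map == NULL to reload the
++  // arguments once the runtime call has returned.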
++ int slot = arg_save_area; ++ // Save down double word first ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_FloatRegister() && in_sig_bt[i] == T_DOUBLE) { ++ int offset = slot * VMRegImpl::stack_slot_size; ++ slot += VMRegImpl::slots_per_word; ++ assert(slot <= stack_slots, "overflow"); ++ if (map != NULL) { ++ __ store_double(in_regs[i].first()->as_FloatRegister(), Address(esp, offset)); ++ } else { ++ __ load_double(in_regs[i].first()->as_FloatRegister(), Address(esp, offset)); ++ } ++ } ++ if (in_regs[i].first()->is_Register() && ++ (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_ARRAY)) { ++ int offset = slot * VMRegImpl::stack_slot_size; ++ if (map != NULL) { ++ __ stl(in_regs[i].first()->as_Register(), Address(esp, offset)); ++ if (in_sig_bt[i] == T_ARRAY) { ++ map->set_oop(VMRegImpl::stack2reg(slot));; ++ } ++ } else { ++ __ ldl(in_regs[i].first()->as_Register(), Address(esp, offset)); ++ } ++ slot += VMRegImpl::slots_per_word; ++ } ++ } ++ // Save or restore single word registers ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ int offset = slot * VMRegImpl::stack_slot_size; ++ slot++; ++ assert(slot <= stack_slots, "overflow"); ++ ++ // Value is in an input register pass we must flush it to the stack ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_CHAR: ++ case T_BYTE: ++ case T_SHORT: ++ case T_INT: ++ if (map != NULL) { ++ __ stw(reg, Address(esp, offset)); ++ } else { ++ //__ stop("check @jzy 32-64bits"); ++ __ ldws(reg, Address(esp, offset)); ++ } ++ break; ++ case T_ARRAY: ++ case T_LONG: ++ // handled above ++ break; ++ case T_OBJECT: ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ if (in_sig_bt[i] == T_FLOAT) { ++ int offset = slot * VMRegImpl::stack_slot_size; ++ slot++; ++ assert(slot <= stack_slots, "overflow"); ++ if (map != NULL) { ++ __ store_float(in_regs[i].first()->as_FloatRegister(), Address(esp, offset)); ++ } else { ++ __ load_float(in_regs[i].first()->as_FloatRegister(), Address(esp, offset)); ++ } ++ } ++ } else if (in_regs[i].first()->is_stack()) { ++ if (in_sig_bt[i] == T_ARRAY && map != NULL) { ++ int offset_in_older_frame = in_regs[i].first()->reg2stack() + SharedRuntime::out_preserve_stack_slots(); ++ map->set_oop(VMRegImpl::stack2reg(offset_in_older_frame + stack_slots)); ++ } ++ } ++ } ++} ++ ++ ++// Check GCLocker::needs_gc and enter the runtime if it's true. This ++// keeps a new JNI critical region from starting until a GC has been ++// forced. Save down any oops in registers and describe them in an ++// OopMap. ++static void check_needs_gc_for_critical_native(MacroAssembler* masm, ++ int stack_slots, ++ int total_c_args, ++ int total_in_args, ++ int arg_save_area, ++ OopMapSet* oop_maps, ++ VMRegPair* in_regs, ++ BasicType* in_sig_bt) {//__ stop("check_needs_gc_for_critical_native"); ++ __ block_comment("check GCLocker::needs_gc"); ++ Label cont; ++ __ cmpb(ExternalAddress((address)GCLocker::needs_gc_address()), 0); //TODO:jzy? 
check ++ __ jcc(Assembler::equal, cont); ++ ++ // Save down any incoming oops and call into the runtime to halt for a GC ++ ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ save_or_restore_arguments(masm, stack_slots, total_in_args, ++ arg_save_area, map, in_regs, in_sig_bt); ++ ++ address the_pc = __ pc(); ++ oop_maps->add_gc_map( __ offset(), map); ++ __ set_last_Java_frame(esp, noreg, the_pc, rscratch3); ++ ++ __ block_comment("block_for_jni_critical"); ++ __ movl(c_rarg0, rthread); ++ __ movl(r12, esp); // remember sp ++ __ subptr(esp, frame::arg_reg_save_area_bytes, esp); // windows ++ __ andptr(esp, -16, esp); // align stack as required by ABI ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::block_for_jni_critical))); ++ __ movl(esp, r12); // restore sp ++ __ reinit_heapbase(); ++ ++ __ reset_last_Java_frame(false); ++ ++ save_or_restore_arguments(masm, stack_slots, total_in_args, ++ arg_save_area, NULL, in_regs, in_sig_bt); ++ __ bind(cont); ++#ifdef ASSERT ++ if (StressCriticalJNINatives) { ++ // Stress register saving ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ save_or_restore_arguments(masm, stack_slots, total_in_args, ++ arg_save_area, map, in_regs, in_sig_bt); ++ // Destroy argument registers ++ for (int i = 0; i < total_in_args - 1; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ __ xorptr(reg, reg, reg); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ Unimplemented();//jzy ++ //__ xorp(in_regs[i].first()->is_FloatRegister(), in_regs[i].first()->is_FloatRegister()); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ ShouldNotReachHere(); ++ } else if (in_regs[i].first()->is_stack()) { ++ // Nothing to do ++ } else { ++ ShouldNotReachHere(); ++ } ++ if (in_sig_bt[i] == T_LONG || in_sig_bt[i] == T_DOUBLE) { ++ i++; ++ } ++ } ++ ++ save_or_restore_arguments(masm, stack_slots, total_in_args, ++ arg_save_area, NULL, in_regs, in_sig_bt); ++ } ++#endif ++} ++ ++// Unpack an array argument into a pointer to the body and the length ++// if the array is non-null, otherwise pass 0 for both. ++static void unpack_array_argument(MacroAssembler* masm, VMRegPair reg, BasicType in_elem_type, VMRegPair body_arg, VMRegPair length_arg) { ++ Register tmp_reg = rax; ++ assert(!body_arg.first()->is_Register() || body_arg.first()->as_Register() != tmp_reg, ++ "possible collision"); ++ assert(!length_arg.first()->is_Register() || length_arg.first()->as_Register() != tmp_reg, ++ "possible collision"); ++ ++ __ block_comment("unpack_array_argument {"); ++ ++ // Pass the length, ptr pair ++ Label is_null, done; ++ VMRegPair tmp; ++ tmp.set_ptr(tmp_reg->as_VMReg()); ++ if (reg.first()->is_stack()) { ++ // Load the arg up from the stack ++ move_ptr(masm, reg, tmp); ++ reg = tmp; ++ } ++ __ testptr(reg.first()->as_Register(), reg.first()->as_Register()); ++ __ jcc(Assembler::equal, is_null); ++ __ lea(tmp_reg, Address(reg.first()->as_Register(), arrayOopDesc::base_offset_in_bytes(in_elem_type))); ++ move_ptr(masm, tmp, body_arg); ++ // load the length relative to the body. 
++ __ ldws(tmp_reg, Address(tmp_reg, arrayOopDesc::length_offset_in_bytes() - ++ arrayOopDesc::base_offset_in_bytes(in_elem_type))); ++ move32_64(masm, tmp, length_arg); ++ __ jmp(done); ++ __ bind(is_null); ++ // Pass zeros ++ __ movl(tmp_reg, R0); ++ move_ptr(masm, tmp, body_arg); ++ move32_64(masm, tmp, length_arg); ++ __ bind(done); ++ ++ __ block_comment("} unpack_array_argument"); ++} ++ ++ ++// Different signatures may require very different orders for the move ++// to avoid clobbering other arguments. There's no simple way to ++// order them safely. Compute a safe order for issuing stores and ++// break any cycles in those stores. This code is fairly general but ++// it's not necessary on the other platforms so we keep it in the ++// platform dependent code instead of moving it into a shared file. ++// (See bugs 7013347 & 7145024.) ++// Note that this code is specific to LP64. ++class ComputeMoveOrder: public StackObj { ++ class MoveOperation: public ResourceObj { ++ friend class ComputeMoveOrder; ++ private: ++ VMRegPair _src; ++ VMRegPair _dst; ++ int _src_index; ++ int _dst_index; ++ bool _processed; ++ MoveOperation* _next; ++ MoveOperation* _prev; ++ ++ static int get_id(VMRegPair r) { ++ return r.first()->value(); ++ } ++ ++ public: ++ MoveOperation(int src_index, VMRegPair src, int dst_index, VMRegPair dst): ++ _src(src) ++ , _src_index(src_index) ++ , _dst(dst) ++ , _dst_index(dst_index) ++ , _next(NULL) ++ , _prev(NULL) ++ , _processed(false) { ++ } ++ ++ VMRegPair src() const { return _src; } ++ int src_id() const { return get_id(src()); } ++ int src_index() const { return _src_index; } ++ VMRegPair dst() const { return _dst; } ++ void set_dst(int i, VMRegPair dst) { _dst_index = i, _dst = dst; } ++ int dst_index() const { return _dst_index; } ++ int dst_id() const { return get_id(dst()); } ++ MoveOperation* next() const { return _next; } ++ MoveOperation* prev() const { return _prev; } ++ void set_processed() { _processed = true; } ++ bool is_processed() const { return _processed; } ++ ++ // insert ++ void break_cycle(VMRegPair temp_register) { ++ // create a new store following the last store ++ // to move from the temp_register to the original ++ MoveOperation* new_store = new MoveOperation(-1, temp_register, dst_index(), dst()); ++ ++ // break the cycle of links and insert new_store at the end ++ // break the reverse link. ++ MoveOperation* p = prev(); ++ assert(p->next() == this, "must be"); ++ _prev = NULL; ++ p->_next = new_store; ++ new_store->_prev = p; ++ ++ // change the original store to save it's value in the temp. ++ set_dst(-1, temp_register); ++ } ++ ++ void link(GrowableArray& killer) { ++ // link this store in front the store that it depends on ++ MoveOperation* n = killer.at_grow(src_id(), NULL); ++ if (n != NULL) { ++ assert(_next == NULL && n->_prev == NULL, "shouldn't have been set yet"); ++ _next = n; ++ n->_prev = this; ++ } ++ } ++ }; ++ ++ private: ++ GrowableArray edges; ++ ++ public: ++ ComputeMoveOrder(int total_in_args, VMRegPair* in_regs, int total_c_args, VMRegPair* out_regs, ++ BasicType* in_sig_bt, GrowableArray& arg_order, VMRegPair tmp_vmreg) { ++ // Move operations where the dest is the stack can all be ++ // scheduled first since they can't interfere with the other moves. 
++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ c_arg--; ++ if (out_regs[c_arg].first()->is_stack() && ++ out_regs[c_arg + 1].first()->is_stack()) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } else { ++ if (out_regs[c_arg].first()->is_stack() || ++ in_regs[i].first() == out_regs[c_arg].first()) { ++ add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg + 1]); ++ } else { ++ add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg]); ++ } ++ } ++ } else if (in_sig_bt[i] == T_VOID) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } else { ++ if (out_regs[c_arg].first()->is_stack() || ++ in_regs[i].first() == out_regs[c_arg].first()) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } else { ++ add_edge(i, in_regs[i].first(), c_arg, out_regs[c_arg]); ++ } ++ } ++ } ++ // Break any cycles in the register moves and emit the in the ++ // proper order. ++ GrowableArray* stores = get_store_order(tmp_vmreg); ++ for (int i = 0; i < stores->length(); i++) { ++ arg_order.push(stores->at(i)->src_index()); ++ arg_order.push(stores->at(i)->dst_index()); ++ } ++ } ++ ++ // Collected all the move operations ++ void add_edge(int src_index, VMRegPair src, int dst_index, VMRegPair dst) { ++ if (src.first() == dst.first()) return; ++ edges.append(new MoveOperation(src_index, src, dst_index, dst)); ++ } ++ ++ // Walk the edges breaking cycles between moves. The result list ++ // can be walked in order to produce the proper set of loads ++ GrowableArray* get_store_order(VMRegPair temp_register) { ++ // Record which moves kill which values ++ GrowableArray killer; ++ for (int i = 0; i < edges.length(); i++) { ++ MoveOperation* s = edges.at(i); ++ assert(killer.at_grow(s->dst_id(), NULL) == NULL, "only one killer"); ++ killer.at_put_grow(s->dst_id(), s, NULL); ++ } ++ assert(killer.at_grow(MoveOperation::get_id(temp_register), NULL) == NULL, ++ "make sure temp isn't in the registers that are killed"); ++ ++ // create links between loads and stores ++ for (int i = 0; i < edges.length(); i++) { ++ edges.at(i)->link(killer); ++ } ++ ++ // at this point, all the move operations are chained together ++ // in a doubly linked list. Processing it backwards finds ++ // the beginning of the chain, forwards finds the end. If there's ++ // a cycle it can be broken at any point, so pick an edge and walk ++ // backward until the list ends or we end where we started. 
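++    // As an illustration, the two register moves {rA -> rB, rB -> rA} link into
++    // a cycle; break_cycle() retargets one of the stores at the temp register
++    // and appends a final temp -> original-destination store, so the moves are
++    // emitted as rB -> temp, rA -> rB, temp -> rA.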
++ GrowableArray* stores = new GrowableArray(); ++ for (int e = 0; e < edges.length(); e++) { ++ MoveOperation* s = edges.at(e); ++ if (!s->is_processed()) { ++ MoveOperation* start = s; ++ // search for the beginning of the chain or cycle ++ while (start->prev() != NULL && start->prev() != s) { ++ start = start->prev(); ++ } ++ if (start->prev() == s) { ++ start->break_cycle(temp_register); ++ } ++ // walk the chain forward inserting to store list ++ while (start != NULL) { ++ stores->append(start); ++ start->set_processed(); ++ start = start->next(); ++ } ++ } ++ } ++ return stores; ++ } ++}; ++ ++static void verify_oop_args(MacroAssembler* masm, ++ const methodHandle& method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) {//__ stop("verify_oop_args"); ++ Register temp_reg = rmethod; // not part of any compiled calling seq ++ if (VerifyOops) { ++ for (int i = 0; i < method->size_of_parameters(); i++) { ++ if (sig_bt[i] == T_OBJECT || ++ sig_bt[i] == T_ARRAY) { ++ VMReg r = regs[i].first(); ++ assert(r->is_valid(), "bad oop arg"); ++ if (r->is_stack()) { ++ __ ldptr(temp_reg, Address(esp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ __ verify_oop(temp_reg); ++ } else { ++ __ verify_oop(r->as_Register()); ++ } ++ } ++ } ++ } ++} ++ ++static void gen_special_dispatch(MacroAssembler* masm, ++ const methodHandle& method, ++ const BasicType* sig_bt, ++ const VMRegPair* regs) {SCOPEMARK_NAME(gen_special_dispatch, masm); ++ verify_oop_args(masm, method, sig_bt, regs); ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ //__ stop("gen_special_dispatch"); ++ // Now write the args into the outgoing interpreter space ++ bool has_receiver = false; ++ Register receiver_reg = noreg; ++ int member_arg_pos = -1; ++ Register member_reg = noreg; ++ int ref_kind = MethodHandles::signature_polymorphic_intrinsic_ref_kind(iid); ++ if (ref_kind != 0) { ++ member_arg_pos = method->size_of_parameters() - 1; // trailing MemberName argument ++ member_reg = rmethod; // known to be free at this point ++ has_receiver = MethodHandles::ref_kind_has_receiver(ref_kind); ++ } else if (iid == vmIntrinsics::_invokeBasic) { ++ has_receiver = true; ++ } else { ++ fatal("unexpected intrinsic id %d", iid); ++ } ++ ++ if (member_reg != noreg) { ++ // Load the member_arg into register, if necessary. ++ SharedRuntime::check_member_name_argument_is_last_argument(method, sig_bt, regs); ++ VMReg r = regs[member_arg_pos].first(); ++ if (r->is_stack()) { ++ __ ldptr(member_reg, Address(esp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ member_reg = r->as_Register(); ++ } ++ } ++ ++ if (has_receiver) { ++ // Make sure the receiver is loaded into a register. ++ assert(method->size_of_parameters() > 0, "oob"); ++ assert(sig_bt[0] == T_OBJECT, "receiver argument must be an object"); ++ VMReg r = regs[0].first(); ++ assert(r->is_valid(), "bad receiver arg"); ++ if (r->is_stack()) { ++ // Porting note: This assumes that compiled calling conventions always ++ // pass the receiver oop in a register. If this is not true on some ++ // platform, pick a temp and load the receiver from stack. 
++ fatal("receiver always in a register"); ++ receiver_reg = j_rarg0; // known to be free at this point ++ __ ldptr(receiver_reg, Address(esp, r->reg2stack() * VMRegImpl::stack_slot_size)); ++ } else { ++ // no data motion is needed ++ receiver_reg = r->as_Register(); ++ } ++ } ++ ++ // Figure out which address we are really jumping to: ++ MethodHandles::generate_method_handle_dispatch(masm, iid, ++ receiver_reg, member_reg, /*for_compiler_entry:*/ true); ++} ++ ++// --------------------------------------------------------------------------- ++// Generate a native wrapper for a given method. The method takes arguments ++// in the Java compiled code convention, marshals them to the native ++// convention (handlizes oops, etc), transitions to native, makes the call, ++// returns to java state (possibly blocking), unhandlizes any result and ++// returns. ++// ++// Critical native functions are a shorthand for the use of ++// GetPrimtiveArrayCritical and disallow the use of any other JNI ++// functions. The wrapper is expected to unpack the arguments before ++// passing them to the callee and perform checks before and after the ++// native call to ensure that they GCLocker ++// lock_critical/unlock_critical semantics are followed. Some other ++// parts of JNI setup are skipped like the tear down of the JNI handle ++// block and the check for pending exceptions it's impossible for them ++// to be thrown. ++// ++// They are roughly structured like this: ++// if (GCLocker::needs_gc()) ++// SharedRuntime::block_for_jni_critical(); ++// tranistion to thread_in_native ++// unpack arrray arguments and call native entry point ++// check for safepoint in progress ++// check if any thread suspend flags are set ++// call into JVM and possible unlock the JNI critical ++// if a GC was suppressed while in the critical native. ++// transition back to thread_in_Java ++// return to caller ++// ++nmethod* SharedRuntime::generate_native_wrapper(MacroAssembler* masm, ++ const methodHandle& method, ++ int compile_id, ++ BasicType* in_sig_bt, ++ VMRegPair* in_regs, ++ BasicType ret_type, ++ address critical_entry) {//__ stop("generate_native_wrapper"); ++ if (method->is_method_handle_intrinsic()) { ++ vmIntrinsics::ID iid = method->intrinsic_id(); ++ intptr_t start = (intptr_t)__ pc(); ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ gen_special_dispatch(masm, ++ method, ++ in_sig_bt, ++ in_regs); ++ int frame_complete = ((intptr_t)__ pc()) - start; // not complete, period ++ __ flush(); ++ int stack_slots = SharedRuntime::out_preserve_stack_slots(); // no out slots at all, actually ++ return nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ in_ByteSize(-1), ++ in_ByteSize(-1), ++ (OopMapSet*)NULL); ++ } ++ bool is_critical_native = true; ++ address native_func = critical_entry; ++ if (native_func == NULL) { ++ native_func = method->native_function(); ++ is_critical_native = false; ++ } ++ assert(native_func != NULL, "must have function"); ++ ++ // An OopMap for lock (and class if static) ++ OopMapSet *oop_maps = new OopMapSet(); ++ intptr_t start = (intptr_t)__ pc(); ++ ++ // We have received a description of where all the java arg are located ++ // on entry to the wrapper. We need to convert these args to where ++ // the jni function will expect them. 
To figure out where they go ++ // we convert the java signature to a C signature by inserting ++ // the hidden arguments as arg[0] and possibly arg[1] (static method) ++ ++ const int total_in_args = method->size_of_parameters(); ++ int total_c_args = total_in_args; ++ if (!is_critical_native) { ++ total_c_args += 1; ++ if (method->is_static()) { ++ total_c_args++; ++ } ++ } else { ++ for (int i = 0; i < total_in_args; i++) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ total_c_args++; ++ } ++ } ++ } ++ ++ BasicType* out_sig_bt = NEW_RESOURCE_ARRAY(BasicType, total_c_args); ++ VMRegPair* out_regs = NEW_RESOURCE_ARRAY(VMRegPair, total_c_args); ++ BasicType* in_elem_bt = NULL; ++ ++ int argc = 0; ++ if (!is_critical_native) { ++ out_sig_bt[argc++] = T_ADDRESS; ++ if (method->is_static()) { ++ out_sig_bt[argc++] = T_OBJECT; ++ } ++ ++ for (int i = 0; i < total_in_args ; i++ ) { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ } ++ } else { ++ Thread* THREAD = Thread::current(); ++ in_elem_bt = NEW_RESOURCE_ARRAY(BasicType, total_in_args); ++ SignatureStream ss(method->signature()); ++ for (int i = 0; i < total_in_args ; i++ ) { ++ if (in_sig_bt[i] == T_ARRAY) { ++ // Arrays are passed as int, elem* pair ++ out_sig_bt[argc++] = T_INT; ++ out_sig_bt[argc++] = T_ADDRESS; ++ Symbol* atype = ss.as_symbol(CHECK_NULL); ++ const char* at = atype->as_C_string(); ++ if (strlen(at) == 2) { ++ assert(at[0] == '[', "must be"); ++ switch (at[1]) { ++ case 'B': in_elem_bt[i] = T_BYTE; break; ++ case 'C': in_elem_bt[i] = T_CHAR; break; ++ case 'D': in_elem_bt[i] = T_DOUBLE; break; ++ case 'F': in_elem_bt[i] = T_FLOAT; break; ++ case 'I': in_elem_bt[i] = T_INT; break; ++ case 'J': in_elem_bt[i] = T_LONG; break; ++ case 'S': in_elem_bt[i] = T_SHORT; break; ++ case 'Z': in_elem_bt[i] = T_BOOLEAN; break; ++ default: ShouldNotReachHere(); ++ } ++ } ++ } else { ++ out_sig_bt[argc++] = in_sig_bt[i]; ++ in_elem_bt[i] = T_VOID; ++ } ++ if (in_sig_bt[i] != T_VOID) { ++ assert(in_sig_bt[i] == ss.type(), "must match"); ++ ss.next(); ++ } ++ } ++ } ++ ++ // Now figure out where the args must be stored and how much stack space ++ // they require. ++ int out_arg_slots; ++ out_arg_slots = c_calling_convention(out_sig_bt, out_regs, NULL, total_c_args); ++ ++ // Compute framesize for the wrapper. We need to handlize all oops in ++ // incoming registers ++ ++ // Calculate the total number of stack slots we will need. ++ ++ // First count the abi requirement plus all of the outgoing args ++ int stack_slots = SharedRuntime::out_preserve_stack_slots() + out_arg_slots; ++ ++ // Now the space for the inbound oop handle area ++ int total_save_slots = 6 * VMRegImpl::slots_per_word; // 6 arguments passed in registers ++ if (is_critical_native) { ++ // Critical natives may have to call out so they need a save area ++ // for register arguments. 
++ int double_slots = 0; ++ int single_slots = 0; ++ for ( int i = 0; i < total_in_args; i++) { ++ if (in_regs[i].first()->is_Register()) { ++ const Register reg = in_regs[i].first()->as_Register(); ++ switch (in_sig_bt[i]) { ++ case T_BOOLEAN: ++ case T_BYTE: ++ case T_SHORT: ++ case T_CHAR: ++ case T_INT: single_slots++; break; ++ case T_ARRAY: // specific to LP64 (7145024) ++ case T_LONG: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ switch (in_sig_bt[i]) { ++ case T_FLOAT: single_slots++; break; ++ case T_DOUBLE: double_slots++; break; ++ default: ShouldNotReachHere(); ++ } ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ ShouldNotReachHere(); ++ } ++ } ++ total_save_slots = double_slots * 2 + single_slots; ++ // align the save area ++ if (double_slots != 0) { ++ stack_slots = align_up(stack_slots, 2); ++ } ++ } ++ ++ int oop_handle_offset = stack_slots; ++ stack_slots += total_save_slots; ++ ++ // Now any space we need for handlizing a klass if static method ++ ++ int klass_slot_offset = 0; ++ int klass_offset = -1; ++ int lock_slot_offset = 0; ++ bool is_static = false; ++ ++ if (method->is_static()) { ++ klass_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ klass_offset = klass_slot_offset * VMRegImpl::stack_slot_size; ++ is_static = true; ++ } ++ ++ // Plus a lock if needed ++ ++ if (method->is_synchronized()) { ++ lock_slot_offset = stack_slots; ++ stack_slots += VMRegImpl::slots_per_word; ++ } ++ ++ // Now a place (+2) to save return values or temp during shuffling ++ // + 4 for return address (which we own) and saved rbp ++ stack_slots += 6;// swjdk8 is 2+6, but i think 6 is enough ++ ++ // Ok The space we have allocated will look like: ++ // ++ // ++ // FP-> | | ++ // |---------------------| ++ // | 2 slots for moves | ++ // |---------------------| ++ // | lock box (if sync) | ++ // |---------------------| <- lock_slot_offset ++ // | klass (if static) | ++ // |---------------------| <- klass_slot_offset ++ // | oopHandle area | ++ // |---------------------| <- oop_handle_offset (6 java arg registers) ++ // | outbound memory | ++ // | based arguments | ++ // | | ++ // |---------------------| ++ // | | ++ // SP-> | out_preserved_slots | ++ // ++ // ++ ++ ++ // Now compute actual number of stack words we need rounding to make ++ // stack properly aligned. ++ stack_slots = align_up(stack_slots, StackAlignmentInSlots); ++ ++ int stack_size = stack_slots * VMRegImpl::stack_slot_size; ++ ++ // First thing make an ic check to see if we should even be here ++ ++ // We are free to use all registers as temps without saving them and ++ // restoring them except rbp. rbp is the only callee save register ++ // as far as the interpreter and the compiler(s) are concerned. ++ ++ ++ const Register ic_reg = V0; ++ const Register receiver = j_rarg0; ++ ++ Label hit; ++ Label exception_pending; ++ //__ stop("generate_native_wrapper"); ++ assert_different_registers(ic_reg, receiver, rscratch3); ++ __ verify_oop(receiver); ++ __ load_klass(rscratch3, receiver); ++// __ cmpl(ic_reg, rscratch3); ++// __ jcc(Assembler::equal, hit); ++ __ beq_c(ic_reg, rscratch3, hit); ++ __ jump(RuntimeAddress(SharedRuntime::get_ic_miss_stub())); ++ ++ // Verified entry point must be aligned ++ __ align(8); ++ ++ __ bind(hit); ++ ++ int vep_offset = ((intptr_t)__ pc()) - start; ++ ++#ifdef COMPILER1 ++ // For Object.hashCode, System.identityHashCode try to pull hashCode from object header if available. 
++ if ((InlineObjectHash && method->intrinsic_id() == vmIntrinsics::_hashCode) || (method->intrinsic_id() == vmIntrinsics::_identityHashCode)) { ++ inline_check_hashcode_from_object_header(masm, method, j_rarg0 /*obj_reg*/, rax /*result*/); ++ } ++#endif // COMPILER1 ++ ++ // The instruction at the verified entry point must be 5 bytes or longer ++ // because it can be patched on the fly by make_non_entrant. The stack bang ++ // instruction fits that requirement. ++ ++ // Generate stack overflow check ++ ++ if (UseStackBanging) { ++ __ bang_stack_with_offset((int)JavaThread::stack_shadow_zone_size()); ++ } else { ++ // need a 5 byte instruction to allow MT safe patching to non-entrant ++ Unimplemented(); ++ } ++ ++ // Generate a new frame for the wrapper. ++ __ enter(); ++ // -2 because return address is already present and so is saved rbp ++ __ subptr(rsp, stack_size - 2*wordSize, rsp); ++ ++ // Frame is now completed as far as size and linkage. ++ int frame_complete = ((intptr_t)__ pc()) - start; ++ ++ //if (UseRTMLocking) { ++ // Abort RTM transaction before calling JNI ++ // because critical section will be large and will be ++ // aborted anyway. Also nmethod could be deoptimized. ++ //__ xabort(0); jzy? ++ //} ++//TODO:sw don't aligned? jzy ++/*#ifdef ASSERT ++ { ++ Label L; ++ __ movl(V0, esp); ++ __ andptr(V0, -16, V0); // must be 16 byte boundary (see amd64 ABI) ++ __ cmpptr(V0, esp); ++ __ jcc(Assembler::equal, L); ++ __ stop("improperly aligned stack"); ++ __ bind(L); ++ } ++#endif*/ /* ASSERT */ ++ ++ ++ // We use r14 as the oop handle for the receiver/klass ++ // It is callee save so it survives the call to native ++ ++ const Register oop_handle_reg = r14;//TODO:check jzy ++ ++ if (is_critical_native SHENANDOAHGC_ONLY(&& !UseShenandoahGC)) { ++ check_needs_gc_for_critical_native(masm, stack_slots, total_c_args, total_in_args, ++ oop_handle_offset, oop_maps, in_regs, in_sig_bt); ++ } ++ ++ // ++ // We immediately shuffle the arguments so that any vm call we have to ++ // make from here on out (sync slow path, jvmti, etc.) we will have ++ // captured the oops from our caller and have a valid oopMap for ++ // them. ++ ++ // ----------------- ++ // The Grand Shuffle ++ ++ // The Java calling convention is either equal (linux) or denser (win64) than the ++ // c calling convention. However the because of the jni_env argument the c calling ++ // convention always has at least one more (and two for static) arguments than Java. ++ // Therefore if we move the args from java -> c backwards then we will never have ++ // a register->register conflict and we don't have to build a dependency graph ++ // and figure out how to break any cycles. ++ // ++ ++ // Record esp-based slot for receiver on stack for non-static methods ++ int receiver_offset = -1; ++ ++ // This is a trick. We double the stack slots so we can claim ++ // the oops in the caller's frame. Since we are sure to have ++ // more args than the caller doubling is enough to make ++ // sure we can capture all the incoming oop args from the ++ // caller. ++ // ++ OopMap* map = new OopMap(stack_slots * 2, 0 /* arg_slots*/); ++ ++ // Mark location of rbp (someday) ++ // map->set_callee_saved(VMRegImpl::stack2reg( stack_slots - 2), stack_slots * 2, 0, vmreg(rbp)); ++ ++ // Use eax, ebx as temporaries during any memory-memory moves we have to do ++ // All inbound args are referenced based on rbp and all outbound args via rsp. 
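++  // In debug builds the reg_destroyed/freg_destroyed arrays below record which
++  // registers have already been written as outgoing arguments, so the shuffle
++  // can assert that no incoming argument is read after its register has been
++  // clobbered.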
++ ++ ++#ifdef ASSERT ++ bool reg_destroyed[RegisterImpl::number_of_registers]; ++ bool freg_destroyed[FloatRegisterImpl::number_of_registers]; ++ for ( int r = 0 ; r < RegisterImpl::number_of_registers ; r++ ) { ++ reg_destroyed[r] = false; ++ } ++ for ( int f = 0 ; f < FloatRegisterImpl::number_of_registers ; f++ ) { ++ freg_destroyed[f] = false; ++ } ++ ++#endif /* ASSERT */ ++ ++ // This may iterate in two different directions depending on the ++ // kind of native it is. The reason is that for regular JNI natives ++ // the incoming and outgoing registers are offset upwards and for ++ // critical natives they are offset down. ++ GrowableArray arg_order(2 * total_in_args); ++#if INCLUDE_SHENANDOAHGC ++ // Inbound arguments that need to be pinned for critical natives ++ GrowableArray pinned_args(total_in_args); ++ // Current stack slot for storing register based array argument ++ int pinned_slot = oop_handle_offset; ++#endif ++ VMRegPair tmp_vmreg; ++ tmp_vmreg.set2(rbx->as_VMReg()); ++ ++ if (!is_critical_native) { ++ for (int i = total_in_args - 1, c_arg = total_c_args - 1; i >= 0; i--, c_arg--) { ++ arg_order.push(i); ++ arg_order.push(c_arg); ++ } ++ } else { ++ // Compute a valid move order, using tmp_vmreg to break any cycles ++ ComputeMoveOrder cmo(total_in_args, in_regs, total_c_args, out_regs, in_sig_bt, arg_order, tmp_vmreg); ++ } ++ ++ int temploc = -1; ++ for (int ai = 0; ai < arg_order.length(); ai += 2) { ++ int i = arg_order.at(ai); ++ int c_arg = arg_order.at(ai + 1); ++ __ block_comment(err_msg("move %d -> %d", i, c_arg)); ++ if (c_arg == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // This arg needs to be moved to a temporary ++ __ movl(tmp_vmreg.first()->as_Register(), in_regs[i].first()->as_Register()); ++ in_regs[i] = tmp_vmreg; ++ temploc = i; ++ continue; ++ } else if (i == -1) { ++ assert(is_critical_native, "should only be required for critical natives"); ++ // Read from the temporary location ++ assert(temploc != -1, "must be valid"); ++ i = temploc; ++ temploc = -1; ++ } ++#ifdef ASSERT ++ if (in_regs[i].first()->is_Register()) { ++ assert(!reg_destroyed[in_regs[i].first()->as_Register()->encoding()], "destroyed reg!"); ++ } else if (in_regs[i].first()->is_FloatRegister()) { ++ assert(!freg_destroyed[in_regs[i].first()->as_FloatRegister()->encoding()], "destroyed reg!"); ++ } ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif /* ASSERT */ ++ switch (in_sig_bt[i]) { ++ case T_ARRAY: ++ if (is_critical_native) { ++#if INCLUDE_SHENANDOAHGC ++ // pin before unpack ++ if (UseShenandoahGC) { ++ assert(pinned_slot <= stack_slots, "overflow"); ++ ShenandoahBarrierSet::assembler()->pin_critical_native_array(masm, in_regs[i], pinned_slot); ++ pinned_args.append(i); ++ } ++#endif ++ unpack_array_argument(masm, in_regs[i], in_elem_bt[i], out_regs[c_arg + 1], out_regs[c_arg]); ++ c_arg++; ++#ifdef ASSERT ++ if (out_regs[c_arg].first()->is_Register()) { ++ reg_destroyed[out_regs[c_arg].first()->as_Register()->encoding()] = true; ++ } else if (out_regs[c_arg].first()->is_FloatRegister()) { ++ freg_destroyed[out_regs[c_arg].first()->as_FloatRegister()->encoding()] = true; ++ } ++#endif ++ break; ++ } ++ case T_OBJECT: ++ assert(!is_critical_native, "no oop arguments"); ++ object_move(masm, map, 
oop_handle_offset, stack_slots, in_regs[i], out_regs[c_arg], ++ ((i == 0) && (!is_static)), ++ &receiver_offset); ++ break; ++ case T_VOID: ++ break; ++ ++ case T_FLOAT: ++ float_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_DOUBLE: ++ assert( i + 1 < total_in_args && ++ in_sig_bt[i + 1] == T_VOID && ++ out_sig_bt[c_arg+1] == T_VOID, "bad arg list"); ++ double_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_LONG : ++ long_move(masm, in_regs[i], out_regs[c_arg]); ++ break; ++ ++ case T_ADDRESS: assert(false, "found T_ADDRESS in java args"); ++ ++ default: ++ move32_64(masm, in_regs[i], out_regs[c_arg]); ++ } ++ } ++ ++ int c_arg; ++ ++ // Pre-load a static method's oop into r14. Used both by locking code and ++ // the normal JNI call code. ++ if (!is_critical_native) { ++ // point c_arg at the first arg that is already loaded in case we ++ // need to spill before we call out ++ c_arg = total_c_args - total_in_args; ++ ++ if (method->is_static()) { ++ ++ // load oop into a register ++ int oop_index = __ oop_recorder()->find_index(JNIHandles::make_local((method->method_holder())->java_mirror())); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ __ relocate(rspec); ++ __ prepare_patch_li48(oop_handle_reg, (long)JNIHandles::make_local((method->method_holder())->java_mirror())); ++ ++ // Now handlize the static class mirror it's known not-null. ++ __ stptr(oop_handle_reg, Address(rsp, klass_offset)); ++ map->set_oop(VMRegImpl::stack2reg(klass_slot_offset)); ++ ++ // Now get the handle ++ __ lea(oop_handle_reg, Address(rsp, klass_offset)); ++ // store the klass handle as second argument ++ __ movl(c_rarg1, oop_handle_reg); ++ // and protect the arg if we must spill ++ c_arg--; ++ } ++ } else { ++ // For JNI critical methods we need to save all registers in save_args. ++ c_arg = 0; ++ } ++ ++ // Change state to native (we save the return address in the thread, since it might not ++ // be pushed on the stack when we do a a stack traversal). It is enough that the pc() ++ // points into the right code segment. It does not have to be the correct return pc. ++ // We use the same pc/oopMap repeatedly when we call out ++ ++ //intptr_t the_pc = (intptr_t) __ pc(); ++ //oop_maps->add_gc_map(the_pc - start, map); ++ Label native_return; ++ __ set_last_Java_frame(rsp, noreg, native_return, rscratch3); ++ //__ set_last_Java_frame(rsp, noreg, (address)the_pc, rscratch3); ++ // We have all of the arguments setup at this point. We must not touch any register ++ // argument registers at this point (what if we save/restore them there are no oop? 
++ ++ { ++ SkipIfEqual skip(masm, &DTraceMethodProbes, false); ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry), ++ rthread, c_rarg1); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // RedefineClasses() tracing support for obsolete method entry ++ if (log_is_enabled(Trace, redefine, class, obsolete)) { ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::rc_trace_method_entry), ++ rthread, c_rarg1); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ } ++ ++ // Lock a synchronized method ++ ++ // Register definitions used by locking and unlocking ++ ++ const Register swap_reg = V0; // Must use rax for cmpxchg instruction ?jzy ++ const Register obj_reg = rmethod; // Will contain the oop ++ const Register lock_reg = rbcp; // Address of compiler lock object (BasicLock) ++ const Register old_hdr = rbcp; // value of old header at unlock time ++ ++ Label slow_path_lock; ++ Label lock_done; ++ ++ if (method->is_synchronized()) { ++ assert(!is_critical_native, "unhandled"); ++ ++ ++ const int mark_word_offset = BasicLock::displaced_header_offset_in_bytes(); ++ ++ // Get the handle (the 2nd argument) ++ __ movl(oop_handle_reg, c_rarg1); ++ ++ // Get address of the box ++ ++ __ lea(lock_reg, Address(esp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ ++ // Load the oop from the handle ++ __ ldptr(obj_reg, Address(oop_handle_reg, 0)); ++ ++ if (UseBiasedLocking) { ++ __ biased_locking_enter(lock_reg, obj_reg, swap_reg, rscratch1, false, lock_done, &slow_path_lock); ++ } ++ ++ // Load immediate 1 into swap_reg %rax ++ __ movw(swap_reg, 1); ++ ++ // Load (object->mark() | 1) into swap_reg %rax ++ __ ldptr(rscratch3, Address(obj_reg, oopDesc::mark_offset_in_bytes())); ++ __ orptr(swap_reg, rscratch3, swap_reg); ++ ++ // Save (object->mark() | 1) into BasicLock's displaced header ++ __ stptr(swap_reg, Address(lock_reg, mark_word_offset)); ++ ++ if (os::is_MP()) { ++ __ lock(); ++ } ++ ++ // Address -> lock_reg if lock_reg == swap_reg else swap_reg = lock_reg ++ __ cmpxchg(lock_reg, Address(obj_reg, oopDesc::mark_offset_in_bytes()), swap_reg); ++ __ jcc(Assembler::success, lock_done); ++ ++ // Hmm should this move to the slow path code area??? ++ ++ // Test if the oopMark is an obvious stack pointer, i.e., ++ // 1) (mark & 3) == 0, and ++ // 2) rsp <= mark < mark + os::pagesize() ++ // These 3 tests can be done by evaluating the following ++ // expression: ((mark - rsp) & (3 - os::vm_page_size())), ++ // assuming both stack pointer and pagesize have their ++ // least significant 2 bits clear. ++ // NOTE: the oopMark is in swap_reg %rax as the result of cmpxchg ++ //TODO:here is similar to interpreter ? 
jzy ++ __ subptr(swap_reg, esp, swap_reg); ++ __ andptr(swap_reg, 3 - os::vm_page_size(), swap_reg); ++ ++ // Save the test result, for recursive case, the result is zero ++ __ stptr(swap_reg, Address(lock_reg, mark_word_offset)); ++ __ jcc(Assembler::notEqual, slow_path_lock, swap_reg); ++ ++ // Slow path will re-enter here ++ ++ __ bind(lock_done); ++ } ++ ++ ++ // Finally just about ready to make the JNI call ++ ++ ++ // get JNIEnv* which is first argument to native ++ if (!is_critical_native) { ++ __ lea(c_rarg0, Address(rthread, in_bytes(JavaThread::jni_environment_offset()))); ++ } ++ ++ // Now set thread in native ++ __ stw(_thread_in_native, Address(rthread, JavaThread::thread_state_offset())); ++ ++ __ call(RuntimeAddress(native_func), &native_return); ++ // Verify or restore cpu control state after JNI call ++ //__ restore_cpu_control_state_after_jni(); //sw need this? jzy ++ ++ //intptr_t return_pc = (intptr_t) __ pc(); ++ oop_maps->add_gc_map( __ offset(native_return, (address)start), map); ++ ++ // Unpack native results. ++ switch (ret_type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ zapnot(V0, 0x3, V0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_DOUBLE : ++ case T_FLOAT : ++ // Result is in xmm0 we'll save as needed ++ break; ++ case T_ARRAY: // Really a handle ++ case T_OBJECT: // Really a handle ++ break; // can't de-handlize until after safepoint check ++ case T_VOID: break; ++ case T_LONG: break; ++ default : ShouldNotReachHere(); ++ } ++ ++#if INCLUDE_SHENANDOAHGC ++ if (UseShenandoahGC) { ++ // unpin pinned arguments ++ pinned_slot = oop_handle_offset; ++ if (pinned_args.length() > 0) { ++ // save return value that may be overwritten otherwise. ++ save_native_result(masm, ret_type, stack_slots); ++ for (int index = 0; index < pinned_args.length(); index ++) { ++ int i = pinned_args.at(index); ++ assert(pinned_slot <= stack_slots, "overflow"); ++ ShenandoahBarrierSet::assembler()->unpin_critical_native_array(masm, in_regs[i], pinned_slot); ++ } ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ } ++#endif ++ // Switch thread to "native transition" state before reading the synchronization state. ++ // This additional state is necessary because reading and testing the synchronization ++ // state is not atomic w.r.t. GC, as this scenario demonstrates: ++ // Java thread A, in _thread_in_native state, loads _not_synchronized and is preempted. ++ // VM thread changes sync state to synchronizing and suspends threads for GC. ++ // Thread A is resumed to finish this native method, but doesn't block here since it ++ // didn't see any synchronization is progress, and escapes. ++ __ stw(_thread_in_native_trans, Address(rthread, JavaThread::thread_state_offset())); ++ ++ if(os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ memb(); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. 
++ __ serialize_memory(rthread, rscratch3); ++ } ++ } ++ ++ Label after_transition; ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ Label slow_path; ++ ++ __ safepoint_poll(slow_path, rthread, rscratch3); ++ ++ __ cmpw(Address(rthread, JavaThread::suspend_flags_offset()), R0); ++ __ jcc(Assembler::equal, Continue); ++ __ bind(slow_path); ++ ++ // Don't use call_VM as it will see a possible pending exception and forward it ++ // and never return here preventing us from clearing _last_native_pc down below. ++ // Also can't use call_VM_leaf either as it will check to see if rsi & rdi are ++ // preserved and correspond to the bcp/locals pointers. So we do a runtime call ++ // by hand. ++ // ++ //__ vzeroupper(); ++ Register r12 = r12_heapbase; ++ ++ save_native_result(masm, ret_type, stack_slots); ++ __ movl(c_rarg0, rthread); ++ __ movl(r12, esp); // remember sp ++ __ subptr(esp, frame::arg_reg_save_area_bytes, esp); // windows ++ __ andptr(esp, -16, esp); // align stack as required by ABI ++ if (!is_critical_native) { ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); ++ } else { ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans_and_transition))); ++ } ++ __ movl(esp, r12); // restore sp ++ __ reinit_heapbase(); ++ // Restore any method result value ++ restore_native_result(masm, ret_type, stack_slots); ++ ++ if (is_critical_native) { ++ // The call above performed the transition to thread_in_Java so ++ // skip the transition logic below. ++ __ jmp(after_transition); ++ } ++ ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ stw(_thread_in_Java, Address(rthread, JavaThread::thread_state_offset())); ++ __ bind(after_transition); ++ ++ Label reguard; ++ Label reguard_done; ++ __ cmpw(Address(rthread, JavaThread::stack_guard_state_offset()), JavaThread::stack_guard_yellow_reserved_disabled); ++ __ jcc(Assembler::equal, reguard); ++ __ bind(reguard_done); ++ ++ // native result if any is live ++ Register rax = V0; ++ Register rcx = rscratch3; ++ Register r12 = r12_heapbase; ++ ++ // Unlock ++ Label unlock_done; ++ Label slow_path_unlock; ++ if (method->is_synchronized()) { ++ ++ // Get locked oop from the handle we passed to jni ++ __ ldptr(obj_reg, Address(oop_handle_reg, 0)); ++ ++ Label done; ++ ++ if (UseBiasedLocking) { ++ __ biased_locking_exit(obj_reg, old_hdr, done); ++ } ++ ++ // Simple recursive lock? 
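++    // The locking fast path stored zero into the displaced header slot of the
++    // BasicLock for a recursive stack lock, so a zero in the lock slot here
++    // means there is nothing to unlock and we branch straight to done.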
++ ++ __ cmpptr(Address(esp, lock_slot_offset * VMRegImpl::stack_slot_size), R0); ++ __ jcc(Assembler::equal, done); ++ ++ // Must save rax if if it is live now because cmpxchg must use it ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ ++ // get address of the stack lock ++ __ lea(rax, Address(esp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ // get old displaced header ++ __ ldptr(old_hdr, Address(rax, 0)); ++ ++ // Atomic swap old header if oop still contains the stack lock ++ if (os::is_MP()) { ++ __ lock(); ++ } ++ ++ __ cmpxchg(old_hdr, Address(obj_reg, oopDesc::mark_offset_in_bytes()), rax); ++ __ jcc(Assembler::failed, slow_path_unlock); ++ ++ // slow path re-enters here ++ __ bind(unlock_done); ++ if (ret_type != T_FLOAT && ret_type != T_DOUBLE && ret_type != T_VOID) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ bind(done); ++ ++ } ++ { ++ SkipIfEqual skip(masm, &DTraceMethodProbes, false); ++ save_native_result(masm, ret_type, stack_slots); ++ __ mov_metadata(c_rarg1, method()); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit), ++ rthread, c_rarg1); ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ reset_last_Java_frame(false); ++ ++ // Unbox oop result, e.g. JNIHandles::resolve value. ++ if (ret_type == T_OBJECT || ret_type == T_ARRAY) { ++ __ resolve_jobject(rax /* value */, ++ rthread /* thread */, ++ rcx /* tmp */); ++ } ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ stptr(R0, Address(rthread, JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ if (!is_critical_native) { ++ // reset handle block ++ __ ldptr(rcx, Address(rthread, JavaThread::active_handles_offset())); ++ __ stw(R0, Address(rcx, JNIHandleBlock::top_offset_in_bytes())); ++ } ++ ++ // pop our frame ++ ++ __ leave(); ++ ++ if (!is_critical_native) { ++ // Any exception pending? ++ __ cmpptr(Address(rthread, in_bytes(Thread::pending_exception_offset())), R0); ++ __ jcc(Assembler::notEqual, exception_pending); ++ } ++ ++ // Return ++ ++ __ ret(); ++ ++ // Unexpected paths are out of line and go here ++ ++ if (!is_critical_native) { ++ // forward the exception ++ __ bind(exception_pending); ++ ++ // and forward the exception ++ __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ } ++ ++ // Slow path locking & unlocking ++ if (method->is_synchronized()) { ++ ++ // BEGIN Slow path lock ++ __ bind(slow_path_lock); ++ ++ // has last_Java_frame setup. 
No exceptions so do vanilla call not call_VM ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ ++ // protect the args we've loaded ++ save_args(masm, total_c_args, c_arg, out_regs); ++ ++ __ movl(c_rarg0, obj_reg); ++ __ movl(c_rarg1, lock_reg); ++ __ movl(c_rarg2, rthread); ++ ++ // Not a leaf but we have last_Java_frame setup as we want ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_locking_C), 3); ++ restore_args(masm, total_c_args, c_arg, out_regs); ++ ++#ifdef ASSERT ++ { Label L; ++ __ cmpptr(Address(rthread, in_bytes(Thread::pending_exception_offset())), R0); ++ __ jcc(Assembler::equal, L); ++ __ stop("no pending exception allowed on exit from monitorenter"); ++ __ bind(L); ++ } ++#endif ++ __ jmp(lock_done); ++ ++ // END Slow path lock ++ ++ // BEGIN Slow path unlock ++ __ bind(slow_path_unlock); ++ ++ // If we haven't already saved the native result we must save it now as xmm registers ++ // are still exposed. ++ //__ vzeroupper(); ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ save_native_result(masm, ret_type, stack_slots); ++ } ++ ++ __ lea(c_rarg1, Address(esp, lock_slot_offset * VMRegImpl::stack_slot_size)); ++ Register r12 = r12_heapbase; ++ ++ __ movl(c_rarg0, obj_reg); ++ __ movl(c_rarg2, rthread); ++ __ movl(r12, esp); // remember sp ++ __ subptr(esp, frame::arg_reg_save_area_bytes, esp); // windows ++ __ andptr(esp, -16, esp); // align stack as required by ABI ++ ++ // Save pending exception around call to VM (which contains an EXCEPTION_MARK) ++ // NOTE that obj_reg == rbx currently ++ __ ldptr(rbx, Address(rthread, in_bytes(Thread::pending_exception_offset()))); ++ __ stptr(R0, Address(rthread, in_bytes(Thread::pending_exception_offset()))); ++ ++ // args are (oop obj, BasicLock* lock, JavaThread* thread) ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C))); ++ __ movl(esp, r12); // restore sp ++ __ reinit_heapbase(); ++#ifdef ASSERT ++ { ++ Label L; ++ __ cmpptr(Address(rthread, in_bytes(Thread::pending_exception_offset())), R0); ++ __ jcc(Assembler::equal, L); ++ __ stop("no pending exception allowed on exit complete_monitor_unlocking_C"); ++ __ bind(L); ++ } ++#endif /* ASSERT */ ++ ++ __ stptr(rbx, Address(rthread, in_bytes(Thread::pending_exception_offset()))); ++ ++ if (ret_type == T_FLOAT || ret_type == T_DOUBLE ) { ++ restore_native_result(masm, ret_type, stack_slots); ++ } ++ __ jmp(unlock_done); ++ ++ // END Slow path unlock ++ ++ } // synchronized ++ ++ // SLOW PATH Reguard the stack if needed ++ ++ __ bind(reguard); ++ //__ vzeroupper(); ++ save_native_result(masm, ret_type, stack_slots); ++ __ movl(r12, esp); // remember sp ++ __ subptr(esp, frame::arg_reg_save_area_bytes, esp); // windows ++ __ andptr(esp, -16, esp); // align stack as required by ABI ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); ++ __ movl(esp, r12); // restore sp ++ __ reinit_heapbase(); ++ restore_native_result(masm, ret_type, stack_slots); ++ // and continue ++ __ jmp(reguard_done); ++ ++ ++ ++ __ flush(); ++ ++ nmethod *nm = nmethod::new_native_nmethod(method, ++ compile_id, ++ masm->code(), ++ vep_offset, ++ frame_complete, ++ stack_slots / VMRegImpl::slots_per_word, ++ (is_static ? 
in_ByteSize(klass_offset) : in_ByteSize(receiver_offset)), ++ in_ByteSize(lock_slot_offset*VMRegImpl::stack_slot_size), ++ oop_maps); ++ ++ if (is_critical_native) { ++ nm->set_lazy_critical_native(true); ++ } ++ ++ return nm; ++ ++} ++ ++// this function returns the adjust size (in number of words) to a c2i adapter ++// activation for use during deoptimization ++int Deoptimization::last_frame_adjust(int callee_parameters, int callee_locals ) { ++ return (callee_locals - callee_parameters) * Interpreter::stackElementWords; ++} ++ ++ ++uint SharedRuntime::out_preserve_stack_slots() { ++ return 0; ++} ++ ++//------------------------------generate_deopt_blob---------------------------- ++void SharedRuntime::generate_deopt_blob() { ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ int pad = 0; ++#if INCLUDE_JVMCI ++ if (EnableJVMCI || UseAOT) { ++ pad += 512; // Increase the buffer size when compiling for JVMCI ++ } ++#endif ++ CodeBuffer buffer("deopt_blob", 2560*2+pad, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ int frame_size_in_words; ++ OopMap* map = NULL; ++ OopMapSet *oop_maps = new OopMapSet(); ++ ++ // ------------- ++ // This code enters when returning to a de-optimized nmethod. A return ++ // address has been pushed on the the stack, and return values are in ++ // registers. ++ // If we are doing a normal deopt then we were called from the patched ++ // nmethod from the point we returned to the nmethod. So the return ++ // address on the stack is wrong by NativeCall::instruction_size ++ // We will adjust the value so it looks like we have the original return ++ // address on the stack (like when we eagerly deoptimized). ++ // In the case of an exception pending when deoptimizing, we enter ++ // with a return address on the stack that points after the call we patched ++ // into the exception handler. We have the following register state from, ++ // e.g., the forward exception stub (see stubGenerator_x86_64.cpp). ++ // rax: exception oop ++ // rbx: exception handler ++ // rdx: throwing pc ++ // So in this case we simply jam rdx into the useless return address and ++ // the stack looks just like we want. ++ // ++ // At this point we need to de-opt. We save the argument return ++ // registers. We call the first C routine, fetch_unroll_info(). This ++ // routine captures the return values and returns a structure which ++ // describes the current frame size and the sizes of all replacement frames. ++ // The current frame is compiled code and may contain many inlined ++ // functions, each with their own JVM state. We pop the current frame, then ++ // push all the new frames. Then we call the C routine unpack_frames() to ++ // populate these frames. Finally unpack_frames() returns us the new target ++ // address. Notice that callee-save registers are BLOWN here; they have ++ // already been captured in the vframeArray at the time the return PC was ++ // patched. ++ address start = __ pc(); ++ Label cont; ++ //__ stop("check generate_deopt_blob @jzy"); ++ // Prolog for non exception case! ++ //__ subptr(RA, NativeCall::return_address_offset, RA); //TODO:need this? jzy ++ ++ // Save everything in sight. ++ map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); ++ Register r14 = rlocals; //should be callee saved jzy ++ //Register rax = V0; ++ //Register rdi = A0; ++ //Register rsi = A1; ++ //Register rdx = A2;//?is OK? 
jzy ++ //Register rcx = A3; ++ //Register rbx = rmethod; ++ //__ stop("generate_deopt_blob"); ++ // Normal deoptimization. Save exec mode for unpack_frames. ++ __ mov_immediate32(r14, Deoptimization::Unpack_deopt); // callee-saved why r14? jzy ++ __ jmp(cont); ++ ++ int reexecute_offset = __ pc() - start; ++#if INCLUDE_JVMCI && !defined(COMPILER1) ++ if (EnableJVMCI && UseJVMCICompiler) { ++ // JVMCI does not use this kind of deoptimization ++ __ should_not_reach_here(); ++ } ++#endif ++ ++ // Reexecute case ++ // return address is the pc describes what bci to do re-execute at ++ ++ // No need to update map as each call to save_live_registers will produce identical oopmap ++ (void) RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); ++ ++ __ mov_immediate32(r14, Deoptimization::Unpack_reexecute); // callee-saved ++ __ jmp(cont); ++ ++#if INCLUDE_JVMCI ++ Label after_fetch_unroll_info_call; ++ int implicit_exception_uncommon_trap_offset = 0; ++ int uncommon_trap_offset = 0; ++ ++ if (EnableJVMCI || UseAOT) { ++ implicit_exception_uncommon_trap_offset = __ pc() - start; ++ ++ __ pushptr(Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ __ stptr(R0, Address(rthread, in_bytes(JavaThread::jvmci_implicit_exception_pc_offset()))); ++ ++ uncommon_trap_offset = __ pc() - start; ++ ++ // Save everything in sight. ++ RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); ++ // fetch_unroll_info needs to call last_java_frame() ++ Label retaddr; ++ __ set_last_Java_frame(esp, noreg, retaddr, rscratch3, rscratch2_AT); ++ ++ __ ldws(c_rarg1, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ __ mov_immediate32(rscratch3, -1); ++ __ stw(rscratch3, Address(rthread, in_bytes(JavaThread::pending_deoptimization_offset()))); ++ ++ __ mov_immediate32(r14, (int32_t)Deoptimization::Unpack_reexecute); ++ __ movl(c_rarg0, rthread); ++ __ movl(c_rarg2, r14); // exec mode ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)), &retaddr); ++ oop_maps->add_gc_map( __ offset(retaddr, start), map->deep_copy()); ++ ++ __ reset_last_Java_frame(false); ++ ++ __ jmp(after_fetch_unroll_info_call); ++ } // EnableJVMCI ++#endif // INCLUDE_JVMCI ++ ++ int exception_offset = __ pc() - start; ++ ++ // Prolog for exception case ++ ++ // all registers are dead at this entry point, except for rax, and ++ // rdx which contain the exception oop and exception pc ++ // respectively. Set them in TLS and fall thru to the ++ // unpack_with_exception_in_tls entry point. ++ //__ stop("here should check:which is rax & rdx in sw?"); ++ __ stptr(rdx, Address(rthread, JavaThread::exception_pc_offset())); ++ __ stptr(rax, Address(rthread, JavaThread::exception_oop_offset())); ++ ++ int exception_in_tls_offset = __ pc() - start; ++ ++ // new implementation because exception oop is now passed in JavaThread ++ ++ // Prolog for exception case ++ // All registers must be preserved because they might be used by LinearScan ++ // Exceptiop oop and throwing PC are passed in JavaThread ++ // tos: stack at point of call to method that threw the exception (i.e. only ++ // args are on the stack, no return address) ++ ++ // make room on stack for the return address ++ // It will be patched later with the throwing pc. The correct value is not ++ // available now because loading it from memory would destroy registers. ++ //__ push(0); //TODO:check return address? 
jzy ++ //__ stop("generate_deopt_blob:exception here need check: jzy"); ++ // Save everything in sight. ++ map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); ++ ++ // Now it is safe to overwrite any register ++ ++ // Deopt during an exception. Save exec mode for unpack_frames. ++ __ mov_immediate32u(r14, Deoptimization::Unpack_exception); // callee-saved ++ ++ // load throwing pc from JavaThread and patch it as the return address ++ // of the current frame. Then clear the field in JavaThread ++ ++ __ ldptr(rdx, Address(rthread, JavaThread::exception_pc_offset())); ++ __ stptr(rdx, Address(rfp, wordSize)); ++ __ stptr(R0, Address(rthread, JavaThread::exception_pc_offset())); ++ ++#ifdef ASSERT ++ // verify that there is really an exception oop in JavaThread ++ __ ldptr(rax, Address(rthread, JavaThread::exception_oop_offset())); ++ __ verify_oop(rax); ++ ++ // verify that there is no pending exception ++ Label no_pending_exception; ++ __ ldptr(rax, Address(rthread, Thread::pending_exception_offset())); ++ __ jcc(Assembler::zero, no_pending_exception, rax); ++ __ stop("must not have pending exception here"); ++ __ bind(no_pending_exception); ++#endif ++ ++ __ bind(cont); ++ ++ // Call C code. Need thread and this frame, but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. ++ // ++ // UnrollBlock* fetch_unroll_info(JavaThread* thread) ++ ++ // fetch_unroll_info needs to call last_java_frame(). ++ //__ stop("TODO:check how set pc? jzy"); ++ Label retaddr; ++ __ set_last_Java_frame(esp, noreg, retaddr, rscratch3, rscratch2_AT); ++#ifdef ASSERT ++ { Label L; ++ __ cmpptr(Address(rthread, ++ JavaThread::last_Java_fp_offset()), ++ R0); ++ __ jcc(Assembler::equal, L); ++ __ stop("SharedRuntime::generate_deopt_blob: last_Java_fp not cleared"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ __ movl(c_rarg0, rthread); ++ __ movl(c_rarg1, r14); // exec_mode ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::fetch_unroll_info)), &retaddr); ++ ++ // Need to have an oopmap that tells fetch_unroll_info where to ++ // find any register it might need. ++ oop_maps->add_gc_map(__ offset(retaddr, start), map); ++ ++ __ reset_last_Java_frame(false); ++ ++#if INCLUDE_JVMCI ++ if (EnableJVMCI || UseAOT) { ++ __ bind(after_fetch_unroll_info_call); ++ } ++#endif ++ ++ // Load UnrollBlock* into rdi ++ __ movl(rdi, rax); ++ ++ __ ldws(r14, Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes())); ++ Label noException; ++ __ cmpw(r14, Deoptimization::Unpack_exception); // Was exception pending? ++ __ jcc(Assembler::notEqual, noException); ++ __ ldptr(rax, Address(rthread, JavaThread::exception_oop_offset())); ++ // QQQ this is useless it was NULL above ++ __ ldptr(rdx, Address(rthread, JavaThread::exception_pc_offset())); ++ __ stptr(R0, Address(rthread, JavaThread::exception_oop_offset())); ++ __ stptr(R0, Address(rthread, JavaThread::exception_pc_offset())); ++ ++ __ verify_oop(rax); ++ ++ // Overwrite the result registers with the exception results. ++ __ stptr(rax, Address(esp, RegisterSaver::v0_offset_in_bytes())); ++ // I think this is useless ++ assert(rdx == A2, "rdx not a2 register"); ++ __ stptr(rdx, Address(esp, RegisterSaver::a2_offset_in_bytes())); ++ ++ __ bind(noException); ++ ++ // Only register save data is on the stack. ++ // Now restore the result registers. Everything else is either dead ++ // or captured in the vframeArray. 
++ RegisterSaver::restore_result_registers(masm); ++ ++ // All of the register save area has been popped of the stack. Only the ++ // return address remains. ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ // ++ // Note: by leaving the return address of self-frame on the stack ++ // and using the size of frame 2 to adjust the stack ++ // when we are done the return to frame 3 will still be on the stack. ++ ++ // Pop deoptimized frame ++ __ ldws(rcx, Address(rdi, Deoptimization::UnrollBlock::size_of_deoptimized_frame_offset_in_bytes())); ++ __ addptr(esp, rcx, esp); ++ //__ ldl(RA, - 1 * wordSize, esp); ++ //__ ldl(rfp, - 2 * wordSize, esp); ++ ++ // rsp should be pointing at the return address to the caller (3) ++ ++ // Pick up the initial fp we should save ++ // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) ++ __ ldptr(rfp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); ++ ++/*#ifdef ASSERT ++ { ++ Label L; ++ __ movl(rscratch3, rfp); ++ __ ldptr(rfp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); ++ __ cmpl(rscratch3, rfp); ++ __ jcc(Assembler::zero, L); ++ __ stop("fp not equal @jzy"); ++ __ bind(L); ++ } ++#endif*/ ++ ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ if (UseStackBanging) { ++ __ ldws(rbx, Address(rdi, Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(rbx, rcx); ++ } ++#endif ++ ++ // Load address of array of frame pcs into rcx ++ __ ldptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); ++ ++ // Trash the old pc ++ __ addptr(esp, wordSize, esp); ++ ++ // Load address of array of frame sizes into rsi ++ __ ldptr(rsi, Address(rdi, Deoptimization::UnrollBlock::frame_sizes_offset_in_bytes())); ++ ++ // Load counter into rdx ++ __ ldws(rdx, Address(rdi, Deoptimization::UnrollBlock::number_of_frames_offset_in_bytes())); ++ ++ // Now adjust the caller's stack to make up for the extra locals ++ // but record the original sp so that we can save it in the skeletal interpreter ++ // frame and the stack walking of interpreter_sender will get the unextended sp ++ // value and not the "real" sp value. ++ ++ const Register sender_sp = rscratch3;//? 
jzy ++ //__ stop("which register can we use?"); ++ __ movl(sender_sp, esp); ++ __ ldws(rbx, Address(rdi, ++ Deoptimization::UnrollBlock:: ++ caller_adjustment_offset_in_bytes())); ++ __ subptr(esp, rbx, esp); ++ ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ldptr(rbx, Address(rsi, 0)); // Load frame size ++ __ subptr(rbx, 2*wordSize, rbx); // We'll push pc and ebp by hand ++ __ ldptr(RA, Address(rcx, 0)); // Save return address ++ __ enter(); // Save old & set new ebp ++ __ subptr(esp, rbx, esp); // Prolog ++ // This value is corrected by layout_activation_impl ++ __ stptr(R0, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ stptr(sender_sp, Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable ++ __ movl(sender_sp, esp); // Pass sender_sp to next frame ++ __ addptr(rsi, wordSize, rsi); // Bump array pointer (sizes) ++ __ addptr(rcx, wordSize, rcx); // Bump array pointer (pcs) ++ __ decrementl(rdx); // Decrement counter ++ __ jcc(Assembler::notZero, loop, rdx); ++ __ ldptr(RA, Address(rcx, 0)); // Save final return address ++ ++ // Re-push self-frame ++ __ enter(); // Save old & set new ebp ++ ++ // Allocate a full sized register save area. ++ // Return address and rbp are in place, so we allocate two less words. ++ __ subptr(esp, (frame_size_in_words - 2) * wordSize, esp); ++ ++ // Restore frame locals after moving the frame ++ __ store_double(FSF, Address(esp, RegisterSaver::fsf_offset_in_bytes())); ++ __ stptr(rax, Address(esp, RegisterSaver::v0_offset_in_bytes())); ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ // ++ // void Deoptimization::unpack_frames(JavaThread* thread, int exec_mode) ++ ++ // Use rbp because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(esp, rfp, the_pc, rscratch3); ++ ++ //__ andptr(esp, -(StackAlignmentInBytes), esp); // Fix stack alignment as required by ABI ++ __ movl(c_rarg0, rthread); ++ __ movl(c_rarg1, r14); // second arg: exec_mode r14 should be callee saved in sw jzy ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames))); ++ // Revert SP alignment after call since we're going to do some SP relative addressing below ++ __ ldptr(esp, Address(rthread, JavaThread::last_Java_sp_offset())); ++ ++ // Set an oopmap for the call site ++ // Use the same PC we used for the last java frame TODO:need modify add_gc_map's offset? ++ oop_maps->add_gc_map(the_pc - start, ++ new OopMap( frame_size_in_words, 0 )); ++ ++ // Clear fp AND pc ++ __ reset_last_Java_frame(true); ++ ++ // Collect return values ++ __ load_double(FSF, Address(esp, RegisterSaver::fsf_offset_in_bytes())); ++ __ ldptr(rax, Address(esp, RegisterSaver::v0_offset_in_bytes())); ++ // I think this is useless (throwing pc?) ++ __ ldptr(rdx, Address(esp, RegisterSaver::a2_offset_in_bytes())); ++ ++ // Pop self-frame. 
++ __ leave(); // Epilog ++ ++ // Jump to interpreter ++ __ ret(); ++ ++ // Make sure all code is generated ++ masm->flush(); ++ //__ stop("DeoptimizationBlob::create(unimplement): jzy"); ++ _deopt_blob = DeoptimizationBlob::create(&buffer, oop_maps, 0, exception_offset, reexecute_offset, frame_size_in_words); ++ _deopt_blob->set_unpack_with_exception_in_tls_offset(exception_in_tls_offset); ++#if INCLUDE_JVMCI ++ if (EnableJVMCI || UseAOT) { ++ _deopt_blob->set_uncommon_trap_offset(uncommon_trap_offset); ++ _deopt_blob->set_implicit_exception_uncommon_trap_offset(implicit_exception_uncommon_trap_offset); ++ } ++#endif ++} ++ ++#ifdef COMPILER2 ++//------------------------------generate_uncommon_trap_blob-------------------- ++void SharedRuntime::generate_uncommon_trap_blob() { ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ CodeBuffer buffer("uncommon_trap_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); ++ ++ address start = __ pc(); ++ //Register rax = V0; ++ //Register rbx = rmethod; ++ //Register rdi = c_rarg0; ++ //Register rsi = c_rarg1; ++ //Register rcx = c_rarg3; ++ //Register rdx = rscratch2_AT; ++ //Register rbp = rfp;//lsp?? ++ //__ stop("generate_uncommon_trap_blob"); ++ ++ // Push self-frame. We get here with a return address in RA ++ __ enter(); ++ // we don't expect an arg reg save area ++#ifndef PRODUCT ++ assert(frame::arg_reg_save_area_bytes == 0, "not expecting frame reg save area"); ++#endif ++ ++ // compiler left unloaded_class_index in j_rarg0 move to where the ++ // runtime expects it. ++ __ movws(c_rarg1, j_rarg0); ++ ++ Label retaddr; ++ __ set_last_Java_frame(esp, noreg, retaddr, rscratch3); ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // capture callee-saved registers as well as return values. ++ // Thread is in rdi already. ++ // ++ // UnrollBlock* uncommon_trap(JavaThread* thread, jint unloaded_class_index); ++ ++ __ movl(c_rarg0, rthread); ++ __ mov_immediate32s(c_rarg2, Deoptimization::Unpack_uncommon_trap); ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::uncommon_trap)), &retaddr); ++ ++ // Set an oopmap for the call site ++ OopMapSet* oop_maps = new OopMapSet(); ++ OopMap* map = new OopMap(SimpleRuntimeFrame::framesize, 0); ++ ++ // location of rbp is known implicitly by the frame sender code ++ ++ oop_maps->add_gc_map(__ offset(retaddr, start), map);//TODO:check jzy ++ ++ __ reset_last_Java_frame(false); ++ ++ // Load UnrollBlock* into rdi ++ __ movl(rdi, rax); ++ ++#ifdef ASSERT ++ { Label L; ++ __ mov_immediate32(rscratch3, (int32_t)Deoptimization::Unpack_uncommon_trap); ++ __ cmpptr(Address(rdi, Deoptimization::UnrollBlock::unpack_kind_offset_in_bytes()), rscratch3); ++ __ jcc(Assembler::equal, L); ++ __ stop("SharedRuntime::generate_deopt_blob: expected Unpack_uncommon_trap"); ++ __ bind(L); ++ } ++#endif ++ ++ // Pop all the frames we must move/replace. ++ // ++ // Frame picture (youngest to oldest) ++ // 1: self-frame (no frame link) ++ // 2: deopting frame (no frame link) ++ // 3: caller of deopting frame (could be compiled/interpreted). ++ ++ // Pop self-frame. We have no frame, and must rely only on rax and rsp. ++ __ addptr(esp, (SimpleRuntimeFrame::framesize - 2) << LogBytesPerInt, esp); // Epilog! 
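++
++  // Note: rdi now holds the UnrollBlock* returned by uncommon_trap(); the
++  // loads below read its frame metadata (deoptimized frame size, frame pcs,
++  // frame sizes, frame count, caller adjustment, initial info) in order to
++  // rebuild the skeletal interpreter frames that replace the popped frame.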
++ ++ // Pop deoptimized frame (int) ++ __ ldwu(rcx, Address(rdi, ++ Deoptimization::UnrollBlock:: ++ size_of_deoptimized_frame_offset_in_bytes())); ++ __ addptr(esp, rcx, esp); ++ ++ // rsp should be pointing at the return address to the caller (3) ++ ++ // Pick up the initial fp we should save ++ // restore rbp before stack bang because if stack overflow is thrown it needs to be pushed (and preserved) ++ __ ldptr(rfp, Address(rdi, Deoptimization::UnrollBlock::initial_info_offset_in_bytes())); ++ ++#ifdef ASSERT ++ // Compilers generate code that bang the stack by as much as the ++ // interpreter would need. So this stack banging should never ++ // trigger a fault. Verify that it does not on non product builds. ++ if (UseStackBanging) { ++ __ ldws(rbx, Address(rdi ,Deoptimization::UnrollBlock::total_frame_sizes_offset_in_bytes())); ++ __ bang_stack_size(rbx, rcx); ++ } ++#endif ++ ++ // Load address of array of frame pcs into rcx (address*) ++ __ ldptr(rcx, Address(rdi, Deoptimization::UnrollBlock::frame_pcs_offset_in_bytes())); ++ ++ // Trash the return pc ++ __ addptr(esp, wordSize, esp); ++ ++ // Load address of array of frame sizes into rsi (intptr_t*) ++ __ ldptr(rsi, Address(rdi, Deoptimization::UnrollBlock:: frame_sizes_offset_in_bytes())); ++ ++ // Counter ++ __ ldws(rdx, Address(rdi, Deoptimization::UnrollBlock:: number_of_frames_offset_in_bytes())); // (int) ++ ++ // Now adjust the caller's stack to make up for the extra locals but ++ // record the original sp so that we can save it in the skeletal ++ // interpreter frame and the stack walking of interpreter_sender ++ // will get the unextended sp value and not the "real" sp value. ++ ++ const Register sender_sp = rsender; ++ __ movl(sender_sp, esp); ++ __ ldws(rbx, Address(rdi, Deoptimization::UnrollBlock:: caller_adjustment_offset_in_bytes())); // (int) ++ __ subptr(esp, rbx, esp); ++ ++ // Push interpreter frames in a loop ++ Label loop; ++ __ bind(loop); ++ __ ldptr(rbx, Address(rsi, 0)); // Load frame size ++ __ subptr(rbx, 2 * wordSize, rbx); // We'll push pc and rbp by hand ++ __ ldptr(RA, Address(rcx, 0)); // Save return address ++ __ enter(); // Save old & set new rbp ++ __ subptr(esp, rbx, esp); // Prolog ++ __ stptr(sender_sp, ++ Address(rbp, frame::interpreter_frame_sender_sp_offset * wordSize)); // Make it walkable ++ // This value is corrected by layout_activation_impl ++ __ stptr(R0, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ movl(sender_sp, esp); // Pass sender_sp to next frame ++ __ addptr(rsi, wordSize, rsi); // Bump array pointer (sizes) ++ __ addptr(rcx, wordSize, rcx); // Bump array pointer (pcs) ++ __ decrementl(rdx); // Decrement counter ++ __ jcc(Assembler::notZero, loop, rdx); ++ __ ldptr(RA, Address(rcx, 0)); // Save final return address ++ ++ // Re-push self-frame ++ __ enter(); // Save old & set new rbp ++ __ subptr(esp, (SimpleRuntimeFrame::framesize - 4) << LogBytesPerInt, esp, rscratch3); ++ // Prolog ++ ++ // Use rbp because the frames look interpreted now ++ // Save "the_pc" since it cannot easily be retrieved using the last_java_SP after we aligned SP. ++ // Don't need the precise return PC here, just precise enough to point into this code blob. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(esp, rfp, the_pc, rscratch3); ++ ++ // Call C code. Need thread but NOT official VM entry ++ // crud. We cannot block on this call, no GC can happen. Call should ++ // restore return values to their stack-slots with the new SP. ++ // Thread is in rdi already. 
++ // ++ // BasicType unpack_frames(JavaThread* thread, int exec_mode); ++ ++ //__ andptr(esp, -(StackAlignmentInBytes), esp); // Align SP as required by ABI ++ __ movl(c_rarg0, rthread); ++ __ mov_immediate64(c_rarg1, Deoptimization::Unpack_uncommon_trap); ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, Deoptimization::unpack_frames)));//TODO:here need to modify offset? swjdk8 modifies this offset jzy ++ ++ // Set an oopmap for the call site ++ // Use the same PC we used for the last java frame ++ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0)); ++ ++ // Clear fp AND pc ++ __ reset_last_Java_frame(true); ++ ++ // Pop self-frame. ++ __ leave(); // Epilog ++ ++ // Jump to interpreter ++ __ ret(); ++ ++ // Make sure all code is generated ++ masm->flush(); ++ ++ _uncommon_trap_blob = UncommonTrapBlob::create(&buffer, oop_maps, ++ SimpleRuntimeFrame::framesize >> 1); ++} ++#endif // COMPILER2 ++ ++ ++//------------------------------generate_handler_blob------ ++// ++// Generate a special Compile2Runtime blob that saves all registers, ++// and setup oopmap. ++// ++SafepointBlob* SharedRuntime::generate_handler_blob(address call_ptr, int poll_type) { ++ assert(StubRoutines::forward_exception_entry() != NULL, ++ "must be generated before"); ++ ++ ResourceMark rm; ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map; ++ ++ // Allocate space for the code. Setup code generation tools. ++ CodeBuffer buffer("handler_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ address start = __ pc(); ++ address call_pc = NULL; ++ int frame_size_in_words; ++ bool cause_return = (poll_type == POLL_AT_RETURN); ++ bool save_vectors = (poll_type == POLL_AT_VECTOR_LOOP); ++ Register rbx = rmethod; ++ Register rax = V0; ++ ++// if (UseRTMLocking) { ++// // Abort RTM transaction before calling runtime ++// // because critical section will be large and will be ++// // aborted anyway. Also nmethod could be deoptimized. ++// __ xabort(0); ++// } ++//__ stop("generate_handler_blob"); ++ // Make room for return address (or push it again) ++ //if (!cause_return) { ++ //__ push(rbx); ++ //__ ldptr(RA, Address(rthread, JavaThread::saved_exception_pc_offset()));//TODO:need this? jzy ++ //} ++ ++ // Save registers, fpu state, and flags ++ map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words, save_vectors); ++ ++ // The following is basically a call_VM. However, we need the precise ++ // address of the call in order to generate an oopmap. Hence, we do all the ++ // work outselves. ++ Label retaddr; ++ __ set_last_Java_frame(esp, noreg, retaddr, rscratch3); ++ ++ // The return address must always be correct so that frame constructor never ++ // sees an invalid pc. ++ ++ if (!cause_return) { ++ // Get the return pc saved by the signal handler and stash it in its appropriate place on the stack. ++ // Additionally, rbx is a callee saved register and we can look at it later to determine ++ // if someone changed the return address for us! ++ __ ldptr(rbx, Address(rthread, JavaThread::saved_exception_pc_offset())); ++ __ stptr(rbx, Address(rfp, wordSize)); ++ } ++ ++ // Do the call ++ __ movl(c_rarg0, rthread); ++ __ call(RuntimeAddress(call_ptr), &retaddr); ++ ++ // Set an oopmap for the call site. This oopmap will map all ++ // oop-registers and debug-info registers as callee-saved. This ++ // will allow deoptimization at this safepoint to find all possible ++ // debug-info recordings, as well as let GC find all oops. 
++ ++ oop_maps->add_gc_map( __ offset(retaddr, start), map); ++ ++ Label noException; ++ ++ __ reset_last_Java_frame(false); ++ ++ __ cmpptr(Address(rthread, Thread::pending_exception_offset()), R0); ++ __ jcc(Assembler::equal, noException); ++ ++ // Exception pending ++ ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ ++ __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ ++ // No exception case ++ __ bind(noException); ++ ++ Label no_adjust, bail, no_prefix, not_special; ++ if (SafepointMechanism::uses_thread_local_poll() && !cause_return) { ++ // If our stashed return pc was modified by the runtime we avoid touching it ++ __ cmpptr(rbx, Address(rfp, wordSize)); ++ __ jcc(Assembler::notEqual, no_adjust); ++ ++ // Skip over the poll instruction. ++ // See NativeInstruction::is_safepoint_poll() ++#ifdef ASSERT ++ // Verify the correct encoding of the poll we're about to skip. ++ __ ldwu(rscratch3, Address(rbx, 0)); ++ __ srll(rscratch3, 26, rscratch2_AT);//get op ++ __ cmpl(rscratch2_AT, Assembler::op_ldw); ++ __ jcc(Assembler::notEqual, bail); ++ ++ __ srll(rscratch3, 21, rscratch2_AT);//get ra ++ __ andw(rscratch2_AT, 0x1F, rscratch2_AT); ++ __ cmpl(rscratch2_AT, 25);//rscratch3 t11 ++ __ jcc(Assembler::notEqual, bail); ++ ++ __ andw(rscratch3, 0xFFFF, rscratch2_AT); ++// __ cmpl(rscratch2_AT, R0);//disp t11 ++// __ jcc(Assembler::notEqual, bail); ++ __ bne_l(rscratch2_AT, bail); ++ ++#endif ++ // Adjust return pc forward to step over the safepoint poll instruction ++ __ stop("TODO:need check jzy"); ++ __ addptr(rbx, 4, rbx); //TODO:refactor need const jzy ++ __ stptr(rbx, Address(rfp, wordSize)); ++ } ++ ++ __ bind(no_adjust); ++ // Normal exit, restore registers and exit. ++ RegisterSaver::restore_live_registers(masm, save_vectors); ++ __ ret(); ++ ++#ifdef ASSERT ++ __ bind(bail); ++ __ stop("Attempting to adjust pc to skip safepoint poll but the return point is not what we expected"); ++#endif ++ ++ // Make sure all code is generated ++ masm->flush(); ++ ++ // Fill-out other meta info ++ return SafepointBlob::create(&buffer, oop_maps, frame_size_in_words); ++} ++ ++// ++// generate_resolve_blob - call resolution (static/virtual/opt-virtual/ic-miss ++// ++// Generate a stub that calls into vm to find out the proper destination ++// of a java call. All the argument registers are live at this point ++// but since this is generic code we don't know what they are and the caller ++// must do any gc of the args. ++// ++RuntimeStub* SharedRuntime::generate_resolve_blob(address destination, const char* name) { ++ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before"); ++ ++ // allocate space for the code ++ ResourceMark rm; ++ ++ CodeBuffer buffer(name, 2000, 512); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ int frame_size_in_words; ++ ++ OopMapSet *oop_maps = new OopMapSet(); ++ OopMap* map = NULL; ++ ++ //int start = __ offset(); ++ address start_pc = __ pc(); ++ Register rbx = rmethod; ++ Register rax = V0; ++ ++ map = RegisterSaver::save_live_registers(masm, 0, &frame_size_in_words); ++ ++ int frame_complete = __ offset(); ++ ++ Label retaddr; ++ //__ debug_stop("TODO:how set pc? jzy generate_resolve_blob"); ++ __ set_last_Java_frame(esp, noreg, retaddr, rscratch3, rscratch2_AT); ++ ++ __ movl(c_rarg0, rthread); ++ ++ __ call(RuntimeAddress(destination), &retaddr); ++ ++ // Set an oopmap for the call site. 
++ // We need this not only for callee-saved registers, but also for volatile ++ // registers that the compiler might be keeping live across a safepoint. ++ // sw need setfpec1, so we should -4. ++ oop_maps->add_gc_map( __ offset(retaddr, start_pc), map); ++ ++ // rax contains the address we are going to jump to assuming no exception got installed ++ ++ // clear last_Java_sp ++ __ reset_last_Java_frame(false); ++ // check for pending exceptions ++ Label pending; ++ __ cmpptr(Address(rthread, Thread::pending_exception_offset()), (int32_t)NULL_WORD); ++ __ jcc(Assembler::notEqual, pending); ++ ++ // get the returned Method* ++ __ get_vm_result_2(rbx, rthread); ++ __ stptr(rbx, Address(esp, RegisterSaver::rmethod_offset_in_bytes())); ++ ++ __ stptr(rax, Address(esp, RegisterSaver::v0_offset_in_bytes())); ++ ++ RegisterSaver::restore_live_registers(masm); ++ ++ // We are back the the original state on entry and ready to go. ++ ++ __ jmp(rax); ++ ++ // Pending exception after the safepoint ++ ++ __ bind(pending); ++ ++ RegisterSaver::restore_live_registers(masm); ++ ++ // exception pending => remove activation and forward to exception handler ++ ++ __ stptr(R0, Address(rthread, JavaThread::vm_result_offset())); ++ ++ __ ldptr(rax, Address(rthread, Thread::pending_exception_offset())); ++ __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ ++ // ------------- ++ // make sure all code is generated ++ masm->flush(); ++ ++ // return the blob ++ // frame_size_words or bytes?? ++ return RuntimeStub::new_runtime_stub(name, &buffer, frame_complete, frame_size_in_words, oop_maps, true); ++} ++ ++/* ================================= CRC32 ================================= */ ++/* ========================================================================= */ ++static const int crc_table[8][256] = ++ { ++ { ++ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, ++ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, ++ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, ++ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, ++ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, ++ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, ++ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, ++ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, ++ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, ++ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, ++ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, ++ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, ++ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, ++ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, ++ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, ++ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, ++ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, ++ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, ++ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, ++ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, ++ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, ++ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, ++ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, ++ 0xc90c2086UL, 0x5768b525UL, 
0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, ++ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, ++ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, ++ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, ++ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, ++ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, ++ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, ++ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, ++ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, ++ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, ++ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, ++ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, ++ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, ++ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, ++ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, ++ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, ++ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL, ++ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, ++ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, ++ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, ++ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, ++ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, ++ 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, ++ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, ++ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, ++ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, ++ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, ++ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, ++ 0x2d02ef8dUL ++ }, ++ { ++ 0x00000000UL, 0x191b3141UL, 0x32366282UL, 0x2b2d53c3UL, 0x646cc504UL, ++ 0x7d77f445UL, 0x565aa786UL, 0x4f4196c7UL, 0xc8d98a08UL, 0xd1c2bb49UL, ++ 0xfaefe88aUL, 0xe3f4d9cbUL, 0xacb54f0cUL, 0xb5ae7e4dUL, 0x9e832d8eUL, ++ 0x87981ccfUL, 0x4ac21251UL, 0x53d92310UL, 0x78f470d3UL, 0x61ef4192UL, ++ 0x2eaed755UL, 0x37b5e614UL, 0x1c98b5d7UL, 0x05838496UL, 0x821b9859UL, ++ 0x9b00a918UL, 0xb02dfadbUL, 0xa936cb9aUL, 0xe6775d5dUL, 0xff6c6c1cUL, ++ 0xd4413fdfUL, 0xcd5a0e9eUL, 0x958424a2UL, 0x8c9f15e3UL, 0xa7b24620UL, ++ 0xbea97761UL, 0xf1e8e1a6UL, 0xe8f3d0e7UL, 0xc3de8324UL, 0xdac5b265UL, ++ 0x5d5daeaaUL, 0x44469febUL, 0x6f6bcc28UL, 0x7670fd69UL, 0x39316baeUL, ++ 0x202a5aefUL, 0x0b07092cUL, 0x121c386dUL, 0xdf4636f3UL, 0xc65d07b2UL, ++ 0xed705471UL, 0xf46b6530UL, 0xbb2af3f7UL, 0xa231c2b6UL, 0x891c9175UL, ++ 0x9007a034UL, 0x179fbcfbUL, 0x0e848dbaUL, 0x25a9de79UL, 0x3cb2ef38UL, ++ 0x73f379ffUL, 0x6ae848beUL, 0x41c51b7dUL, 0x58de2a3cUL, 0xf0794f05UL, ++ 0xe9627e44UL, 0xc24f2d87UL, 0xdb541cc6UL, 0x94158a01UL, 0x8d0ebb40UL, ++ 0xa623e883UL, 0xbf38d9c2UL, 0x38a0c50dUL, 0x21bbf44cUL, 0x0a96a78fUL, ++ 0x138d96ceUL, 0x5ccc0009UL, 0x45d73148UL, 0x6efa628bUL, 0x77e153caUL, ++ 0xbabb5d54UL, 0xa3a06c15UL, 0x888d3fd6UL, 0x91960e97UL, 0xded79850UL, ++ 0xc7cca911UL, 0xece1fad2UL, 0xf5facb93UL, 0x7262d75cUL, 0x6b79e61dUL, ++ 0x4054b5deUL, 0x594f849fUL, 0x160e1258UL, 0x0f152319UL, 0x243870daUL, ++ 0x3d23419bUL, 0x65fd6ba7UL, 0x7ce65ae6UL, 0x57cb0925UL, 0x4ed03864UL, ++ 0x0191aea3UL, 0x188a9fe2UL, 0x33a7cc21UL, 
0x2abcfd60UL, 0xad24e1afUL, ++ 0xb43fd0eeUL, 0x9f12832dUL, 0x8609b26cUL, 0xc94824abUL, 0xd05315eaUL, ++ 0xfb7e4629UL, 0xe2657768UL, 0x2f3f79f6UL, 0x362448b7UL, 0x1d091b74UL, ++ 0x04122a35UL, 0x4b53bcf2UL, 0x52488db3UL, 0x7965de70UL, 0x607eef31UL, ++ 0xe7e6f3feUL, 0xfefdc2bfUL, 0xd5d0917cUL, 0xcccba03dUL, 0x838a36faUL, ++ 0x9a9107bbUL, 0xb1bc5478UL, 0xa8a76539UL, 0x3b83984bUL, 0x2298a90aUL, ++ 0x09b5fac9UL, 0x10aecb88UL, 0x5fef5d4fUL, 0x46f46c0eUL, 0x6dd93fcdUL, ++ 0x74c20e8cUL, 0xf35a1243UL, 0xea412302UL, 0xc16c70c1UL, 0xd8774180UL, ++ 0x9736d747UL, 0x8e2de606UL, 0xa500b5c5UL, 0xbc1b8484UL, 0x71418a1aUL, ++ 0x685abb5bUL, 0x4377e898UL, 0x5a6cd9d9UL, 0x152d4f1eUL, 0x0c367e5fUL, ++ 0x271b2d9cUL, 0x3e001cddUL, 0xb9980012UL, 0xa0833153UL, 0x8bae6290UL, ++ 0x92b553d1UL, 0xddf4c516UL, 0xc4eff457UL, 0xefc2a794UL, 0xf6d996d5UL, ++ 0xae07bce9UL, 0xb71c8da8UL, 0x9c31de6bUL, 0x852aef2aUL, 0xca6b79edUL, ++ 0xd37048acUL, 0xf85d1b6fUL, 0xe1462a2eUL, 0x66de36e1UL, 0x7fc507a0UL, ++ 0x54e85463UL, 0x4df36522UL, 0x02b2f3e5UL, 0x1ba9c2a4UL, 0x30849167UL, ++ 0x299fa026UL, 0xe4c5aeb8UL, 0xfdde9ff9UL, 0xd6f3cc3aUL, 0xcfe8fd7bUL, ++ 0x80a96bbcUL, 0x99b25afdUL, 0xb29f093eUL, 0xab84387fUL, 0x2c1c24b0UL, ++ 0x350715f1UL, 0x1e2a4632UL, 0x07317773UL, 0x4870e1b4UL, 0x516bd0f5UL, ++ 0x7a468336UL, 0x635db277UL, 0xcbfad74eUL, 0xd2e1e60fUL, 0xf9ccb5ccUL, ++ 0xe0d7848dUL, 0xaf96124aUL, 0xb68d230bUL, 0x9da070c8UL, 0x84bb4189UL, ++ 0x03235d46UL, 0x1a386c07UL, 0x31153fc4UL, 0x280e0e85UL, 0x674f9842UL, ++ 0x7e54a903UL, 0x5579fac0UL, 0x4c62cb81UL, 0x8138c51fUL, 0x9823f45eUL, ++ 0xb30ea79dUL, 0xaa1596dcUL, 0xe554001bUL, 0xfc4f315aUL, 0xd7626299UL, ++ 0xce7953d8UL, 0x49e14f17UL, 0x50fa7e56UL, 0x7bd72d95UL, 0x62cc1cd4UL, ++ 0x2d8d8a13UL, 0x3496bb52UL, 0x1fbbe891UL, 0x06a0d9d0UL, 0x5e7ef3ecUL, ++ 0x4765c2adUL, 0x6c48916eUL, 0x7553a02fUL, 0x3a1236e8UL, 0x230907a9UL, ++ 0x0824546aUL, 0x113f652bUL, 0x96a779e4UL, 0x8fbc48a5UL, 0xa4911b66UL, ++ 0xbd8a2a27UL, 0xf2cbbce0UL, 0xebd08da1UL, 0xc0fdde62UL, 0xd9e6ef23UL, ++ 0x14bce1bdUL, 0x0da7d0fcUL, 0x268a833fUL, 0x3f91b27eUL, 0x70d024b9UL, ++ 0x69cb15f8UL, 0x42e6463bUL, 0x5bfd777aUL, 0xdc656bb5UL, 0xc57e5af4UL, ++ 0xee530937UL, 0xf7483876UL, 0xb809aeb1UL, 0xa1129ff0UL, 0x8a3fcc33UL, ++ 0x9324fd72UL ++ }, ++ { ++ 0x00000000UL, 0x01c26a37UL, 0x0384d46eUL, 0x0246be59UL, 0x0709a8dcUL, ++ 0x06cbc2ebUL, 0x048d7cb2UL, 0x054f1685UL, 0x0e1351b8UL, 0x0fd13b8fUL, ++ 0x0d9785d6UL, 0x0c55efe1UL, 0x091af964UL, 0x08d89353UL, 0x0a9e2d0aUL, ++ 0x0b5c473dUL, 0x1c26a370UL, 0x1de4c947UL, 0x1fa2771eUL, 0x1e601d29UL, ++ 0x1b2f0bacUL, 0x1aed619bUL, 0x18abdfc2UL, 0x1969b5f5UL, 0x1235f2c8UL, ++ 0x13f798ffUL, 0x11b126a6UL, 0x10734c91UL, 0x153c5a14UL, 0x14fe3023UL, ++ 0x16b88e7aUL, 0x177ae44dUL, 0x384d46e0UL, 0x398f2cd7UL, 0x3bc9928eUL, ++ 0x3a0bf8b9UL, 0x3f44ee3cUL, 0x3e86840bUL, 0x3cc03a52UL, 0x3d025065UL, ++ 0x365e1758UL, 0x379c7d6fUL, 0x35dac336UL, 0x3418a901UL, 0x3157bf84UL, ++ 0x3095d5b3UL, 0x32d36beaUL, 0x331101ddUL, 0x246be590UL, 0x25a98fa7UL, ++ 0x27ef31feUL, 0x262d5bc9UL, 0x23624d4cUL, 0x22a0277bUL, 0x20e69922UL, ++ 0x2124f315UL, 0x2a78b428UL, 0x2bbade1fUL, 0x29fc6046UL, 0x283e0a71UL, ++ 0x2d711cf4UL, 0x2cb376c3UL, 0x2ef5c89aUL, 0x2f37a2adUL, 0x709a8dc0UL, ++ 0x7158e7f7UL, 0x731e59aeUL, 0x72dc3399UL, 0x7793251cUL, 0x76514f2bUL, ++ 0x7417f172UL, 0x75d59b45UL, 0x7e89dc78UL, 0x7f4bb64fUL, 0x7d0d0816UL, ++ 0x7ccf6221UL, 0x798074a4UL, 0x78421e93UL, 0x7a04a0caUL, 0x7bc6cafdUL, ++ 0x6cbc2eb0UL, 0x6d7e4487UL, 0x6f38fadeUL, 0x6efa90e9UL, 0x6bb5866cUL, ++ 0x6a77ec5bUL, 0x68315202UL, 0x69f33835UL, 0x62af7f08UL, 
0x636d153fUL, ++ 0x612bab66UL, 0x60e9c151UL, 0x65a6d7d4UL, 0x6464bde3UL, 0x662203baUL, ++ 0x67e0698dUL, 0x48d7cb20UL, 0x4915a117UL, 0x4b531f4eUL, 0x4a917579UL, ++ 0x4fde63fcUL, 0x4e1c09cbUL, 0x4c5ab792UL, 0x4d98dda5UL, 0x46c49a98UL, ++ 0x4706f0afUL, 0x45404ef6UL, 0x448224c1UL, 0x41cd3244UL, 0x400f5873UL, ++ 0x4249e62aUL, 0x438b8c1dUL, 0x54f16850UL, 0x55330267UL, 0x5775bc3eUL, ++ 0x56b7d609UL, 0x53f8c08cUL, 0x523aaabbUL, 0x507c14e2UL, 0x51be7ed5UL, ++ 0x5ae239e8UL, 0x5b2053dfUL, 0x5966ed86UL, 0x58a487b1UL, 0x5deb9134UL, ++ 0x5c29fb03UL, 0x5e6f455aUL, 0x5fad2f6dUL, 0xe1351b80UL, 0xe0f771b7UL, ++ 0xe2b1cfeeUL, 0xe373a5d9UL, 0xe63cb35cUL, 0xe7fed96bUL, 0xe5b86732UL, ++ 0xe47a0d05UL, 0xef264a38UL, 0xeee4200fUL, 0xeca29e56UL, 0xed60f461UL, ++ 0xe82fe2e4UL, 0xe9ed88d3UL, 0xebab368aUL, 0xea695cbdUL, 0xfd13b8f0UL, ++ 0xfcd1d2c7UL, 0xfe976c9eUL, 0xff5506a9UL, 0xfa1a102cUL, 0xfbd87a1bUL, ++ 0xf99ec442UL, 0xf85cae75UL, 0xf300e948UL, 0xf2c2837fUL, 0xf0843d26UL, ++ 0xf1465711UL, 0xf4094194UL, 0xf5cb2ba3UL, 0xf78d95faUL, 0xf64fffcdUL, ++ 0xd9785d60UL, 0xd8ba3757UL, 0xdafc890eUL, 0xdb3ee339UL, 0xde71f5bcUL, ++ 0xdfb39f8bUL, 0xddf521d2UL, 0xdc374be5UL, 0xd76b0cd8UL, 0xd6a966efUL, ++ 0xd4efd8b6UL, 0xd52db281UL, 0xd062a404UL, 0xd1a0ce33UL, 0xd3e6706aUL, ++ 0xd2241a5dUL, 0xc55efe10UL, 0xc49c9427UL, 0xc6da2a7eUL, 0xc7184049UL, ++ 0xc25756ccUL, 0xc3953cfbUL, 0xc1d382a2UL, 0xc011e895UL, 0xcb4dafa8UL, ++ 0xca8fc59fUL, 0xc8c97bc6UL, 0xc90b11f1UL, 0xcc440774UL, 0xcd866d43UL, ++ 0xcfc0d31aUL, 0xce02b92dUL, 0x91af9640UL, 0x906dfc77UL, 0x922b422eUL, ++ 0x93e92819UL, 0x96a63e9cUL, 0x976454abUL, 0x9522eaf2UL, 0x94e080c5UL, ++ 0x9fbcc7f8UL, 0x9e7eadcfUL, 0x9c381396UL, 0x9dfa79a1UL, 0x98b56f24UL, ++ 0x99770513UL, 0x9b31bb4aUL, 0x9af3d17dUL, 0x8d893530UL, 0x8c4b5f07UL, ++ 0x8e0de15eUL, 0x8fcf8b69UL, 0x8a809decUL, 0x8b42f7dbUL, 0x89044982UL, ++ 0x88c623b5UL, 0x839a6488UL, 0x82580ebfUL, 0x801eb0e6UL, 0x81dcdad1UL, ++ 0x8493cc54UL, 0x8551a663UL, 0x8717183aUL, 0x86d5720dUL, 0xa9e2d0a0UL, ++ 0xa820ba97UL, 0xaa6604ceUL, 0xaba46ef9UL, 0xaeeb787cUL, 0xaf29124bUL, ++ 0xad6fac12UL, 0xacadc625UL, 0xa7f18118UL, 0xa633eb2fUL, 0xa4755576UL, ++ 0xa5b73f41UL, 0xa0f829c4UL, 0xa13a43f3UL, 0xa37cfdaaUL, 0xa2be979dUL, ++ 0xb5c473d0UL, 0xb40619e7UL, 0xb640a7beUL, 0xb782cd89UL, 0xb2cddb0cUL, ++ 0xb30fb13bUL, 0xb1490f62UL, 0xb08b6555UL, 0xbbd72268UL, 0xba15485fUL, ++ 0xb853f606UL, 0xb9919c31UL, 0xbcde8ab4UL, 0xbd1ce083UL, 0xbf5a5edaUL, ++ 0xbe9834edUL ++ }, ++ { ++ 0x00000000UL, 0xb8bc6765UL, 0xaa09c88bUL, 0x12b5afeeUL, 0x8f629757UL, ++ 0x37def032UL, 0x256b5fdcUL, 0x9dd738b9UL, 0xc5b428efUL, 0x7d084f8aUL, ++ 0x6fbde064UL, 0xd7018701UL, 0x4ad6bfb8UL, 0xf26ad8ddUL, 0xe0df7733UL, ++ 0x58631056UL, 0x5019579fUL, 0xe8a530faUL, 0xfa109f14UL, 0x42acf871UL, ++ 0xdf7bc0c8UL, 0x67c7a7adUL, 0x75720843UL, 0xcdce6f26UL, 0x95ad7f70UL, ++ 0x2d111815UL, 0x3fa4b7fbUL, 0x8718d09eUL, 0x1acfe827UL, 0xa2738f42UL, ++ 0xb0c620acUL, 0x087a47c9UL, 0xa032af3eUL, 0x188ec85bUL, 0x0a3b67b5UL, ++ 0xb28700d0UL, 0x2f503869UL, 0x97ec5f0cUL, 0x8559f0e2UL, 0x3de59787UL, ++ 0x658687d1UL, 0xdd3ae0b4UL, 0xcf8f4f5aUL, 0x7733283fUL, 0xeae41086UL, ++ 0x525877e3UL, 0x40edd80dUL, 0xf851bf68UL, 0xf02bf8a1UL, 0x48979fc4UL, ++ 0x5a22302aUL, 0xe29e574fUL, 0x7f496ff6UL, 0xc7f50893UL, 0xd540a77dUL, ++ 0x6dfcc018UL, 0x359fd04eUL, 0x8d23b72bUL, 0x9f9618c5UL, 0x272a7fa0UL, ++ 0xbafd4719UL, 0x0241207cUL, 0x10f48f92UL, 0xa848e8f7UL, 0x9b14583dUL, ++ 0x23a83f58UL, 0x311d90b6UL, 0x89a1f7d3UL, 0x1476cf6aUL, 0xaccaa80fUL, ++ 0xbe7f07e1UL, 0x06c36084UL, 0x5ea070d2UL, 0xe61c17b7UL, 0xf4a9b859UL, ++ 
0x4c15df3cUL, 0xd1c2e785UL, 0x697e80e0UL, 0x7bcb2f0eUL, 0xc377486bUL, ++ 0xcb0d0fa2UL, 0x73b168c7UL, 0x6104c729UL, 0xd9b8a04cUL, 0x446f98f5UL, ++ 0xfcd3ff90UL, 0xee66507eUL, 0x56da371bUL, 0x0eb9274dUL, 0xb6054028UL, ++ 0xa4b0efc6UL, 0x1c0c88a3UL, 0x81dbb01aUL, 0x3967d77fUL, 0x2bd27891UL, ++ 0x936e1ff4UL, 0x3b26f703UL, 0x839a9066UL, 0x912f3f88UL, 0x299358edUL, ++ 0xb4446054UL, 0x0cf80731UL, 0x1e4da8dfUL, 0xa6f1cfbaUL, 0xfe92dfecUL, ++ 0x462eb889UL, 0x549b1767UL, 0xec277002UL, 0x71f048bbUL, 0xc94c2fdeUL, ++ 0xdbf98030UL, 0x6345e755UL, 0x6b3fa09cUL, 0xd383c7f9UL, 0xc1366817UL, ++ 0x798a0f72UL, 0xe45d37cbUL, 0x5ce150aeUL, 0x4e54ff40UL, 0xf6e89825UL, ++ 0xae8b8873UL, 0x1637ef16UL, 0x048240f8UL, 0xbc3e279dUL, 0x21e91f24UL, ++ 0x99557841UL, 0x8be0d7afUL, 0x335cb0caUL, 0xed59b63bUL, 0x55e5d15eUL, ++ 0x47507eb0UL, 0xffec19d5UL, 0x623b216cUL, 0xda874609UL, 0xc832e9e7UL, ++ 0x708e8e82UL, 0x28ed9ed4UL, 0x9051f9b1UL, 0x82e4565fUL, 0x3a58313aUL, ++ 0xa78f0983UL, 0x1f336ee6UL, 0x0d86c108UL, 0xb53aa66dUL, 0xbd40e1a4UL, ++ 0x05fc86c1UL, 0x1749292fUL, 0xaff54e4aUL, 0x322276f3UL, 0x8a9e1196UL, ++ 0x982bbe78UL, 0x2097d91dUL, 0x78f4c94bUL, 0xc048ae2eUL, 0xd2fd01c0UL, ++ 0x6a4166a5UL, 0xf7965e1cUL, 0x4f2a3979UL, 0x5d9f9697UL, 0xe523f1f2UL, ++ 0x4d6b1905UL, 0xf5d77e60UL, 0xe762d18eUL, 0x5fdeb6ebUL, 0xc2098e52UL, ++ 0x7ab5e937UL, 0x680046d9UL, 0xd0bc21bcUL, 0x88df31eaUL, 0x3063568fUL, ++ 0x22d6f961UL, 0x9a6a9e04UL, 0x07bda6bdUL, 0xbf01c1d8UL, 0xadb46e36UL, ++ 0x15080953UL, 0x1d724e9aUL, 0xa5ce29ffUL, 0xb77b8611UL, 0x0fc7e174UL, ++ 0x9210d9cdUL, 0x2aacbea8UL, 0x38191146UL, 0x80a57623UL, 0xd8c66675UL, ++ 0x607a0110UL, 0x72cfaefeUL, 0xca73c99bUL, 0x57a4f122UL, 0xef189647UL, ++ 0xfdad39a9UL, 0x45115eccUL, 0x764dee06UL, 0xcef18963UL, 0xdc44268dUL, ++ 0x64f841e8UL, 0xf92f7951UL, 0x41931e34UL, 0x5326b1daUL, 0xeb9ad6bfUL, ++ 0xb3f9c6e9UL, 0x0b45a18cUL, 0x19f00e62UL, 0xa14c6907UL, 0x3c9b51beUL, ++ 0x842736dbUL, 0x96929935UL, 0x2e2efe50UL, 0x2654b999UL, 0x9ee8defcUL, ++ 0x8c5d7112UL, 0x34e11677UL, 0xa9362eceUL, 0x118a49abUL, 0x033fe645UL, ++ 0xbb838120UL, 0xe3e09176UL, 0x5b5cf613UL, 0x49e959fdUL, 0xf1553e98UL, ++ 0x6c820621UL, 0xd43e6144UL, 0xc68bceaaUL, 0x7e37a9cfUL, 0xd67f4138UL, ++ 0x6ec3265dUL, 0x7c7689b3UL, 0xc4caeed6UL, 0x591dd66fUL, 0xe1a1b10aUL, ++ 0xf3141ee4UL, 0x4ba87981UL, 0x13cb69d7UL, 0xab770eb2UL, 0xb9c2a15cUL, ++ 0x017ec639UL, 0x9ca9fe80UL, 0x241599e5UL, 0x36a0360bUL, 0x8e1c516eUL, ++ 0x866616a7UL, 0x3eda71c2UL, 0x2c6fde2cUL, 0x94d3b949UL, 0x090481f0UL, ++ 0xb1b8e695UL, 0xa30d497bUL, 0x1bb12e1eUL, 0x43d23e48UL, 0xfb6e592dUL, ++ 0xe9dbf6c3UL, 0x516791a6UL, 0xccb0a91fUL, 0x740cce7aUL, 0x66b96194UL, ++ 0xde0506f1UL ++ }, ++ { ++ 0x00000000UL, 0x96300777UL, 0x2c610eeeUL, 0xba510999UL, 0x19c46d07UL, ++ 0x8ff46a70UL, 0x35a563e9UL, 0xa395649eUL, 0x3288db0eUL, 0xa4b8dc79UL, ++ 0x1ee9d5e0UL, 0x88d9d297UL, 0x2b4cb609UL, 0xbd7cb17eUL, 0x072db8e7UL, ++ 0x911dbf90UL, 0x6410b71dUL, 0xf220b06aUL, 0x4871b9f3UL, 0xde41be84UL, ++ 0x7dd4da1aUL, 0xebe4dd6dUL, 0x51b5d4f4UL, 0xc785d383UL, 0x56986c13UL, ++ 0xc0a86b64UL, 0x7af962fdUL, 0xecc9658aUL, 0x4f5c0114UL, 0xd96c0663UL, ++ 0x633d0ffaUL, 0xf50d088dUL, 0xc8206e3bUL, 0x5e10694cUL, 0xe44160d5UL, ++ 0x727167a2UL, 0xd1e4033cUL, 0x47d4044bUL, 0xfd850dd2UL, 0x6bb50aa5UL, ++ 0xfaa8b535UL, 0x6c98b242UL, 0xd6c9bbdbUL, 0x40f9bcacUL, 0xe36cd832UL, ++ 0x755cdf45UL, 0xcf0dd6dcUL, 0x593dd1abUL, 0xac30d926UL, 0x3a00de51UL, ++ 0x8051d7c8UL, 0x1661d0bfUL, 0xb5f4b421UL, 0x23c4b356UL, 0x9995bacfUL, ++ 0x0fa5bdb8UL, 0x9eb80228UL, 0x0888055fUL, 0xb2d90cc6UL, 0x24e90bb1UL, ++ 0x877c6f2fUL, 
0x114c6858UL, 0xab1d61c1UL, 0x3d2d66b6UL, 0x9041dc76UL, ++ 0x0671db01UL, 0xbc20d298UL, 0x2a10d5efUL, 0x8985b171UL, 0x1fb5b606UL, ++ 0xa5e4bf9fUL, 0x33d4b8e8UL, 0xa2c90778UL, 0x34f9000fUL, 0x8ea80996UL, ++ 0x18980ee1UL, 0xbb0d6a7fUL, 0x2d3d6d08UL, 0x976c6491UL, 0x015c63e6UL, ++ 0xf4516b6bUL, 0x62616c1cUL, 0xd8306585UL, 0x4e0062f2UL, 0xed95066cUL, ++ 0x7ba5011bUL, 0xc1f40882UL, 0x57c40ff5UL, 0xc6d9b065UL, 0x50e9b712UL, ++ 0xeab8be8bUL, 0x7c88b9fcUL, 0xdf1ddd62UL, 0x492dda15UL, 0xf37cd38cUL, ++ 0x654cd4fbUL, 0x5861b24dUL, 0xce51b53aUL, 0x7400bca3UL, 0xe230bbd4UL, ++ 0x41a5df4aUL, 0xd795d83dUL, 0x6dc4d1a4UL, 0xfbf4d6d3UL, 0x6ae96943UL, ++ 0xfcd96e34UL, 0x468867adUL, 0xd0b860daUL, 0x732d0444UL, 0xe51d0333UL, ++ 0x5f4c0aaaUL, 0xc97c0dddUL, 0x3c710550UL, 0xaa410227UL, 0x10100bbeUL, ++ 0x86200cc9UL, 0x25b56857UL, 0xb3856f20UL, 0x09d466b9UL, 0x9fe461ceUL, ++ 0x0ef9de5eUL, 0x98c9d929UL, 0x2298d0b0UL, 0xb4a8d7c7UL, 0x173db359UL, ++ 0x810db42eUL, 0x3b5cbdb7UL, 0xad6cbac0UL, 0x2083b8edUL, 0xb6b3bf9aUL, ++ 0x0ce2b603UL, 0x9ad2b174UL, 0x3947d5eaUL, 0xaf77d29dUL, 0x1526db04UL, ++ 0x8316dc73UL, 0x120b63e3UL, 0x843b6494UL, 0x3e6a6d0dUL, 0xa85a6a7aUL, ++ 0x0bcf0ee4UL, 0x9dff0993UL, 0x27ae000aUL, 0xb19e077dUL, 0x44930ff0UL, ++ 0xd2a30887UL, 0x68f2011eUL, 0xfec20669UL, 0x5d5762f7UL, 0xcb676580UL, ++ 0x71366c19UL, 0xe7066b6eUL, 0x761bd4feUL, 0xe02bd389UL, 0x5a7ada10UL, ++ 0xcc4add67UL, 0x6fdfb9f9UL, 0xf9efbe8eUL, 0x43beb717UL, 0xd58eb060UL, ++ 0xe8a3d6d6UL, 0x7e93d1a1UL, 0xc4c2d838UL, 0x52f2df4fUL, 0xf167bbd1UL, ++ 0x6757bca6UL, 0xdd06b53fUL, 0x4b36b248UL, 0xda2b0dd8UL, 0x4c1b0aafUL, ++ 0xf64a0336UL, 0x607a0441UL, 0xc3ef60dfUL, 0x55df67a8UL, 0xef8e6e31UL, ++ 0x79be6946UL, 0x8cb361cbUL, 0x1a8366bcUL, 0xa0d26f25UL, 0x36e26852UL, ++ 0x95770cccUL, 0x03470bbbUL, 0xb9160222UL, 0x2f260555UL, 0xbe3bbac5UL, ++ 0x280bbdb2UL, 0x925ab42bUL, 0x046ab35cUL, 0xa7ffd7c2UL, 0x31cfd0b5UL, ++ 0x8b9ed92cUL, 0x1daede5bUL, 0xb0c2649bUL, 0x26f263ecUL, 0x9ca36a75UL, ++ 0x0a936d02UL, 0xa906099cUL, 0x3f360eebUL, 0x85670772UL, 0x13570005UL, ++ 0x824abf95UL, 0x147ab8e2UL, 0xae2bb17bUL, 0x381bb60cUL, 0x9b8ed292UL, ++ 0x0dbed5e5UL, 0xb7efdc7cUL, 0x21dfdb0bUL, 0xd4d2d386UL, 0x42e2d4f1UL, ++ 0xf8b3dd68UL, 0x6e83da1fUL, 0xcd16be81UL, 0x5b26b9f6UL, 0xe177b06fUL, ++ 0x7747b718UL, 0xe65a0888UL, 0x706a0fffUL, 0xca3b0666UL, 0x5c0b0111UL, ++ 0xff9e658fUL, 0x69ae62f8UL, 0xd3ff6b61UL, 0x45cf6c16UL, 0x78e20aa0UL, ++ 0xeed20dd7UL, 0x5483044eUL, 0xc2b30339UL, 0x612667a7UL, 0xf71660d0UL, ++ 0x4d476949UL, 0xdb776e3eUL, 0x4a6ad1aeUL, 0xdc5ad6d9UL, 0x660bdf40UL, ++ 0xf03bd837UL, 0x53aebca9UL, 0xc59ebbdeUL, 0x7fcfb247UL, 0xe9ffb530UL, ++ 0x1cf2bdbdUL, 0x8ac2bacaUL, 0x3093b353UL, 0xa6a3b424UL, 0x0536d0baUL, ++ 0x9306d7cdUL, 0x2957de54UL, 0xbf67d923UL, 0x2e7a66b3UL, 0xb84a61c4UL, ++ 0x021b685dUL, 0x942b6f2aUL, 0x37be0bb4UL, 0xa18e0cc3UL, 0x1bdf055aUL, ++ 0x8def022dUL ++ }, ++ { ++ 0x00000000UL, 0x41311b19UL, 0x82623632UL, 0xc3532d2bUL, 0x04c56c64UL, ++ 0x45f4777dUL, 0x86a75a56UL, 0xc796414fUL, 0x088ad9c8UL, 0x49bbc2d1UL, ++ 0x8ae8effaUL, 0xcbd9f4e3UL, 0x0c4fb5acUL, 0x4d7eaeb5UL, 0x8e2d839eUL, ++ 0xcf1c9887UL, 0x5112c24aUL, 0x1023d953UL, 0xd370f478UL, 0x9241ef61UL, ++ 0x55d7ae2eUL, 0x14e6b537UL, 0xd7b5981cUL, 0x96848305UL, 0x59981b82UL, ++ 0x18a9009bUL, 0xdbfa2db0UL, 0x9acb36a9UL, 0x5d5d77e6UL, 0x1c6c6cffUL, ++ 0xdf3f41d4UL, 0x9e0e5acdUL, 0xa2248495UL, 0xe3159f8cUL, 0x2046b2a7UL, ++ 0x6177a9beUL, 0xa6e1e8f1UL, 0xe7d0f3e8UL, 0x2483dec3UL, 0x65b2c5daUL, ++ 0xaaae5d5dUL, 0xeb9f4644UL, 0x28cc6b6fUL, 0x69fd7076UL, 0xae6b3139UL, ++ 0xef5a2a20UL, 0x2c09070bUL, 
0x6d381c12UL, 0xf33646dfUL, 0xb2075dc6UL, ++ 0x715470edUL, 0x30656bf4UL, 0xf7f32abbUL, 0xb6c231a2UL, 0x75911c89UL, ++ 0x34a00790UL, 0xfbbc9f17UL, 0xba8d840eUL, 0x79dea925UL, 0x38efb23cUL, ++ 0xff79f373UL, 0xbe48e86aUL, 0x7d1bc541UL, 0x3c2ade58UL, 0x054f79f0UL, ++ 0x447e62e9UL, 0x872d4fc2UL, 0xc61c54dbUL, 0x018a1594UL, 0x40bb0e8dUL, ++ 0x83e823a6UL, 0xc2d938bfUL, 0x0dc5a038UL, 0x4cf4bb21UL, 0x8fa7960aUL, ++ 0xce968d13UL, 0x0900cc5cUL, 0x4831d745UL, 0x8b62fa6eUL, 0xca53e177UL, ++ 0x545dbbbaUL, 0x156ca0a3UL, 0xd63f8d88UL, 0x970e9691UL, 0x5098d7deUL, ++ 0x11a9ccc7UL, 0xd2fae1ecUL, 0x93cbfaf5UL, 0x5cd76272UL, 0x1de6796bUL, ++ 0xdeb55440UL, 0x9f844f59UL, 0x58120e16UL, 0x1923150fUL, 0xda703824UL, ++ 0x9b41233dUL, 0xa76bfd65UL, 0xe65ae67cUL, 0x2509cb57UL, 0x6438d04eUL, ++ 0xa3ae9101UL, 0xe29f8a18UL, 0x21cca733UL, 0x60fdbc2aUL, 0xafe124adUL, ++ 0xeed03fb4UL, 0x2d83129fUL, 0x6cb20986UL, 0xab2448c9UL, 0xea1553d0UL, ++ 0x29467efbUL, 0x687765e2UL, 0xf6793f2fUL, 0xb7482436UL, 0x741b091dUL, ++ 0x352a1204UL, 0xf2bc534bUL, 0xb38d4852UL, 0x70de6579UL, 0x31ef7e60UL, ++ 0xfef3e6e7UL, 0xbfc2fdfeUL, 0x7c91d0d5UL, 0x3da0cbccUL, 0xfa368a83UL, ++ 0xbb07919aUL, 0x7854bcb1UL, 0x3965a7a8UL, 0x4b98833bUL, 0x0aa99822UL, ++ 0xc9fab509UL, 0x88cbae10UL, 0x4f5def5fUL, 0x0e6cf446UL, 0xcd3fd96dUL, ++ 0x8c0ec274UL, 0x43125af3UL, 0x022341eaUL, 0xc1706cc1UL, 0x804177d8UL, ++ 0x47d73697UL, 0x06e62d8eUL, 0xc5b500a5UL, 0x84841bbcUL, 0x1a8a4171UL, ++ 0x5bbb5a68UL, 0x98e87743UL, 0xd9d96c5aUL, 0x1e4f2d15UL, 0x5f7e360cUL, ++ 0x9c2d1b27UL, 0xdd1c003eUL, 0x120098b9UL, 0x533183a0UL, 0x9062ae8bUL, ++ 0xd153b592UL, 0x16c5f4ddUL, 0x57f4efc4UL, 0x94a7c2efUL, 0xd596d9f6UL, ++ 0xe9bc07aeUL, 0xa88d1cb7UL, 0x6bde319cUL, 0x2aef2a85UL, 0xed796bcaUL, ++ 0xac4870d3UL, 0x6f1b5df8UL, 0x2e2a46e1UL, 0xe136de66UL, 0xa007c57fUL, ++ 0x6354e854UL, 0x2265f34dUL, 0xe5f3b202UL, 0xa4c2a91bUL, 0x67918430UL, ++ 0x26a09f29UL, 0xb8aec5e4UL, 0xf99fdefdUL, 0x3accf3d6UL, 0x7bfde8cfUL, ++ 0xbc6ba980UL, 0xfd5ab299UL, 0x3e099fb2UL, 0x7f3884abUL, 0xb0241c2cUL, ++ 0xf1150735UL, 0x32462a1eUL, 0x73773107UL, 0xb4e17048UL, 0xf5d06b51UL, ++ 0x3683467aUL, 0x77b25d63UL, 0x4ed7facbUL, 0x0fe6e1d2UL, 0xccb5ccf9UL, ++ 0x8d84d7e0UL, 0x4a1296afUL, 0x0b238db6UL, 0xc870a09dUL, 0x8941bb84UL, ++ 0x465d2303UL, 0x076c381aUL, 0xc43f1531UL, 0x850e0e28UL, 0x42984f67UL, ++ 0x03a9547eUL, 0xc0fa7955UL, 0x81cb624cUL, 0x1fc53881UL, 0x5ef42398UL, ++ 0x9da70eb3UL, 0xdc9615aaUL, 0x1b0054e5UL, 0x5a314ffcUL, 0x996262d7UL, ++ 0xd85379ceUL, 0x174fe149UL, 0x567efa50UL, 0x952dd77bUL, 0xd41ccc62UL, ++ 0x138a8d2dUL, 0x52bb9634UL, 0x91e8bb1fUL, 0xd0d9a006UL, 0xecf37e5eUL, ++ 0xadc26547UL, 0x6e91486cUL, 0x2fa05375UL, 0xe836123aUL, 0xa9070923UL, ++ 0x6a542408UL, 0x2b653f11UL, 0xe479a796UL, 0xa548bc8fUL, 0x661b91a4UL, ++ 0x272a8abdUL, 0xe0bccbf2UL, 0xa18dd0ebUL, 0x62defdc0UL, 0x23efe6d9UL, ++ 0xbde1bc14UL, 0xfcd0a70dUL, 0x3f838a26UL, 0x7eb2913fUL, 0xb924d070UL, ++ 0xf815cb69UL, 0x3b46e642UL, 0x7a77fd5bUL, 0xb56b65dcUL, 0xf45a7ec5UL, ++ 0x370953eeUL, 0x763848f7UL, 0xb1ae09b8UL, 0xf09f12a1UL, 0x33cc3f8aUL, ++ 0x72fd2493UL ++ }, ++ { ++ 0x00000000UL, 0x376ac201UL, 0x6ed48403UL, 0x59be4602UL, 0xdca80907UL, ++ 0xebc2cb06UL, 0xb27c8d04UL, 0x85164f05UL, 0xb851130eUL, 0x8f3bd10fUL, ++ 0xd685970dUL, 0xe1ef550cUL, 0x64f91a09UL, 0x5393d808UL, 0x0a2d9e0aUL, ++ 0x3d475c0bUL, 0x70a3261cUL, 0x47c9e41dUL, 0x1e77a21fUL, 0x291d601eUL, ++ 0xac0b2f1bUL, 0x9b61ed1aUL, 0xc2dfab18UL, 0xf5b56919UL, 0xc8f23512UL, ++ 0xff98f713UL, 0xa626b111UL, 0x914c7310UL, 0x145a3c15UL, 0x2330fe14UL, ++ 0x7a8eb816UL, 0x4de47a17UL, 0xe0464d38UL, 
0xd72c8f39UL, 0x8e92c93bUL, ++ 0xb9f80b3aUL, 0x3cee443fUL, 0x0b84863eUL, 0x523ac03cUL, 0x6550023dUL, ++ 0x58175e36UL, 0x6f7d9c37UL, 0x36c3da35UL, 0x01a91834UL, 0x84bf5731UL, ++ 0xb3d59530UL, 0xea6bd332UL, 0xdd011133UL, 0x90e56b24UL, 0xa78fa925UL, ++ 0xfe31ef27UL, 0xc95b2d26UL, 0x4c4d6223UL, 0x7b27a022UL, 0x2299e620UL, ++ 0x15f32421UL, 0x28b4782aUL, 0x1fdeba2bUL, 0x4660fc29UL, 0x710a3e28UL, ++ 0xf41c712dUL, 0xc376b32cUL, 0x9ac8f52eUL, 0xada2372fUL, 0xc08d9a70UL, ++ 0xf7e75871UL, 0xae591e73UL, 0x9933dc72UL, 0x1c259377UL, 0x2b4f5176UL, ++ 0x72f11774UL, 0x459bd575UL, 0x78dc897eUL, 0x4fb64b7fUL, 0x16080d7dUL, ++ 0x2162cf7cUL, 0xa4748079UL, 0x931e4278UL, 0xcaa0047aUL, 0xfdcac67bUL, ++ 0xb02ebc6cUL, 0x87447e6dUL, 0xdefa386fUL, 0xe990fa6eUL, 0x6c86b56bUL, ++ 0x5bec776aUL, 0x02523168UL, 0x3538f369UL, 0x087faf62UL, 0x3f156d63UL, ++ 0x66ab2b61UL, 0x51c1e960UL, 0xd4d7a665UL, 0xe3bd6464UL, 0xba032266UL, ++ 0x8d69e067UL, 0x20cbd748UL, 0x17a11549UL, 0x4e1f534bUL, 0x7975914aUL, ++ 0xfc63de4fUL, 0xcb091c4eUL, 0x92b75a4cUL, 0xa5dd984dUL, 0x989ac446UL, ++ 0xaff00647UL, 0xf64e4045UL, 0xc1248244UL, 0x4432cd41UL, 0x73580f40UL, ++ 0x2ae64942UL, 0x1d8c8b43UL, 0x5068f154UL, 0x67023355UL, 0x3ebc7557UL, ++ 0x09d6b756UL, 0x8cc0f853UL, 0xbbaa3a52UL, 0xe2147c50UL, 0xd57ebe51UL, ++ 0xe839e25aUL, 0xdf53205bUL, 0x86ed6659UL, 0xb187a458UL, 0x3491eb5dUL, ++ 0x03fb295cUL, 0x5a456f5eUL, 0x6d2fad5fUL, 0x801b35e1UL, 0xb771f7e0UL, ++ 0xeecfb1e2UL, 0xd9a573e3UL, 0x5cb33ce6UL, 0x6bd9fee7UL, 0x3267b8e5UL, ++ 0x050d7ae4UL, 0x384a26efUL, 0x0f20e4eeUL, 0x569ea2ecUL, 0x61f460edUL, ++ 0xe4e22fe8UL, 0xd388ede9UL, 0x8a36abebUL, 0xbd5c69eaUL, 0xf0b813fdUL, ++ 0xc7d2d1fcUL, 0x9e6c97feUL, 0xa90655ffUL, 0x2c101afaUL, 0x1b7ad8fbUL, ++ 0x42c49ef9UL, 0x75ae5cf8UL, 0x48e900f3UL, 0x7f83c2f2UL, 0x263d84f0UL, ++ 0x115746f1UL, 0x944109f4UL, 0xa32bcbf5UL, 0xfa958df7UL, 0xcdff4ff6UL, ++ 0x605d78d9UL, 0x5737bad8UL, 0x0e89fcdaUL, 0x39e33edbUL, 0xbcf571deUL, ++ 0x8b9fb3dfUL, 0xd221f5ddUL, 0xe54b37dcUL, 0xd80c6bd7UL, 0xef66a9d6UL, ++ 0xb6d8efd4UL, 0x81b22dd5UL, 0x04a462d0UL, 0x33cea0d1UL, 0x6a70e6d3UL, ++ 0x5d1a24d2UL, 0x10fe5ec5UL, 0x27949cc4UL, 0x7e2adac6UL, 0x494018c7UL, ++ 0xcc5657c2UL, 0xfb3c95c3UL, 0xa282d3c1UL, 0x95e811c0UL, 0xa8af4dcbUL, ++ 0x9fc58fcaUL, 0xc67bc9c8UL, 0xf1110bc9UL, 0x740744ccUL, 0x436d86cdUL, ++ 0x1ad3c0cfUL, 0x2db902ceUL, 0x4096af91UL, 0x77fc6d90UL, 0x2e422b92UL, ++ 0x1928e993UL, 0x9c3ea696UL, 0xab546497UL, 0xf2ea2295UL, 0xc580e094UL, ++ 0xf8c7bc9fUL, 0xcfad7e9eUL, 0x9613389cUL, 0xa179fa9dUL, 0x246fb598UL, ++ 0x13057799UL, 0x4abb319bUL, 0x7dd1f39aUL, 0x3035898dUL, 0x075f4b8cUL, ++ 0x5ee10d8eUL, 0x698bcf8fUL, 0xec9d808aUL, 0xdbf7428bUL, 0x82490489UL, ++ 0xb523c688UL, 0x88649a83UL, 0xbf0e5882UL, 0xe6b01e80UL, 0xd1dadc81UL, ++ 0x54cc9384UL, 0x63a65185UL, 0x3a181787UL, 0x0d72d586UL, 0xa0d0e2a9UL, ++ 0x97ba20a8UL, 0xce0466aaUL, 0xf96ea4abUL, 0x7c78ebaeUL, 0x4b1229afUL, ++ 0x12ac6fadUL, 0x25c6adacUL, 0x1881f1a7UL, 0x2feb33a6UL, 0x765575a4UL, ++ 0x413fb7a5UL, 0xc429f8a0UL, 0xf3433aa1UL, 0xaafd7ca3UL, 0x9d97bea2UL, ++ 0xd073c4b5UL, 0xe71906b4UL, 0xbea740b6UL, 0x89cd82b7UL, 0x0cdbcdb2UL, ++ 0x3bb10fb3UL, 0x620f49b1UL, 0x55658bb0UL, 0x6822d7bbUL, 0x5f4815baUL, ++ 0x06f653b8UL, 0x319c91b9UL, 0xb48adebcUL, 0x83e01cbdUL, 0xda5e5abfUL, ++ 0xed3498beUL ++ }, ++ { ++ 0x00000000UL, 0x6567bcb8UL, 0x8bc809aaUL, 0xeeafb512UL, 0x5797628fUL, ++ 0x32f0de37UL, 0xdc5f6b25UL, 0xb938d79dUL, 0xef28b4c5UL, 0x8a4f087dUL, ++ 0x64e0bd6fUL, 0x018701d7UL, 0xb8bfd64aUL, 0xddd86af2UL, 0x3377dfe0UL, ++ 0x56106358UL, 0x9f571950UL, 0xfa30a5e8UL, 0x149f10faUL, 
0x71f8ac42UL, ++ 0xc8c07bdfUL, 0xada7c767UL, 0x43087275UL, 0x266fcecdUL, 0x707fad95UL, ++ 0x1518112dUL, 0xfbb7a43fUL, 0x9ed01887UL, 0x27e8cf1aUL, 0x428f73a2UL, ++ 0xac20c6b0UL, 0xc9477a08UL, 0x3eaf32a0UL, 0x5bc88e18UL, 0xb5673b0aUL, ++ 0xd00087b2UL, 0x6938502fUL, 0x0c5fec97UL, 0xe2f05985UL, 0x8797e53dUL, ++ 0xd1878665UL, 0xb4e03addUL, 0x5a4f8fcfUL, 0x3f283377UL, 0x8610e4eaUL, ++ 0xe3775852UL, 0x0dd8ed40UL, 0x68bf51f8UL, 0xa1f82bf0UL, 0xc49f9748UL, ++ 0x2a30225aUL, 0x4f579ee2UL, 0xf66f497fUL, 0x9308f5c7UL, 0x7da740d5UL, ++ 0x18c0fc6dUL, 0x4ed09f35UL, 0x2bb7238dUL, 0xc518969fUL, 0xa07f2a27UL, ++ 0x1947fdbaUL, 0x7c204102UL, 0x928ff410UL, 0xf7e848a8UL, 0x3d58149bUL, ++ 0x583fa823UL, 0xb6901d31UL, 0xd3f7a189UL, 0x6acf7614UL, 0x0fa8caacUL, ++ 0xe1077fbeUL, 0x8460c306UL, 0xd270a05eUL, 0xb7171ce6UL, 0x59b8a9f4UL, ++ 0x3cdf154cUL, 0x85e7c2d1UL, 0xe0807e69UL, 0x0e2fcb7bUL, 0x6b4877c3UL, ++ 0xa20f0dcbUL, 0xc768b173UL, 0x29c70461UL, 0x4ca0b8d9UL, 0xf5986f44UL, ++ 0x90ffd3fcUL, 0x7e5066eeUL, 0x1b37da56UL, 0x4d27b90eUL, 0x284005b6UL, ++ 0xc6efb0a4UL, 0xa3880c1cUL, 0x1ab0db81UL, 0x7fd76739UL, 0x9178d22bUL, ++ 0xf41f6e93UL, 0x03f7263bUL, 0x66909a83UL, 0x883f2f91UL, 0xed589329UL, ++ 0x546044b4UL, 0x3107f80cUL, 0xdfa84d1eUL, 0xbacff1a6UL, 0xecdf92feUL, ++ 0x89b82e46UL, 0x67179b54UL, 0x027027ecUL, 0xbb48f071UL, 0xde2f4cc9UL, ++ 0x3080f9dbUL, 0x55e74563UL, 0x9ca03f6bUL, 0xf9c783d3UL, 0x176836c1UL, ++ 0x720f8a79UL, 0xcb375de4UL, 0xae50e15cUL, 0x40ff544eUL, 0x2598e8f6UL, ++ 0x73888baeUL, 0x16ef3716UL, 0xf8408204UL, 0x9d273ebcUL, 0x241fe921UL, ++ 0x41785599UL, 0xafd7e08bUL, 0xcab05c33UL, 0x3bb659edUL, 0x5ed1e555UL, ++ 0xb07e5047UL, 0xd519ecffUL, 0x6c213b62UL, 0x094687daUL, 0xe7e932c8UL, ++ 0x828e8e70UL, 0xd49eed28UL, 0xb1f95190UL, 0x5f56e482UL, 0x3a31583aUL, ++ 0x83098fa7UL, 0xe66e331fUL, 0x08c1860dUL, 0x6da63ab5UL, 0xa4e140bdUL, ++ 0xc186fc05UL, 0x2f294917UL, 0x4a4ef5afUL, 0xf3762232UL, 0x96119e8aUL, ++ 0x78be2b98UL, 0x1dd99720UL, 0x4bc9f478UL, 0x2eae48c0UL, 0xc001fdd2UL, ++ 0xa566416aUL, 0x1c5e96f7UL, 0x79392a4fUL, 0x97969f5dUL, 0xf2f123e5UL, ++ 0x05196b4dUL, 0x607ed7f5UL, 0x8ed162e7UL, 0xebb6de5fUL, 0x528e09c2UL, ++ 0x37e9b57aUL, 0xd9460068UL, 0xbc21bcd0UL, 0xea31df88UL, 0x8f566330UL, ++ 0x61f9d622UL, 0x049e6a9aUL, 0xbda6bd07UL, 0xd8c101bfUL, 0x366eb4adUL, ++ 0x53090815UL, 0x9a4e721dUL, 0xff29cea5UL, 0x11867bb7UL, 0x74e1c70fUL, ++ 0xcdd91092UL, 0xa8beac2aUL, 0x46111938UL, 0x2376a580UL, 0x7566c6d8UL, ++ 0x10017a60UL, 0xfeaecf72UL, 0x9bc973caUL, 0x22f1a457UL, 0x479618efUL, ++ 0xa939adfdUL, 0xcc5e1145UL, 0x06ee4d76UL, 0x6389f1ceUL, 0x8d2644dcUL, ++ 0xe841f864UL, 0x51792ff9UL, 0x341e9341UL, 0xdab12653UL, 0xbfd69aebUL, ++ 0xe9c6f9b3UL, 0x8ca1450bUL, 0x620ef019UL, 0x07694ca1UL, 0xbe519b3cUL, ++ 0xdb362784UL, 0x35999296UL, 0x50fe2e2eUL, 0x99b95426UL, 0xfcdee89eUL, ++ 0x12715d8cUL, 0x7716e134UL, 0xce2e36a9UL, 0xab498a11UL, 0x45e63f03UL, ++ 0x208183bbUL, 0x7691e0e3UL, 0x13f65c5bUL, 0xfd59e949UL, 0x983e55f1UL, ++ 0x2106826cUL, 0x44613ed4UL, 0xaace8bc6UL, 0xcfa9377eUL, 0x38417fd6UL, ++ 0x5d26c36eUL, 0xb389767cUL, 0xd6eecac4UL, 0x6fd61d59UL, 0x0ab1a1e1UL, ++ 0xe41e14f3UL, 0x8179a84bUL, 0xd769cb13UL, 0xb20e77abUL, 0x5ca1c2b9UL, ++ 0x39c67e01UL, 0x80fea99cUL, 0xe5991524UL, 0x0b36a036UL, 0x6e511c8eUL, ++ 0xa7166686UL, 0xc271da3eUL, 0x2cde6f2cUL, 0x49b9d394UL, 0xf0810409UL, ++ 0x95e6b8b1UL, 0x7b490da3UL, 0x1e2eb11bUL, 0x483ed243UL, 0x2d596efbUL, ++ 0xc3f6dbe9UL, 0xa6916751UL, 0x1fa9b0ccUL, 0x7ace0c74UL, 0x9461b966UL, ++ 0xf10605deUL ++ } ++ }; ++/* ========================================================================= */ 
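++// Note: DOLIT4 folds four message bytes into the running CRC with one lookup
++// per byte from the four tables above (the zlib "slicing-by-4" scheme, using
++// little-endian word loads); DOLIT32 unrolls that to 32 bytes per iteration.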
++#define DOLIT4 c ^= *buf4++; \ ++ c = crc_table[3][c & 0xff] ^ crc_table[2][(c >> 8) & 0xff] ^ \ ++ crc_table[1][(c >> 16) & 0xff] ^ crc_table[0][c >> 24] ++#define DOLIT32 DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4; DOLIT4 ++ ++unsigned int SharedRuntime::updateBytesCRC32(unsigned long crc, const unsigned char *buf, unsigned int len) { ++ if (buf == 0) return 0UL; ++ ++ register unsigned int c; ++ register const unsigned int *buf4; ++ c = (unsigned int)crc; ++ c = ~c; ++ while (len && ((ptrdiff_t)buf & 3)) { ++ c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); ++ len--; ++ } ++ ++ buf4 = (const unsigned int *) (const void *)buf; ++ while (len >= 32) { ++ DOLIT32; ++ len -= 32; ++ } ++ while (len >= 4) { ++ DOLIT4; ++ len -= 4; ++ } ++ buf = (const unsigned char *)buf4; ++ ++ if (len) do { ++ c = crc_table[0][(c ^ *buf++) & 0xff] ^ (c >> 8); ++ } while (--len); ++ c = ~c; ++ return (unsigned long)c; ++} ++ ++//------------------------------Montgomery multiplication------------------------ ++// ++ ++#ifndef _WINDOWS ++ ++#define ASM_SUBTRACT ++#undef ASM_SUBTRACT //by jzy ++ ++#ifdef ASM_SUBTRACT ++// Subtract 0:b from carry:a. Return carry. ++static unsigned long ++sub(unsigned long a[], unsigned long b[], unsigned long carry, long len) { ++ long i = 0, cnt = len; ++ unsigned long tmp; ++ asm volatile("clc; " ++ "0: ; " ++ "mov (%[b], %[i], 8), %[tmp]; " ++ "sbb %[tmp], (%[a], %[i], 8); " ++ "inc %[i]; dec %[cnt]; " ++ "jne 0b; " ++ "mov %[carry], %[tmp]; sbb $0, %[tmp]; " ++ : [i]"+r"(i), [cnt]"+r"(cnt), [tmp]"=&r"(tmp) ++ : [a]"r"(a), [b]"r"(b), [carry]"r"(carry) ++ : "memory"); ++ return tmp; ++} ++#else // ASM_SUBTRACT ++typedef int __attribute__((mode(TI))) int128; ++ ++// Subtract 0:b from carry:a. Return carry. ++//static unsigned long ++//sub(unsigned long a[], unsigned long b[], unsigned long carry, int len) { ++// int128 tmp = 0; ++// int i; ++// for (i = 0; i < len; i++) { ++// tmp += a[i]; ++// tmp -= b[i]; ++// a[i] = tmp; ++// tmp >>= 64; ++// assert(-1 <= tmp && tmp <= 0, "invariant"); ++// } ++// return tmp + carry; ++//} ++static julong ++sub(julong a[], julong b[], julong carry, long len) { ++ long borrow = 0, t = 0; ++ julong tmp0, tmp1; ++ __asm__ __volatile__ ( ++ "0: \n" ++ "ldl %[tmp0], 0(%[a]) \n" ++ "ldl %[tmp1], 0(%[b]) \n" ++ "cmpult %[tmp0], %[borrow], %[t] \n" ++ "subl %[tmp0], %[borrow], %[tmp0] \n" ++ "cmpult %[tmp0], %[tmp1], %[borrow] \n" ++ "bis %[borrow], %[t], %[borrow] \n" ++ "subl %[tmp0], %[tmp1], %[tmp0] \n" ++ "stl %[tmp0], 0(%[a]) \n" ++ "addl %[a], 8, %[a] \n" ++ "addl %[b], 8, %[b] \n" ++ "subl %[len], 1, %[len] \n" ++ "bgt %[len], 0b \n" ++ "subl %[carry], %[borrow], %[tmp0] \n" ++ : [len]"+r"(len), [tmp0]"=&r"(tmp0), [tmp1]"=&r"(tmp1), [borrow]"+r"(borrow), [a]"+r"(a), [b]"+r"(b), [t]"+r"(t) ++ : [carry]"r"(carry) ++ : "memory" ++ ); ++ return tmp0; ++} ++#endif // ! ASM_SUBTRACT ++ ++// Multiply (unsigned) Long A by Long B, accumulating the double- ++// length result into the accumulator formed of T0, T1, and T2. 
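++// t2:t1:t0 serves as a 192-bit accumulator; each 64-bit add's carry is
++// recovered with a cmpult compare and propagated into the next higher word.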
++//#define MACC(A, B, T0, T1, T2) \ ++//do { \ ++// ShouldNotReachHere(); \ ++//} while(0) ++inline void MACC(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "mull %[A], %[B] , %[lo] \n" ++ "umulh %[A], %[B] , %[hi] \n" ++ "addl %[lo], %[t0], %[t0] \n" ++ "cmpult %[t0], %[lo], %[carry] \n" ++ "addl %[t1], %[carry], %[t1] \n" ++ "cmpult %[t1], %[carry], %[t] \n" ++ "addl %[t1], %[hi], %[t1] \n" ++ "cmpult %[t1], %[hi], %[carry] \n" ++ "bis %[carry], %[t] , %[carry] \n" ++ "addl %[t2], %[carry], %[t2] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// As above, but add twice the double-length result into the ++// accumulator. ++//#define MACC2(A, B, T0, T1, T2) \ ++//do { \ ++//ShouldNotReachHere(); \ ++// } while(0) ++inline void MACC2(unsigned long A, unsigned long B, unsigned long &t0, unsigned long &t1, unsigned long &t2) { ++ unsigned long hi, lo, carry = 0, t = 0; ++ __asm__ __volatile__( ++ "mull %[A], %[B] , %[lo] \n" ++ "umulh %[A], %[B] , %[hi] \n" ++ "addl %[t0], %[lo], %[t0] \n" ++ "cmpult %[t0], %[lo], %[carry] \n" ++ "addl %[t1], %[carry], %[t1] \n" ++ "cmpult %[t1], %[carry], %[t] \n" ++ "addl %[t1], %[hi], %[t1] \n" ++ "cmpult %[t1], %[hi], %[carry] \n" ++ "bis %[carry], %[t], %[carry] \n" ++ "addl %[t2], %[carry], %[t2] \n" ++ "addl %[t0], %[lo], %[t0] \n" ++ "cmpult %[t0], %[lo], %[carry] \n" ++ "addl %[t1], %[carry], %[t1] \n" ++ "cmpult %[t1], %[carry], %[t] \n" ++ "addl %[t1], %[hi], %[t1] \n" ++ "cmpult %[t1], %[hi], %[carry] \n" ++ "bis %[carry], %[t], %[carry] \n" ++ "addl %[t2], %[carry], %[t2] \n" ++ : [hi]"=&r"(hi), [lo]"=&r"(lo), [t0]"+r"(t0), [t1]"+r"(t1), [t2]"+r"(t2), [carry]"+r"(carry), [t]"+r"(t) ++ : [A]"r"(A), [B]"r"(B) ++ : ++ ); ++} ++ ++// Fast Montgomery multiplication. The derivation of the algorithm is ++// in A Cryptographic Library for the Motorola DSP56000, ++// Dusse and Kaliski, Proc. EUROCRYPT 90, pp. 230-237. ++ ++static void NOINLINE ++montgomery_multiply(julong a[], julong b[], julong n[], ++ julong m[], julong inv, int len) { ++ julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery multiply"); ++ //ShouldNotReachHere();//by jzy ++ for (i = 0; i < len; i++) { ++ int j; ++ for (j = 0; j < i; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ MACC(a[i], b[0], t0, t1, t2); ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery multiply"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int j; ++ for (j = i-len+1; j < len; j++) { ++ MACC(a[j], b[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Fast Montgomery squaring. This uses asymptotically 25% fewer ++// multiplies so it should be up to 25% faster than Montgomery ++// multiplication. However, its loop control is more complex and it ++// may actually run slower on some machines. 
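++// Editor's note (back-of-the-envelope for the 25% claim above): the multiply
++// issues roughly len*len MACCs for the a[j]*b[i-j] terms plus len*len for the
++// m[j]*n[i-j] reduction terms, about 2*len^2 in all.  Squaring exploits
++// a[j]*a[i-j] == a[i-j]*a[j], computing each distinct cross product once and
++// accumulating it twice via MACC2 (~len^2/2 multiplies), while the reduction
++// multiplies are unchanged, giving ~1.5*len^2 overall, i.e. about 25% fewer.
++// For len == 32 (a 2048-bit modulus) that is on the order of 2048 vs 1536
++// wide multiplies per operation, ignoring lower-order terms.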
++ ++static void NOINLINE ++montgomery_square(julong a[], julong n[], ++ julong m[], julong inv, int len) { ++ julong t0 = 0, t1 = 0, t2 = 0; // Triple-precision accumulator ++ int i; ++ ++ assert(inv * n[0] == ULLONG_MAX, "broken inverse in Montgomery square"); ++ //ShouldNotReachHere();//by jzy ++ for (i = 0; i < len; i++) { ++ int j; ++ int end = (i+1)/2; ++ for (j = 0; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < i; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i] = t0 * inv; ++ MACC(m[i], n[0], t0, t1, t2); ++ ++ assert(t0 == 0, "broken Montgomery square"); ++ ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ for (i = len; i < 2*len; i++) { ++ int start = i-len+1; ++ int end = start + (len - start)/2; ++ int j; ++ for (j = start; j < end; j++) { ++ MACC2(a[j], a[i-j], t0, t1, t2); ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ if ((i & 1) == 0) { ++ MACC(a[j], a[j], t0, t1, t2); ++ } ++ for (; j < len; j++) { ++ MACC(m[j], n[i-j], t0, t1, t2); ++ } ++ m[i-len] = t0; ++ t0 = t1; t1 = t2; t2 = 0; ++ } ++ ++ while (t0) ++ t0 = sub(m, n, t0, len); ++} ++ ++// Swap words in a longword. ++static julong swap(julong x) { ++ return (x << 32) | (x >> 32); ++} ++ ++// Copy len longwords from s to d, word-swapping as we go. The ++// destination array is reversed. ++static void reverse_words(julong *s, julong *d, int len) { ++ d += len; ++ while(len-- > 0) { ++ d--; ++ *d = swap(*s); ++ s++; ++ } ++} ++ ++// The threshold at which squaring is advantageous was determined ++// experimentally on an i7-3930K (Ivy Bridge) CPU @ 3.5GHz. ++#define MONTGOMERY_SQUARING_THRESHOLD 64 ++ ++void SharedRuntime::montgomery_multiply(jint *a_ints, jint *b_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_multiply must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 8k bytes of stack space. ++ int total_allocation = longwords * sizeof (julong) * 4; ++ guarantee(total_allocation <= 8192, "must be"); ++ unsigned long *scratch = (julong *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ julong ++ *a = scratch + 0 * longwords, ++ *b = scratch + 1 * longwords, ++ *n = scratch + 2 * longwords, ++ *m = scratch + 3 * longwords; ++ ++ reverse_words((julong *)a_ints, a, longwords); ++ reverse_words((julong *)b_ints, b, longwords); ++ reverse_words((julong *)n_ints, n, longwords); ++ ++ ::montgomery_multiply(a, b, n, m, (julong)inv, longwords); ++ ++ reverse_words(m, (julong *)m_ints, longwords); ++} ++ ++void SharedRuntime::montgomery_square(jint *a_ints, jint *n_ints, ++ jint len, jlong inv, ++ jint *m_ints) { ++ assert(len % 2 == 0, "array length in montgomery_square must be even"); ++ int longwords = len/2; ++ ++ // Make very sure we don't use so much space that the stack might ++ // overflow. 512 jints corresponds to an 16384-bit integer and ++ // will use here a total of 6k bytes of stack space. 
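++  // (Editor's arithmetic note: 512 jints -> longwords == 256; three julong
++  // scratch arrays of 256 * 8 bytes each come to 6144 bytes, within the
++  // guarantee(8192) just below.)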
++ int total_allocation = longwords * sizeof (julong) * 3; ++ guarantee(total_allocation <= 8192, "must be"); ++ julong *scratch = (julong *)alloca(total_allocation); ++ ++ // Local scratch arrays ++ unsigned long ++ *a = scratch + 0 * longwords, ++ *n = scratch + 1 * longwords, ++ *m = scratch + 2 * longwords; ++ ++ reverse_words((julong *)a_ints, a, longwords); ++ reverse_words((julong *)n_ints, n, longwords); ++ ++ if (len >= MONTGOMERY_SQUARING_THRESHOLD) { ++ ::montgomery_square(a, n, m, (julong)inv, longwords); ++ } else { ++ ::montgomery_multiply(a, a, n, m, (julong)inv, longwords); ++ } ++ ++ reverse_words(m, (julong *)m_ints, longwords); ++} ++ ++#endif // WINDOWS ++ ++#ifdef COMPILER2 ++// This is here instead of runtime_x86_64.cpp because it uses SimpleRuntimeFrame ++// ++//------------------------------generate_exception_blob--------------------------- ++// creates exception blob at the end ++// Using exception blob, this code is jumped from a compiled method. ++// (see emit_exception_handler in x86_64.ad file) ++// ++// Given an exception pc at a call we call into the runtime for the ++// handler in this method. This handler might merely restore state ++// (i.e. callee save registers) unwind the frame and jump to the ++// exception handler for the nmethod if there is no Java level handler ++// for the nmethod. ++// ++// This code is entered with a jmp. ++// ++// Arguments: ++// rax: exception oop ++// rdx: exception pc ++// ++// Results: ++// rax: exception oop ++// rdx: exception pc in caller or ??? ++// destination: exception handler of caller ++// ++// Note: the exception pc MUST be at a call (precise debug information) ++// Registers rax, rdx, rcx, rsi, rdi, r8-r11 are not callee saved. ++// ++ ++void OptoRuntime::generate_exception_blob() { ++ assert(!OptoRuntime::is_callee_saved_register(A2_num), ""); ++ assert(!OptoRuntime::is_callee_saved_register(A3_num), ""); ++ assert(!OptoRuntime::is_callee_saved_register(V0_num), ""); ++ ++ assert(SimpleRuntimeFrame::framesize % 4 == 0, "sp not 16-byte aligned"); ++ ++ // Allocate space for the code ++ ResourceMark rm; ++ // Setup code generation tools ++ CodeBuffer buffer("exception_blob", 2048, 1024); ++ MacroAssembler* masm = new MacroAssembler(&buffer); ++ ++ ++ address start = __ pc(); ++ ++ //__ stop("generate_exception_blob"); ++ ++ // Exception pc is 'return address' for stack walker ++ __ push(rdx); ++ __ subptr(esp, SimpleRuntimeFrame::return_off << LogBytesPerInt, esp); // Prolog ++ ++ // Save callee-saved registers. See x86_64.ad. ++ ++ // rbp is an implicitly saved callee saved register (i.e., the calling ++ // convention will save/restore it in the prolog/epilog). Other than that ++ // there are no callee save registers now that adapter frames are gone. ++ ++ __ stptr(rfp, Address(esp, SimpleRuntimeFrame::rfp_off << LogBytesPerInt)); ++ ++ // Store exception in Thread object. We cannot pass any arguments to the ++ // handle_exception call, since we do not want to make any assumption ++ // about the size of the frame where the exception happened in. ++ // c_rarg0 is either rdi (Linux) or rcx (Windows). ++ __ stptr(rax, Address(rthread, JavaThread::exception_oop_offset())); ++ __ stptr(rdx, Address(rthread, JavaThread::exception_pc_offset())); ++ ++ // This call does all the hard work. It checks if an exception handler ++ // exists in the method. ++ // If so, it returns the handler address. ++ // If not, it prepares for stack-unwinding, restoring the callee-save ++ // registers of the frame being removed. 
++ // ++ // address OptoRuntime::handle_exception_C(JavaThread* thread) ++ ++ // At a method handle call, the stack may not be properly aligned ++ // when returning with an exception. ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(esp, noreg, the_pc, rscratch3); ++ __ movl(c_rarg0, rthread); ++ //__ andptr(esp, -(StackAlignmentInBytes), esp); // Align stack ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, OptoRuntime::handle_exception_C))); ++ ++ // Set an oopmap for the call site. This oopmap will only be used if we ++ // are unwinding the stack. Hence, all locations will be dead. ++ // Callee-saved registers will be the same as the frame above (i.e., ++ // handle_exception_stub), since they were restored when we got the ++ // exception. ++ ++ OopMapSet* oop_maps = new OopMapSet(); ++ ++ oop_maps->add_gc_map(the_pc - start, new OopMap(SimpleRuntimeFrame::framesize, 0));//TODO:here need to modify offset? jzy ++ ++ __ reset_last_Java_frame(false); ++ ++ // Restore callee-saved registers ++ ++ // rbp is an implicitly saved callee-saved register (i.e., the calling ++ // convention will save restore it in prolog/epilog) Other than that ++ // there are no callee save registers now that adapter frames are gone. ++ ++ __ ldptr(rfp, Address(esp, SimpleRuntimeFrame::rfp_off << LogBytesPerInt)); ++ ++ __ addptr(esp, SimpleRuntimeFrame::return_off << LogBytesPerInt, esp); // Epilog ++ __ pop(rdx); // No need for exception pc anymore ++ ++ // rax: exception handler ++ ++ // We have a handler in rax (could be deopt blob). ++ __ movl(rscratch3, rax); ++ ++ // Get the exception oop ++ __ ldptr(rax, Address(rthread, JavaThread::exception_oop_offset())); ++ // Get the exception pc in case we are deoptimized ++ __ ldptr(rdx, Address(rthread, JavaThread::exception_pc_offset())); ++#ifdef ASSERT ++ __ stptr(R0, Address(rthread, JavaThread::exception_handler_pc_offset())); ++ __ stptr(R0, Address(rthread, JavaThread::exception_pc_offset())); ++#endif ++ // Clear the exception oop so GC no longer processes it as a root. ++ __ stptr(R0, Address(rthread, JavaThread::exception_oop_offset())); ++ ++ // rax: exception oop ++ // rscratch3: exception handler ++ // rdx: exception pc ++ // Jump to handler ++ ++ __ jmp(rscratch3); ++ ++ // Make sure all code is generated ++ masm->flush(); ++ ++ // Set exception blob ++ _exception_blob = ExceptionBlob::create(&buffer, oop_maps, SimpleRuntimeFrame::framesize >> 1); ++} ++#endif // COMPILER2 +diff --git a/src/hotspot/cpu/sw64/stubGenerator_sw64.cpp b/src/hotspot/cpu/sw64/stubGenerator_sw64.cpp +new file mode 100644 +index 0000000000..0dbeb1d98f +--- /dev/null ++++ b/src/hotspot/cpu/sw64/stubGenerator_sw64.cpp +@@ -0,0 +1,5922 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, 2019, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "gc/shared/barrierSet.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "nativeInst_sw64.hpp" ++#include "oops/instanceOop.hpp" ++#include "oops/method.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/handles.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/align.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// Declaration and definition of StubGenerator (no .hpp file). ++// For a more detailed description of the stub routine structure ++// see the comment in stubRoutines.hpp ++ ++#undef __ ++#define __ _masm-> ++#define TIMES_OOP (UseCompressedOops ? Address::times_4 : Address::times_8) ++//#define a__ ((Assembler*)_masm)-> ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) { char line[1024]; sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++const int MXCSR_MASK = 0xFFC0; // Mask out any pending exceptions ++ ++// Stub Code definitions ++ ++class StubGenerator: public StubCodeGenerator { ++ private: ++ ++#ifdef PRODUCT ++#define inc_counter_np(counter) ((void)0) ++#else ++ void inc_counter_np_(int& counter) { ++ // This can destroy rscratch1 if counter is far from the code cache ++ __ incrementw(ExternalAddress((address)&counter)); ++ } ++#define inc_counter_np(counter) \ ++ BLOCK_COMMENT("inc_counter " #counter); \ ++ inc_counter_np_(counter); ++#endif ++ ++ // Call stubs are used to call Java from C ++ // ++ // Linux Arguments: ++ // c_rarg0: call wrapper address address ++ // c_rarg1: result address ++ // c_rarg2: result type BasicType ++ // c_rarg3: method Method* ++ // c_rarg4: (interpreter) entry point address ++ // c_rarg5: parameters intptr_t* ++ // 16(rfp): parameter size (in words) int ++ // 24(rfp): thread Thread* ++ // ++ // [ return_from_Java ] <--- rsp ++ // [ argument word n ] ++ // ... 
++ // -13 [ argument word 1 ] ++ // -12 [ saved S5 ] <--- rsp_after_call ++ // -11 [ saved S4 ] ++ // -10 [ saved S3 ] ++ // -9 [ saved S2 ] ++ // -8 [ saved S1 ] ++ // -7 [ saved S0 ] ++ // -6 [ call wrapper ] ++ // -5 [ result ] ++ // -4 [ result type ] ++ // -3 [ method ] ++ // -2 [ entry point ] ++ // -1 [ parameters ] ++ // 0 [ saved rfp ] <--- rfp ++ // 1 [ return address ] ++ // 2 [ parameter size ] ++ // 3 [ thread ] ++ // ++ ++// Call stub stack layout word offsets from rfp ++ enum call_stub_layout { ++ rsp_after_call_off = -20, ++ F9_off = rsp_after_call_off, ++ F8_off = -19, ++ F7_off = -18, ++ F6_off = -17, ++ F5_off = -16, ++ F4_off = -15, ++ F3_off = -14, ++ F2_off = -13, ++ S5_off = -12, ++ S4_off = -11, ++ S3_off = -10, ++ S2_off = -9, ++ S1_off = -8, ++ S0_off = -7, ++ call_wrapper_off = -6, ++ result_off = -5, ++ result_type_off = -4, ++ method_off = -3, ++ entry_point_off = -2, ++ parameters_off = -1, ++ rfp_off = 0, ++ retaddr_off = 1, ++ parameter_size_off = 2, ++ thread_off = 3 ++ }; ++ ++ address generate_call_stub(address& return_address) { ++ assert((int)frame::entry_frame_after_call_words == -(int)rsp_after_call_off + 1 && ++ (int)frame::entry_frame_call_wrapper_offset == (int)call_wrapper_off, ++ "adjust this code"); ++ StubCodeMark mark(this, "StubRoutines", "call_stub"); ++ address start = __ pc(); ++ Register rax = V0; ++ ++ //set FPCR in kernel ++// if (SetFPCR) { ++// __ rfpcr(f28); ++// __ fimovd(f28, AT); ++// __ sbt(AT, 45, AT); ++// __ ifmovd(AT, f28); ++// __ wfpcr(f28); ++// } ++ ++ // same as in generate_catch_exception()! ++ const Address rsp_after_call(rfp, rsp_after_call_off * wordSize); ++ ++ const Address call_wrapper (rfp, call_wrapper_off * wordSize); ++ const Address result (rfp, result_off * wordSize); ++ const Address result_type (rfp, result_type_off * wordSize); ++ const Address method (rfp, method_off * wordSize); ++ const Address entry_point (rfp, entry_point_off * wordSize); ++ const Address parameters (rfp, parameters_off * wordSize); ++ const Address parameter_size(rfp, parameter_size_off * wordSize); ++ ++ // same as in generate_catch_exception()! 
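++    // (Editor's note: thread_off == 3 words, i.e. the "24(rfp): thread" slot
++    // documented in the argument layout above, wordSize being 8 here.)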
++ const Address thread (rfp, thread_off * wordSize); ++ ++ // call Java method from C function, by LIX20170503 ++ __ setfpec1(); ++ const Address S5_save(rfp, S5_off * wordSize); ++ const Address S4_save(rfp, S4_off * wordSize); ++ const Address S3_save(rfp, S3_off * wordSize); ++ const Address S2_save(rfp, S2_off * wordSize); ++ const Address S1_save(rfp, S1_off * wordSize); ++ const Address S0_save(rfp, S0_off * wordSize); ++ ++ // stub code ++ __ enter(); ++ __ subptr(esp, -rsp_after_call_off * wordSize, esp); ++ ++ __ stptr(c_rarg5, parameters); ++ __ stptr(c_rarg4, entry_point); ++ __ stptr(c_rarg3, method); ++ __ stptr(c_rarg2, result_type); ++ __ stptr(c_rarg1, result); ++ __ stptr(c_rarg0, call_wrapper); ++ ++ // save regs belonging to calling function ++ __ stptr(S5, S5_save); ++ __ stptr(S4, S4_save); ++ __ stptr(S3, S3_save); ++ __ stptr(S2, S2_save); ++ __ stptr(S1, S1_save); ++ __ stptr(S0, S0_save); ++ ++ __ fstd(f9, F9_off * wordSize, rfp); ++ __ fstd(f8, F8_off * wordSize, rfp); ++ __ fstd(f7, F7_off * wordSize, rfp); ++ __ fstd(f6, F6_off * wordSize, rfp); ++ __ fstd(f5, F5_off * wordSize, rfp); ++ __ fstd(f4, F4_off * wordSize, rfp); ++ __ fstd(f3, F3_off * wordSize, rfp); ++ __ fstd(f2, F2_off * wordSize, rfp); ++ __ ldptr(rthread, thread); ++ ++ __ reinit_heapbase(); ++ ++#ifdef ASSERT ++ // make sure we have no pending exceptions ++ { ++ Label L; ++ __ cmpptr(Address(rthread, Thread::pending_exception_offset()), R0); ++ __ jcc(Assembler::equal, L); ++ __ stop("StubRoutines::call_stub: entered with pending exception"); ++ __ bind(L); ++ } ++#endif ++ ++ // pass parameters if any ++ BLOCK_COMMENT("pass parameters if any"); ++ Label parameters_done; ++ __ ldws(c_rarg3, parameter_size); ++ __ testw(c_rarg3, c_rarg3); ++ __ jcc(Assembler::zero, parameters_done); ++ ++ Label loop; ++ __ ldptr(c_rarg2, parameters); // parameter pointer ++ __ movw(c_rarg1, c_rarg3); // parameter counter is in c_rarg1 ++ __ BIND(loop); ++ __ ldptr(rax, Address(c_rarg2, 0));// get parameter ++ __ addptr(c_rarg2, wordSize, c_rarg2); // advance to next parameter ++ __ decrementw(c_rarg1); // decrement counter ++ __ push(rax); // pass parameter ++ __ jcc(Assembler::notZero, loop, c_rarg1); ++ ++ // call Java function ++ __ BIND(parameters_done); ++ __ ldptr(rmethod, method); // get Method* ++ __ ldptr(c_rarg1, entry_point); // get entry_point ++ __ movl(rsender, esp); //set sender sp ++ BLOCK_COMMENT("call Java function"); ++ __ call(c_rarg1, return_address); //c_rarg4 is (interpreter) entry point ++ ++ // store result depending on type (everything that is not ++ // T_OBJECT, T_LONG, T_FLOAT or T_DOUBLE is treated as T_INT) ++ __ ldptr(c_rarg0, result); ++ Label is_long, is_float, is_double, exit; ++ __ ldws(c_rarg1, result_type); ++ __ cmpw(c_rarg1, T_OBJECT); ++ __ jcc(Assembler::equal, is_long); ++ __ cmpw(c_rarg1, T_LONG); ++ __ jcc(Assembler::equal, is_long); ++ __ cmpw(c_rarg1, T_FLOAT); ++ __ jcc(Assembler::equal, is_float); ++ __ cmpw(c_rarg1, T_DOUBLE); ++ __ jcc(Assembler::equal, is_double); ++ ++ // handle T_INT case ++ __ stw(rax, Address(c_rarg0, 0)); ++ ++ __ BIND(exit); ++ ++ // pop parameters ++ __ lea(esp, rsp_after_call); ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { ++ Label L1, L2, L3; ++ Register rbx = c_rarg1; ++ __ cmpptr(rthread, thread); ++ __ jcc(Assembler::equal, L1); ++ __ stop("StubRoutines::call_stub: rthread is corrupted"); ++ __ bind(L1); ++ __ get_thread(rbx); ++ __ cmpptr(rthread, thread); ++ __ jcc(Assembler::equal, L2); ++ __ 
stop("StubRoutines::call_stub: rthread is modified by call"); ++ __ bind(L2); ++ __ cmpptr(rthread, rbx); ++ __ jcc(Assembler::equal, L3); ++ __ stop("StubRoutines::call_stub: threads must correspond"); ++ __ bind(L3); ++ } ++#endif ++ ++ // restore regs belonging to calling function ++ __ ldptr(S5, S5_save); ++ __ ldptr(S4, S4_save); ++ __ ldptr(S3, S3_save); ++ __ ldptr(S2, S2_save); ++ __ ldptr(S1, S1_save); ++ __ ldptr(S0, S0_save); ++ ++ __ fldd(f9, F9_off * wordSize, rfp); ++ __ fldd(f8, F8_off * wordSize, rfp); ++ __ fldd(f7, F7_off * wordSize, rfp); ++ __ fldd(f6, F6_off * wordSize, rfp); ++ __ fldd(f5, F5_off * wordSize, rfp); ++ __ fldd(f4, F4_off * wordSize, rfp); ++ __ fldd(f3, F3_off * wordSize, rfp); ++ __ fldd(f2, F2_off * wordSize, rfp); ++ ++ // restore rsp ++ __ addptr(esp, -rsp_after_call_off * wordSize, esp); ++ ++ __ leave(); ++ // return ++ __ ret(); ++ ++ // handle return types different from T_INT ++ __ BIND(is_long); ++ __ stl(rax, Address(c_rarg0, 0)); ++ __ jmp(exit); ++ ++ __ BIND(is_float); ++ __ fsts(f0, Address(c_rarg0, 0)); ++ __ jmp(exit); ++ ++ __ BIND(is_double); ++ __ fstd(f0, Address(c_rarg0, 0)); ++ __ jmp(exit); ++ ++ return start; ++ } ++ ++ // Return point for a Java call if there's an exception thrown in ++ // Java code. The exception is caught and transformed into a ++ // pending exception stored in JavaThread that can be tested from ++ // within the VM. ++ // ++ // Note: Usually the parameters are removed by the callee. In case ++ // of an exception crossing an activation frame boundary, that is ++ // not the case if the callee is compiled code => need to setup the ++ // rsp. ++ // ++ // rax: exception oop ++ ++ address generate_catch_exception() { ++ StubCodeMark mark(this, "StubRoutines", "catch_exception"); ++ address start = __ pc(); ++ //Register rbx = c_rarg2; ++ //Register rax = V0; ++ ++ // same as in generate_call_stub(): ++ const Address rsp_after_call(rfp, rsp_after_call_off * wordSize); ++ const Address thread (rfp, thread_off * wordSize); ++ ++#ifdef ASSERT ++ // verify that threads correspond ++ { ++ Label L1, L2, L3; ++ __ cmpptr(rthread, thread); ++ __ jcc(Assembler::equal, L1); ++ __ stop("StubRoutines::catch_exception: rthread is corrupted"); ++ __ bind(L1); ++ __ get_thread(rbx); ++ __ cmpptr(rthread, thread); ++ __ jcc(Assembler::equal, L2); ++ __ stop("StubRoutines::catch_exception: rthread is modified by call"); ++ __ bind(L2); ++ __ cmpptr(rthread, rbx); ++ __ jcc(Assembler::equal, L3); ++ __ stop("StubRoutines::catch_exception: threads must correspond"); ++ __ bind(L3); ++ } ++#endif ++ ++ // set pending exception ++ __ verify_oop(rax); ++ ++ __ stptr(rax, Address(rthread, Thread::pending_exception_offset())); ++ __ lea(rscratch3, ExternalAddress((address)__FILE__)); ++ __ stptr(rscratch3, Address(rthread, Thread::exception_file_offset())); ++ __ stw((int) __LINE__, Address(rthread, Thread::exception_line_offset())); ++ ++ // complete return to VM ++ assert(StubRoutines::_call_stub_return_address != NULL, ++ "_call_stub_return_address must have been generated before"); ++ __ jump(RuntimeAddress(StubRoutines::_call_stub_return_address)); ++ ++ return start; ++ } ++ ++ // Continuation point for runtime calls returning with a pending ++ // exception. The pending exception check happened in the runtime ++ // or native call stub. The pending exception in Thread is ++ // converted into a Java-level exception. 
++ // ++ // Contract with Java-level exception handlers: ++ // rax: exception ++ // rdx: throwing pc ++ // ++ // NOTE: At entry of this stub, exception-pc must be on stack !! ++ ++ address generate_forward_exception() { ++ StubCodeMark mark(this, "StubRoutines", "forward exception"); ++ address start = __ pc(); ++ //__ stop("not check:jzy"); ++ Register rax = V0; ++ Register rbx = rmethod; ++ Register rdx = c_rarg2; ++ // Upon entry, LR points to the return address returning into ++ // Java (interpreted or compiled) code; i.e., the return address ++ // becomes the throwing pc. ++ // ++ // Arguments pushed before the runtime call are still on the stack ++ // but the exception handler will reset the stack pointer -> ++ // ignore them. A potential result in registers can be ignored as ++ // well. ++ ++#ifdef ASSERT ++ // make sure this code is only executed if there is a pending exception ++ { ++ Label L; ++ __ cmpptr(Address(rthread, Thread::pending_exception_offset()), R0); ++ __ jcc(Assembler::notEqual, L); ++ __ stop("StubRoutines::forward exception: no pending exception (1)"); ++ __ bind(L); ++ } ++#endif ++ ++ // compute exception handler into rbx ++ ++ // call the VM to find the handler address associated with the ++ // caller address. pass thread in a0 and caller pc (ret address) ++ // in a1. n.b. the caller pc is in RA, unlike x86 where it is on ++ // the stack. ++ __ movl(r12_heapbase, RA); ++ __ movl(c_rarg1, RA); ++ __ block_comment("call exception_handler_for_return_address"); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, ++ SharedRuntime::exception_handler_for_return_address), ++ rthread, c_rarg1); ++ // we should not really care that RA is no longer the callee ++ // address. we saved the value the handler needs in r12_heapbase so we can ++ // just copy it to rbx. however, the C2 handler will push its own ++ // frame and then calls into the VM and the VM code asserts that ++ // the PC for the frame above the handler belongs to a compiled ++ // Java method. So, we restore lr here to satisfy that assert. 
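++    // (Editor's note: movl takes the destination first in this port, so the
++    // move below restores RA from the copy stashed in r12_heapbase before the
++    // call_VM_leaf; the handler address itself is returned in V0/rax.)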
++ __ movl(RA, r12_heapbase); ++ __ reinit_heapbase(); //reset r12_heapbase ++ ++ // setup rax & rdx, remove return address & clear pending exception ++ __ movl(rbx, rax); ++ __ movl(rdx, RA); ++ __ ldptr(rax, Address(rthread, Thread::pending_exception_offset())); ++ __ stptr(R0, Address(rthread, Thread::pending_exception_offset())); ++ ++#ifdef ASSERT ++ // make sure exception is set ++ { ++ Label L; ++ __ jcc(Assembler::notEqual, L, rax); ++ __ stop("StubRoutines::forward exception: no pending exception (2)"); ++ __ bind(L); ++ } ++#endif ++ ++ // continue at exception handler (return address removed) ++ // rax: exception ++ // rbx: exception handler ++ // rdx: throwing pc ++ __ verify_oop(rax); ++ __ jmp(rbx); ++ ++ return start; ++ } ++ ++ // Support for jint atomic::xchg(jint exchange_value, volatile jint* dest) ++ // ++ // Arguments : ++ // c_rarg0: exchange_value ++ // c_rarg0: dest ++ // ++ // Result: ++ // *dest <- ex, return (orig *dest) ++ address generate_atomic_xchg() { ++ StubCodeMark mark(this, "StubRoutines", "atomic_xchg"); ++ address start = __ pc(); ++__ stop("unimplement generate_atomic_xchg"); ++ __ movl(V0, c_rarg0); // Copy to eax we need a return value anyhow ++ __ xchgw(V0, Address(c_rarg1, 0)); // automatic LOCK ++ __ ret_sw(); ++ ++ return start; ++ } ++ ++ // Support for intptr_t atomic::xchg_long(jlong exchange_value, volatile jlong* dest) ++ // ++ // Arguments : ++ // c_rarg0: exchange_value ++ // c_rarg1: dest ++ // ++ // Result: ++ // *dest <- ex, return (orig *dest) ++ address generate_atomic_xchg_long() { ++ StubCodeMark mark(this, "StubRoutines", "atomic_xchg_long"); ++ address start = __ pc(); ++__ stop("unimplement generate_atomic_xchg_long"); ++ __ movl(V0, c_rarg0); // Copy to eax we need a return value anyhow ++ __ xchgptr(V0, Address(c_rarg1, 0)); // automatic LOCK ++ __ ret_sw(); ++ ++ return start; ++ } ++ ++ // Support for jint atomic::atomic_cmpxchg(jint exchange_value, volatile jint* dest, ++ // jint compare_value) ++ // ++ // Arguments : ++ // c_rarg0: exchange_value ++ // c_rarg1: dest ++ // c_rarg2: compare_value ++ // ++ // Result: ++ // if ( compare_value == *dest ) { ++ // *dest = exchange_value ++ // return compare_value; ++ // else ++ // return *dest; ++ address generate_atomic_cmpxchg() { ++ StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg"); ++ address start = __ pc(); ++__ stop("unimplement generate_atomic_cmpxchg"); ++ __ movw(V0, c_rarg2); ++ //if ( os::is_MP() ) __ lock(); ++ __ cmpxchgw(c_rarg0, Address(c_rarg1, 0)); ++ __ ret_sw(); ++ ++ return start; ++ } ++ ++ // Support for int8_t atomic::atomic_cmpxchg(int8_t exchange_value, volatile int8_t* dest, ++ // int8_t compare_value) ++ // ++ // Arguments : ++ // c_rarg0: exchange_value ++ // c_rarg1: dest ++ // c_rarg2: compare_value ++ // ++ // Result: ++ // if ( compare_value == *dest ) { ++ // *dest = exchange_value ++ // return compare_value; ++ // else ++ // return *dest; ++ address generate_atomic_cmpxchg_byte() { ++ StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_byte"); ++ address start = __ pc(); ++__ stop("unimplement generate_atomic_cmpxchg_byte"); ++ __ sextb(V0, c_rarg2); ++ //if ( os::is_MP() ) __ lock(); ++ __ cmpxchgb(c_rarg0, Address(c_rarg1, 0)); ++ __ ret_sw(); ++ ++ return start; ++ } ++ ++ // Support for int64_t atomic::atomic_cmpxchg(int64_t exchange_value, ++ // volatile int64_t* dest, ++ // int64_t compare_value) ++ // Arguments : ++ // c_rarg0: exchange_value ++ // c_rarg1: dest ++ // c_rarg2: compare_value ++ // ++ // Result: ++ // if ( 
compare_value == *dest ) { ++ // *dest = exchange_value ++ // return compare_value; ++ // else ++ // return *dest; ++ address generate_atomic_cmpxchg_long() { ++ StubCodeMark mark(this, "StubRoutines", "atomic_cmpxchg_long"); ++ address start = __ pc(); ++__ stop("unimplement generate_atomic_cmpxchg_long"); ++ __ movl(V0, c_rarg2); ++ //if ( os::is_MP() ) __ lock(); ++ __ cmpxchgq(c_rarg0, Address(c_rarg1, 0)); ++ __ ret_sw(); ++ ++ return start; ++ } ++ ++ // Support for jint atomic::add(jint add_value, volatile jint* dest) ++ // ++ // Arguments : ++ // c_rarg0: add_value ++ // c_rarg1: dest ++ // ++ // Result: ++ // *dest += add_value ++ // return *dest; ++ address generate_atomic_add() { ++ StubCodeMark mark(this, "StubRoutines", "atomic_add"); ++ address start = __ pc(); ++__ stop("unimplement generate_atomic_add"); ++// __ movw(V0, c_rarg0); ++// //if ( os::is_MP() ) __ lock(); ++// __ xaddw(Address(c_rarg1, 0), c_rarg0); ++// __ addw(V0, c_rarg0, V0); ++// __ ret_sw(); ++ ++ return start; ++ } ++ ++ // Support for intptr_t atomic::add_ptr(intptr_t add_value, volatile intptr_t* dest) ++ // ++ // Arguments : ++ // c_rarg0: add_value ++ // c_rarg1: dest ++ // ++ // Result: ++ // *dest += add_value ++ // return *dest; ++ address generate_atomic_add_long() { ++ StubCodeMark mark(this, "StubRoutines", "atomic_add_long"); ++ address start = __ pc(); ++__ stop("unimplement generate_atomic_add_long"); ++// __ movl(V0, c_rarg0); // Copy to eax we need a return value anyhow ++// //if ( os::is_MP() ) __ lock(); ++// __ xaddptr(Address(c_rarg1, 0), c_rarg0); ++// __ addptr(V0, c_rarg0, V0); ++// __ ret_sw(); ++ ++ return start; ++ } ++ ++ // Support for intptr_t OrderAccess::fence() ++ // ++ // Arguments : ++ // ++ // Result: ++ address generate_orderaccess_fence() { ++ StubCodeMark mark(this, "StubRoutines", "orderaccess_fence"); ++ address start = __ pc();__ stop("unimplement generate_orderaccess_fence"); ++ __ memb(); ++ __ ret_sw(); ++ ++ return start; ++ } ++ ++ // Support for intptr_t get_previous_fp() ++ // ++ // This routine is used to find the previous frame pointer for the ++ // caller (current_frame_guess). This is used as part of debugging ++ // ps() is seemingly lost trying to find frames. ++ // This code assumes that caller current_frame_guess) has a frame. ++ address generate_get_previous_fp() { ++ StubCodeMark mark(this, "StubRoutines", "get_previous_fp"); ++ const Address old_fp(rfp, 0); ++ const Address older_fp(V0, 0); ++ address start = __ pc();__ stop("unimplement generate_get_previous_fp"); ++ Register rax = V0; ++ ++ __ enter(); ++ __ ldptr(rax, old_fp); // callers fp ++ __ ldptr(rax, older_fp); // the frame for ps() ++ __ leave(); ++ __ ret_sw(); ++ return start; ++ } ++ ++ // Support for intptr_t get_previous_sp() ++ // ++ // This routine is used to find the previous stack pointer for the ++ // caller. ++ address generate_get_previous_sp() { ++ StubCodeMark mark(this, "StubRoutines", "get_previous_sp"); ++ address start = __ pc();__ stop("unimplement generate_get_previous_sp"); ++ ++ __ movl(V0, RA); ++ //__ addptr(V0, 8, V0); // return address is at the top of the stack. 
++ __ ret_sw(); ++ ++ return start; ++ } ++ ++ address generate_f2i_fixup() { ++ StubCodeMark mark(this, "StubRoutines", "f2i_fixup"); ++ Address inout(esp, 5 * wordSize); // return address + 4 saves ++ ++ address start = __ pc(); ++ ++ Label L; ++ ShouldNotReachHere(); ++// __ push(rax); ++// __ push(c_rarg3); ++// __ push(c_rarg2); ++// __ push(c_rarg1); ++// ++// __ movl(rax, 0x7f800000); ++// __ xorl(c_rarg3, c_rarg3); ++// __ movl(c_rarg2, inout); ++// __ movl(c_rarg1, c_rarg2); ++// __ andl(c_rarg1, 0x7fffffff); ++// __ cmpl(rax, c_rarg1); // NaN? -> 0 ++// __ jcc(Assembler::negative, L); ++// __ testl(c_rarg2, c_rarg2); // signed ? min_jint : max_jint ++// __ movl(c_rarg3, 0x80000000); ++// __ movl(rax, 0x7fffffff); ++// __ cmovl(Assembler::positive, c_rarg3, rax); ++// ++// __ bind(L); ++// __ movptr(inout, c_rarg3); ++// ++// __ pop(c_rarg1); ++// __ pop(c_rarg2); ++// __ pop(c_rarg3); ++// __ pop(rax); ++// ++// __ ret(0); ++ ++ return start; ++ } ++ ++ address generate_f2l_fixup() { ++ StubCodeMark mark(this, "StubRoutines", "f2l_fixup"); ++ Address inout(esp, 5 * wordSize); // return address + 4 saves ++ address start = __ pc(); ++ ShouldNotReachHere(); ++ Label L; ++ ++// __ push(rax); ++// __ push(c_rarg3); ++// __ push(c_rarg2); ++// __ push(c_rarg1); ++// ++// __ movl(rax, 0x7f800000); ++// __ xorl(c_rarg3, c_rarg3); ++// __ movl(c_rarg2, inout); ++// __ movl(c_rarg1, c_rarg2); ++// __ andl(c_rarg1, 0x7fffffff); ++// __ cmpl(rax, c_rarg1); // NaN? -> 0 ++// __ jcc(Assembler::negative, L); ++// __ testl(c_rarg2, c_rarg2); // signed ? min_jlong : max_jlong ++// __ mov64(c_rarg3, 0x8000000000000000); ++// __ mov64(rax, 0x7fffffffffffffff); ++// __ cmov(Assembler::positive, c_rarg3, rax); ++// ++// __ bind(L); ++// __ movptr(inout, c_rarg3); ++// ++// __ pop(c_rarg1); ++// __ pop(c_rarg2); ++// __ pop(c_rarg3); ++// __ pop(rax); ++// ++// __ ret(0); ++ ++ return start; ++ } ++ ++ address generate_d2i_fixup() { ++ StubCodeMark mark(this, "StubRoutines", "d2i_fixup"); ++ Address inout(esp, 6 * wordSize); // return address + 5 saves ++ ++ address start = __ pc(); ++ ShouldNotReachHere(); ++// Label L; ++// ++// __ push(rax); ++// __ push(c_rarg3); ++// __ push(c_rarg2); ++// __ push(c_rarg1); ++// __ push(c_rarg0); ++// ++// __ movl(rax, 0x7ff00000); ++// __ movq(c_rarg2, inout); ++// __ movl(c_rarg3, c_rarg2); ++// __ mov(c_rarg1, c_rarg2); ++// __ mov(c_rarg0, c_rarg2); ++// __ negl(c_rarg3); ++// __ shrptr(c_rarg1, 0x20); ++// __ orl(c_rarg3, c_rarg2); ++// __ andl(c_rarg1, 0x7fffffff); ++// __ xorl(c_rarg2, c_rarg2); ++// __ shrl(c_rarg3, 0x1f); ++// __ orl(c_rarg1, c_rarg3); ++// __ cmpl(rax, c_rarg1); ++// __ jcc(Assembler::negative, L); // NaN -> 0 ++// __ testptr(c_rarg0, c_rarg0); // signed ? 
min_jint : max_jint ++// __ movl(c_rarg2, 0x80000000); ++// __ movl(rax, 0x7fffffff); ++// __ cmov(Assembler::positive, c_rarg2, rax); ++// ++// __ bind(L); ++// __ movptr(inout, c_rarg2); ++// ++// __ pop(c_rarg0); ++// __ pop(c_rarg1); ++// __ pop(c_rarg2); ++// __ pop(c_rarg3); ++// __ pop(rax); ++// ++// __ ret(0); ++ ++ return start; ++ } ++ ++ address generate_d2l_fixup() { ++ StubCodeMark mark(this, "StubRoutines", "d2l_fixup"); ++ Address inout(esp, 6 * wordSize); // return address + 5 saves ++ ++ address start = __ pc(); ++ ShouldNotReachHere(); ++ Label L; ++ ++// __ push(rax); ++// __ push(c_rarg3); ++// __ push(c_rarg2); ++// __ push(c_rarg1); ++// __ push(c_rarg0); ++// ++// __ movl(rax, 0x7ff00000); ++// __ movq(c_rarg2, inout); ++// __ movl(c_rarg3, c_rarg2); ++// __ mov(c_rarg1, c_rarg2); ++// __ mov(c_rarg0, c_rarg2); ++// __ negl(c_rarg3); ++// __ shrptr(c_rarg1, 0x20); ++// __ orl(c_rarg3, c_rarg2); ++// __ andl(c_rarg1, 0x7fffffff); ++// __ xorl(c_rarg2, c_rarg2); ++// __ shrl(c_rarg3, 0x1f); ++// __ orl(c_rarg1, c_rarg3); ++// __ cmpl(rax, c_rarg1); ++// __ jcc(Assembler::negative, L); // NaN -> 0 ++// __ testq(c_rarg0, c_rarg0); // signed ? min_jlong : max_jlong ++// __ mov64(c_rarg2, 0x8000000000000000); ++// __ mov64(rax, 0x7fffffffffffffff); ++// __ cmovq(Assembler::positive, c_rarg2, rax); ++// ++// __ bind(L); ++// __ movq(inout, c_rarg2); ++// ++// __ pop(c_rarg0); ++// __ pop(c_rarg1); ++// __ pop(c_rarg2); ++// __ pop(c_rarg3); ++// __ pop(rax); ++// ++// __ ret(0); ++ ++ return start; ++ } ++ ++ address generate_fp_mask(const char *stub_name, int64_t mask) { ++// __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", stub_name); ++ address start = __ pc(); ++ ShouldNotReachHere(); ++ ++// __ emit_data64( mask, relocInfo::none ); ++// __ emit_data64( mask, relocInfo::none ); ++ ++ return start; ++ } ++ ++ // Non-destructive plausibility checks for oops ++ // ++ // Arguments: ++ // all args on stack! ++ // ++ // Stack after saving c_rarg3: ++ // [tos + 0]: saved c_rarg3 ++ // [tos + 1]: saved c_rarg2 ++ // [tos + 2]: saved r12_heapbase (several TemplateTable methods use it) ++ // [tos + 3]: rscratch1 ++ // [tos + 4]: last RA ++ // * [tos + 5]: error message (char*) ++ // * [tos + 6]: object to verify (oop) ++ // * [tos + 7]: saved rax - saved by caller and bashed ++ // * = popped on exit ++ address generate_verify_oop() { ++ StubCodeMark mark(this, "StubRoutines", "verify_oop"); ++ address start = __ pc(); ++ Register rax = V0; ++ ++ Label exit, error; ++ __ push(rscratch1); ++ __ push(r12_heapbase); ++ ++ // save c_rarg2 and c_rarg3 ++ __ push(c_rarg2); ++ __ push(c_rarg3); ++ ++ __ incrementw(ExternalAddress((address) StubRoutines::verify_oop_count_addr()), 1, c_rarg2, c_rarg3); ++ ++ enum { ++ // After previous pushes. ++ oop_to_verify = 6 * wordSize, ++ saved_rax = 7 * wordSize, ++ ++ // Before the call to MacroAssembler::debug(), see below. 
++ error_msg = 25 * wordSize ++ }; ++ ++ // get object ++ __ ldptr(rax, Address(esp, oop_to_verify)); ++ ++ // make sure object is 'reasonable' ++ __ jcc(Assembler::zero, exit, rax); // if obj is NULL it is OK ++ ++#if INCLUDE_ZGC ++ if (UseZGC) { ++ // Check if metadata bits indicate a bad oop ++ __ lea(rscratch3, Address(rthread, ZThreadLocalData::address_bad_mask_offset())); ++ __ testptr(rax, rscratch3); ++ __ jcc(Assembler::notZero, error); ++ } ++#endif ++ ++ // Check if the oop is in the right area of memory ++ __ movl(c_rarg2, rax); ++ __ mov_immediate64(c_rarg3, (intptr_t) Universe::verify_oop_mask()); ++ __ andptr(c_rarg2, c_rarg3, c_rarg2); ++ __ mov_immediate64(c_rarg3, (intptr_t) Universe::verify_oop_bits()); ++ __ cmpptr(c_rarg2, c_rarg3); ++ __ jcc(Assembler::notZero, error); ++ ++ // set rheapbaase to heapbase for load_klass() ++ __ reinit_heapbase(); ++ ++ // make sure klass is 'reasonable', which is not zero. ++ __ load_klass(rax, rax); // get klass ++ __ jcc(Assembler::zero, error, rax); // if klass is NULL it is broken ++ ++ // return if everything seems ok ++ __ bind(exit); ++ __ pop(c_rarg3); // restore c_rarg3 ++ __ pop(c_rarg2); // restore c_rarg2 ++ __ pop(r12_heapbase); // restore r12_heapbase ++ __ pop(rscratch1); // restore rscratch1 ++ ++ __ ret(); ++ ++ // handle errors ++ __ bind(error); ++ __ ldptr(rax, Address(esp, saved_rax)); // get saved rax back ++ __ pop(c_rarg3); // get saved c_rarg3 back ++ __ pop(c_rarg2); // get saved c_rarg2 back ++ __ pop(r12_heapbase); // get saved r12 back ++ __ pop(rscratch1); // restore rscratch1 ++ // will be ignored ++ __ push(RA); // sw need to save RA which need by ret after calling of debug64 ++ __ pushad(); // push registers ++ // (rip is already ++ // already pushed) ++ // debug(char* msg, int64_t pc, int64_t regs[]) ++ // We've popped the registers we'd saved (c_rarg3, c_rarg2 and flags), and ++ // pushed all the registers, so now the stack looks like: ++ // [tos + 0] 24 saved registers ++ // [tos + 24] current_RA ++ // [tos + 25] last RA ++ // * [tos + 26] error message (char*) ++ // * [tos + 27] object to verify (oop) ++ // * [tos + 28] saved rax - saved by caller and bashed ++ // * = popped on exit ++ ++ __ ldptr(c_rarg0, Address(esp, error_msg)); // pass address of error message ++ __ movl(c_rarg1, RA); // pass return address ++ __ movl(c_rarg2, esp); // pass address of regs on stack ++ __ movl(r12_heapbase, esp); // remember rsp ++ //__ subptr(esp, frame::arg_reg_save_area_bytes, esp); // windows ++ __ andptr(esp, -16, esp); // align stack as required by ABI ++ BLOCK_COMMENT("call MacroAssembler::debug"); ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, MacroAssembler::debug64))); ++ __ movl(esp, r12_heapbase); // restore rsp ++ __ popad(); // pop registers (includes r12_heapbase) ++ __ pop(RA); ++ __ ret(); ++ ++ return start; ++ } ++ ++ // ++ // Verify that a register contains clean 32-bits positive value ++ // (high 32-bits are 0) so it could be used in 64-bits shifts. 
++ // ++ // Input: ++ // Rint - 32-bits value ++ // Rtmp - scratch ++ // ++ void assert_clean_int(Register Rint, Register Rtmp) { ++#ifdef ASSERT ++ Label L; ++ assert_different_registers(Rtmp, Rint); ++ __ movws(Rtmp, Rint); ++ __ cmpl_raw(Rtmp, Rint); ++ __ jcc(Assembler::equal, L); ++ __ stop("high 32-bits of int value are not 0"); ++ __ bind(L); ++#endif ++ } ++ ++ // Generate overlap test for array copy stubs ++ // ++ // Input: ++ // c_rarg0 - from ++ // c_rarg1 - to ++ // c_rarg2 - element count ++ // ++ // Output: ++ // rax - &from[element count - 1] ++ // ++ void array_overlap_test(address no_overlap_target, Address::ScaleFactor sf) {SCOPEMARK_NAME(array_overlap_test, _masm) ++ assert(no_overlap_target != NULL, "must be generated"); ++ array_overlap_test(no_overlap_target, NULL, sf); ++ } ++ void array_overlap_test(Label& L_no_overlap, Address::ScaleFactor sf) { ++ array_overlap_test(NULL, &L_no_overlap, sf); ++ } ++ void array_overlap_test(address no_overlap_target, Label* NOLp, Address::ScaleFactor sf) { ++ const Register from = c_rarg0; ++ const Register to = c_rarg1; ++ const Register count = c_rarg2; ++ const Register end_from = V0; ++ ++ __ cmpptr(to, from); ++ __ lea(end_from, Address(from, count, sf, 0)); ++ if (NOLp == NULL) { ++ ExternalAddress no_overlap(no_overlap_target); ++ __ jump_cc(Assembler::belowEqual, no_overlap); ++ __ cmpptr(to, end_from); ++ __ jump_cc(Assembler::aboveEqual, no_overlap); ++ } else { ++ __ jcc(Assembler::belowEqual, (*NOLp)); ++ __ cmpptr(to, end_from); ++ __ jcc(Assembler::aboveEqual, (*NOLp)); ++ } ++ } ++ ++ void array_overlap_test(address no_overlap_target, int log2_elem_size) {ShouldNotReachHere(); ++ int elem_size = 1 << log2_elem_size; ++ Address::ScaleFactor sf = Address::times_1; ++ ++ switch (log2_elem_size) { ++ case 0: sf = Address::times_1; break; ++ case 1: sf = Address::times_2; break; ++ case 2: sf = Address::times_4; break; ++ case 3: sf = Address::times_8; break; ++ } ++ ++ ExternalAddress no_overlap(no_overlap_target); ++ ++ __ slll(A2, sf, AT); ++ __ addl(AT, A0, AT); ++ __ addptr(AT, -elem_size, T12); ++ __ cmpl_raw(A1, A0); ++ __ jump_cc(Assembler::lessEqual, no_overlap); ++ __ cmpl_raw(A1, T12); ++ __ jump_cc(Assembler::greater, no_overlap); ++ ++ // If A0 = 0xf... and A1 = 0x0..., than goto no_overlap_target ++ Label L; ++ __ jcc(Assembler::greaterEqual, L, A0); ++ __ jump_cc(Assembler::greater, no_overlap, A1); ++ __ bind(L); ++ ++ } ++ ++ // Shuffle first three arg regs on Windows into Linux/Solaris locations. ++ // ++ // Outputs: ++ // rdi - rcx ++ // rsi - rdx ++ // rdx - r8 ++ // rcx - r9 ++ // ++ // Registers r9 and r10 are used to save rdi and rsi on Windows, which latter ++ // are non-volatile. r9 and r10 should not be used by the caller. 
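++  // (Editor's note: on sw64 the first four C arguments already arrive in
++  // A0..A3 -- see the assert in setup_arg_regs below -- so no shuffling is
++  // done here and the Windows register notes above are inherited from the
++  // x86 version.)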
++ // ++ void setup_arg_regs(int nargs = 3) { ++ assert(nargs == 3 || nargs == 4, "else fix"); ++ assert(c_rarg0 == A0 && c_rarg1 == A1 && c_rarg2 == A2 && c_rarg3 == A3, ++ "unexpected argument registers"); ++ } ++ ++ void restore_arg_regs() { ++ } ++ ++ ++ // Copy big chunks forward ++ // ++ // Inputs: ++ // end_from - source arrays end address ++ // end_to - destination array end address ++ // qword_count - 64-bits element count, negative ++ // to - scratch ++ // L_copy_bytes - entry label ++ // L_copy_8_bytes - exit label ++ // ++ void copy_bytes_forward(Register end_from, Register end_to, ++ Register qword_count, Register to, ++ Label& L_copy_bytes, Label& L_copy_8_bytes) { ++ ShouldNotReachHere(); ++// DEBUG_ONLY(__ stop("enter at entry label, not here")); ++// Label L_loop; ++// __ align(OptoLoopAlignment); ++// if (UseUnalignedLoadStores) { ++// Label L_end; ++// // Copy 64-bytes per iteration ++// if (UseAVX > 2) { ++// Label L_loop_avx512, L_loop_avx2, L_32_byte_head, L_above_threshold, L_below_threshold; ++// ++// __ BIND(L_copy_bytes); ++// __ cmpptr(qword_count, (-1 * AVX3Threshold / 8)); ++// __ jccb(Assembler::less, L_above_threshold); ++// __ jmpb(L_below_threshold); ++// ++// __ bind(L_loop_avx512); ++// __ evmovdqul(xmm0, Address(end_from, qword_count, Address::times_8, -56), Assembler::AVX_512bit); ++// __ evmovdqul(Address(end_to, qword_count, Address::times_8, -56), xmm0, Assembler::AVX_512bit); ++// __ bind(L_above_threshold); ++// __ addptr(qword_count, 8); ++// __ jcc(Assembler::lessEqual, L_loop_avx512); ++// __ jmpb(L_32_byte_head); ++// ++// __ bind(L_loop_avx2); ++// __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56)); ++// __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0); ++// __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24)); ++// __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1); ++// __ bind(L_below_threshold); ++// __ addptr(qword_count, 8); ++// __ jcc(Assembler::lessEqual, L_loop_avx2); ++// ++// __ bind(L_32_byte_head); ++// __ subptr(qword_count, 4); // sub(8) and add(4) ++// __ jccb(Assembler::greater, L_end); ++// } else { ++// __ BIND(L_loop); ++// if (UseAVX == 2) { ++// __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56)); ++// __ vmovdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0); ++// __ vmovdqu(xmm1, Address(end_from, qword_count, Address::times_8, -24)); ++// __ vmovdqu(Address(end_to, qword_count, Address::times_8, -24), xmm1); ++// } else { ++// __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -56)); ++// __ movdqu(Address(end_to, qword_count, Address::times_8, -56), xmm0); ++// __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, -40)); ++// __ movdqu(Address(end_to, qword_count, Address::times_8, -40), xmm1); ++// __ movdqu(xmm2, Address(end_from, qword_count, Address::times_8, -24)); ++// __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm2); ++// __ movdqu(xmm3, Address(end_from, qword_count, Address::times_8, - 8)); ++// __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm3); ++// } ++// ++// __ BIND(L_copy_bytes); ++// __ addptr(qword_count, 8); ++// __ jcc(Assembler::lessEqual, L_loop); ++// __ subptr(qword_count, 4); // sub(8) and add(4) ++// __ jccb(Assembler::greater, L_end); ++// } ++// // Copy trailing 32 bytes ++// if (UseAVX >= 2) { ++// __ vmovdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24)); ++// __ vmovdqu(Address(end_to, qword_count, 
Address::times_8, -24), xmm0); ++// } else { ++// __ movdqu(xmm0, Address(end_from, qword_count, Address::times_8, -24)); ++// __ movdqu(Address(end_to, qword_count, Address::times_8, -24), xmm0); ++// __ movdqu(xmm1, Address(end_from, qword_count, Address::times_8, - 8)); ++// __ movdqu(Address(end_to, qword_count, Address::times_8, - 8), xmm1); ++// } ++// __ addptr(qword_count, 4); ++// __ BIND(L_end); ++// if (UseAVX >= 2) { ++// // clean upper bits of YMM registers ++// __ vpxor(xmm0, xmm0); ++// __ vpxor(xmm1, xmm1); ++// } ++// } else { ++// // Copy 32-bytes per iteration ++// __ BIND(L_loop); ++// __ movq(to, Address(end_from, qword_count, Address::times_8, -24)); ++// __ movq(Address(end_to, qword_count, Address::times_8, -24), to); ++// __ movq(to, Address(end_from, qword_count, Address::times_8, -16)); ++// __ movq(Address(end_to, qword_count, Address::times_8, -16), to); ++// __ movq(to, Address(end_from, qword_count, Address::times_8, - 8)); ++// __ movq(Address(end_to, qword_count, Address::times_8, - 8), to); ++// __ movq(to, Address(end_from, qword_count, Address::times_8, - 0)); ++// __ movq(Address(end_to, qword_count, Address::times_8, - 0), to); ++// ++// __ BIND(L_copy_bytes); ++// __ addptr(qword_count, 4); ++// __ jcc(Assembler::lessEqual, L_loop); ++// } ++// __ subptr(qword_count, 4); ++// __ jcc(Assembler::less, L_copy_8_bytes); // Copy trailing qwords ++ } ++ ++ // Copy big chunks backward ++ // ++ // Inputs: ++ // from - source arrays address ++ // dest - destination array address ++ // qword_count - 64-bits element count ++ // to - scratch ++ // L_copy_bytes - entry label ++ // L_copy_8_bytes - exit label ++ // ++ void copy_bytes_backward(Register from, Register dest, ++ Register qword_count, Register to, ++ Label& L_copy_bytes, Label& L_copy_8_bytes) { ++ ShouldNotReachHere(); ++ /*DEBUG_ONLY(__ stop("enter at entry label, not here")); ++ Label L_loop; ++ __ align(OptoLoopAlignment); ++ if (UseUnalignedLoadStores) { ++ Label L_end; ++ // Copy 64-bytes per iteration ++ if (UseAVX > 2) { ++ Label L_loop_avx512, L_loop_avx2, L_32_byte_head, L_above_threshold, L_below_threshold; ++ ++ __ BIND(L_copy_bytes); ++ __ cmpptr(qword_count, (AVX3Threshold / 8)); ++ __ jccb(Assembler::greater, L_above_threshold); ++ __ jmpb(L_below_threshold); ++ ++ __ BIND(L_loop_avx512); ++ __ evmovdqul(xmm0, Address(from, qword_count, Address::times_8, 0), Assembler::AVX_512bit); ++ __ evmovdqul(Address(dest, qword_count, Address::times_8, 0), xmm0, Assembler::AVX_512bit); ++ __ bind(L_above_threshold); ++ __ subptr(qword_count, 8); ++ __ jcc(Assembler::greaterEqual, L_loop_avx512); ++ __ jmpb(L_32_byte_head); ++ ++ __ bind(L_loop_avx2); ++ __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32)); ++ __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0); ++ __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); ++ __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); ++ __ bind(L_below_threshold); ++ __ subptr(qword_count, 8); ++ __ jcc(Assembler::greaterEqual, L_loop_avx2); ++ ++ __ bind(L_32_byte_head); ++ __ addptr(qword_count, 4); // add(8) and sub(4) ++ __ jccb(Assembler::less, L_end); ++ } else { ++ __ BIND(L_loop); ++ if (UseAVX == 2) { ++ __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 32)); ++ __ vmovdqu(Address(dest, qword_count, Address::times_8, 32), xmm0); ++ __ vmovdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); ++ __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); ++ } else { ++ __ 
movdqu(xmm0, Address(from, qword_count, Address::times_8, 48)); ++ __ movdqu(Address(dest, qword_count, Address::times_8, 48), xmm0); ++ __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 32)); ++ __ movdqu(Address(dest, qword_count, Address::times_8, 32), xmm1); ++ __ movdqu(xmm2, Address(from, qword_count, Address::times_8, 16)); ++ __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm2); ++ __ movdqu(xmm3, Address(from, qword_count, Address::times_8, 0)); ++ __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm3); ++ } ++ ++ __ BIND(L_copy_bytes); ++ __ subptr(qword_count, 8); ++ __ jcc(Assembler::greaterEqual, L_loop); ++ ++ __ addptr(qword_count, 4); // add(8) and sub(4) ++ __ jccb(Assembler::less, L_end); ++ } ++ // Copy trailing 32 bytes ++ if (UseAVX >= 2) { ++ __ vmovdqu(xmm0, Address(from, qword_count, Address::times_8, 0)); ++ __ vmovdqu(Address(dest, qword_count, Address::times_8, 0), xmm0); ++ } else { ++ __ movdqu(xmm0, Address(from, qword_count, Address::times_8, 16)); ++ __ movdqu(Address(dest, qword_count, Address::times_8, 16), xmm0); ++ __ movdqu(xmm1, Address(from, qword_count, Address::times_8, 0)); ++ __ movdqu(Address(dest, qword_count, Address::times_8, 0), xmm1); ++ } ++ __ subptr(qword_count, 4); ++ __ BIND(L_end); ++ if (UseAVX >= 2) { ++ // clean upper bits of YMM registers ++ __ vpxor(xmm0, xmm0); ++ __ vpxor(xmm1, xmm1); ++ } ++ } else { ++ // Copy 32-bytes per iteration ++ __ BIND(L_loop); ++ __ movq(to, Address(from, qword_count, Address::times_8, 24)); ++ __ movq(Address(dest, qword_count, Address::times_8, 24), to); ++ __ movq(to, Address(from, qword_count, Address::times_8, 16)); ++ __ movq(Address(dest, qword_count, Address::times_8, 16), to); ++ __ movq(to, Address(from, qword_count, Address::times_8, 8)); ++ __ movq(Address(dest, qword_count, Address::times_8, 8), to); ++ __ movq(to, Address(from, qword_count, Address::times_8, 0)); ++ __ movq(Address(dest, qword_count, Address::times_8, 0), to); ++ ++ __ BIND(L_copy_bytes); ++ __ subptr(qword_count, 4); ++ __ jcc(Assembler::greaterEqual, L_loop); ++ } ++ __ addptr(qword_count, 4); ++ __ jcc(Assembler::greater, L_copy_8_bytes); // Copy trailing qwords ++ * */ ++ } ++ ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ address generate_disjoint_byte_copy(bool aligned, address* entry, const char * name) {SCOPEMARK_NAME(generate_disjoint_byte_copy, _masm) ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Register src = c_rarg0; ++ Register dst = c_rarg1; ++ Register count = c_rarg2; ++ Register tmp1 = rscratch1; ++ Register tmp2 = rscratch2; ++ ++ address start = __ pc(); ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ assert_clean_int(c_rarg2, rscratch3); // Make sure 'count' is clean int. 
++ ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ __ block_comment("Entry:"); ++ } ++ ++ //__ movl(src, A0); ++ //__ movl(dst, A1); ++ //__ movl(count, A2); ++ ++ Label l_align_dst, l_align_src, l_tail_bytes, l_end, l_tail; ++ ++ if(UseSimdForward){ ++ __ beq_l(count, l_end); ++ ++ __ cmple(count, 63, tmp1); ++ __ bne_l(tmp1, l_tail_bytes); //when count <= 63, don't use simd ++ ++ __ BIND(l_align_dst); ++ __ and_ins(dst, 31, tmp1); //is dst 0mod32? ++ __ beq_l(tmp1, l_align_src); ++ ++ __ ldbu(src, 0, tmp1); //grab 1 byte at a time, until dst is 0mod32 ++ __ stb(tmp1, 0, dst); ++ __ subl(count, 1, count); ++ __ addl(dst, 1, dst); ++ __ addl(src, 1, src); ++ __ beq_l(R0, l_align_dst); ++ ++ __ BIND(l_align_src); ++ copy_core_forward(32, src, dst, count, tmp1, tmp2); ++ ++ __ BIND(l_tail); ++ __ ble_l(count, l_end); ++ ++ //copy tail bytes. ++ __ BIND(l_tail_bytes); ++ __ ldbu(src, 0, tmp1); ++ __ stb(tmp1, 0, dst); ++ __ addl(src, 1, src); ++ __ addl(dst, 1, dst); ++ __ subl(count, 1, count); ++ __ bne_l(count, l_tail_bytes); ++ ++ __ BIND(l_end); ++ ++ }else{ ++ generate_disjoint_copy(0, src, dst, count); ++ } ++ ++ inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter ++ __ movl(V0, R0); // return 0 ++ __ leave(); ++ __ ret(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_byte_copy(bool aligned, address nooverlap_target, ++ address* entry, const char *name) {SCOPEMARK_NAME(generate_conjoint_byte_copy, _masm); ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++//__ stop("generate_conjoint_byte_copy"); ++ Label l_exit; ++ Label l_copy_byte; ++ Label l_align_dst, l_align_src, l_tail_bytes, l_end, l_tail; ++ ++ /*address nooverlap_target = aligned ? ++ StubRoutines::arrayof_jbyte_disjoint_arraycopy() : ++ StubRoutines::jbyte_disjoint_arraycopy();*/ ++ ++ Register src = c_rarg0; // source array address ++ Register dst = c_rarg1; // destination array address ++ Register count = c_rarg2; // elements count ++ Register end_src = src; // source array end address ++ Register end_dst = dst; // destination array end address ++ Register end_count = count; // destination array end address ++ Register tmp1 = rscratch1; ++ Register tmp2 = rscratch2; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ assert_clean_int(c_rarg2, rscratch3); // Make sure 'count' is clean int. 
++ ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ __ block_comment("Entry:"); ++ } ++ ++ array_overlap_test(nooverlap_target, Address::times_1); ++ // copy from high to low ++ //__ movl(end_count, count); ++ __ addl(src, end_count, end_src); ++ __ addl(dst, end_count, end_dst); ++ ++ if (UseSimdBackward) { ++ ++ __ beq_l(count, l_end); ++ ++ __ cmple(count, 63, tmp1); ++ __ bne_l(tmp1, l_tail_bytes); //when count <= 63, don't use simd ++ ++ __ BIND(l_align_dst); ++ __ and_ins(end_dst, 31, tmp1); //is dst 0mod32? ++ __ beq_l(tmp1, l_align_src); ++ ++ __ ldbu(end_src, -1, tmp2); //grab 1 bytes at a time, until dst is 0mod32 ++ __ stb(tmp2, -1, end_dst); ++ __ subl(count, 1, count); ++ __ subl(end_dst, 1, end_dst); ++ __ subl(end_src, 1, end_src); ++ __ beq_l(R0, l_align_dst); ++ ++ __ BIND(l_align_src); ++ copy_core_backward(32, end_src, end_dst, count, tmp1, tmp2); ++ ++ __ BIND(l_tail); ++ __ ble_l(count, l_end); ++ ++ __ BIND(l_tail_bytes); ++ __ ldbu(end_src, -1, tmp1); ++ __ stb(tmp1, -1, end_dst); ++ __ subl(end_src, 1, end_src); ++ __ subl(end_dst, 1, end_dst); ++ __ subl(count, 1, count); ++ __ bne_l(count, l_tail_bytes); ++ ++ __ BIND(l_end); ++ ++ } else { ++ generate_conjoint_copy(0, end_src, end_dst, end_count); ++ } ++ ++ inc_counter_np(SharedRuntime::_jbyte_array_copy_ctr); // Update counter ++ __ movl(V0, R0); // return 0 ++ __ leave(); ++ __ ret(); ++ return start; ++ } ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_short_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_short_copy(). ++ // ++ address generate_disjoint_short_copy(bool aligned, address *entry, const char * name) {SCOPEMARK_NAME(generate_disjoint_short_copy, _masm) ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ ++ Register src = T0; ++ Register dst = T1; ++ Register count = T3; ++ Register tmp1 = rscratch1; ++ Register tmp2 = rscratch2; ++ ++ Register tmp4 = T11; ++ Register tmp5 = T12; ++ Register tmp6 = T2; ++ ++ address start = __ pc(); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ assert_clean_int(c_rarg2, V0); // Make sure 'count' is clean int. 
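++
++ // Here count is in 2-byte elements, not bytes: the count <= 31 SIMD guard
++ // mirrors the byte stub's ~64-byte threshold, copy_core_forward is called
++ // with 16 elements per 32-byte block, and the non-SIMD fallback first shifts
++ // count left by 1 so that generate_disjoint_copy (whose first argument
++ // appears to be the log2 element size) is handed a byte count.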
++ ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ __ block_comment("Entry:"); ++ } ++ ++ __ movl(src, A0); ++ __ movl(dst, A1); ++ __ movl(count, A2); ++ ++ Label l_1, l_2, l_3, l_4, l_5, l_6, l_7, l_8, l_9, l_10, l_11, l_12, l_13, l_14; ++ Label l_debug; ++ Label l_align_dst, l_align_src, l_tail_2_bytes, l_end, l_tail; ++ // don't try anything fancy if arrays don't have many elements ++ ++ if(UseSimdForward){ ++ __ cmple(count, 31, tmp1); //if count < 32(bytes < 64), then copy 2 bytes at a time ++ __ bne_l(tmp1, l_tail); ++ ++ __ BIND(l_align_dst); ++ __ and_ins(dst, 31, tmp1); ++ __ beq_l(tmp1, l_align_src); ++ ++ __ ldhu(src, 0, tmp2); ++ __ subl(count, 1, count); ++ __ sth(tmp2, 0, dst); ++ __ addl(src, 2, src); ++ __ addl(dst, 2, dst); ++ __ beq_l(R0, l_align_dst); ++ ++ __ BIND(l_align_src); ++ copy_core_forward(16, src, dst, count, tmp1, tmp2); ++ ++ __ BIND(l_tail); ++ __ ble_l(count, l_end); ++ ++ __ BIND(l_tail_2_bytes); ++ __ ldhu(src, 0, tmp1); ++ __ sth(tmp1, 0, dst); ++ __ addl(src, 2, src); ++ __ addl(dst, 2, dst); ++ __ subl(count, 1, count); ++ __ bne_l(count, l_tail_2_bytes); ++ ++ ++ __ BIND(l_end); ++ ++ } else { ++ __ slll(count, 1, count); ++ generate_disjoint_copy(1, src, dst, count); ++ } ++ ++ inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter ++ __ movl(V0, R0); // return 0 ++ __ leave(); ++ __ ret(); ++ ++ __ bind(l_debug); ++ __ stop("generate_disjoint_short_copy should not reach here"); ++ return start; ++ } ++ ++ ++ address generate_fill(BasicType t, bool aligned, const char *name) {SCOPEMARK_NAME(generate_fill, _masm) ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ __ block_comment("Entry:"); ++ ++ const Register to = c_rarg0; // source array address ++ const Register value = c_rarg1; // value ++ const Register count = c_rarg2; // elements count ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ generate_fill(t, aligned, to, value, count, FSR); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++ // let the hardware handle it. The two or four words within dwords ++ // or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ address generate_conjoint_short_copy(bool aligned, address nooverlap_target, ++ address* entry, const char *name) {SCOPEMARK_NAME(generate_conjoint_short_copy, _masm) ++ Label l_tail_2_bytes, l_align_dst, l_align_src, l_tail, l_end, l_exit, l_copy_2_bytes; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ Register end_src = T3; ++ Register end_dst = T0; ++ Register count = T1; ++ Register tmp1 = rscratch1; ++ Register tmp2 = rscratch2; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ assert_clean_int(c_rarg2, V0); // Make sure 'count' is clean int. 
++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ __ block_comment("Entry:"); ++ } ++ ++ array_overlap_test(nooverlap_target, Address::times_2); ++ __ movl(end_src, A0); ++ __ movl(end_dst, A1); ++ __ movl(count, A2); ++ ++ if(UseSimdBackward){ ++ __ beq_l(count, l_end); ++ ++ __ sll(T1, Address::times_2, tmp1); ++ __ addl(T3, tmp1, end_src); ++ __ addl(T0, tmp1, end_dst); ++ ++ __ cmple(count, 31, tmp1); ++ __ bne_l(tmp1, l_tail_2_bytes); //when count <= 31, don't use simd ++ ++ __ BIND(l_align_dst); ++ __ and_ins(end_dst, 31, tmp1); //is dst 0mod32? ++ __ beq_l(tmp1, l_align_src); ++ ++ __ ldhu(end_src, -2, tmp2); //grab 2 bytes at a time, until dst is 0mod32 ++ __ sth(tmp2, -2, end_dst); ++ __ subl(count, 1, count); ++ __ subl(end_dst, 2, end_dst); ++ __ subl(end_src, 2, end_src); ++ __ beq_l(R0, l_align_dst); ++ ++ __ BIND(l_align_src); ++ copy_core_backward(16, end_src, end_dst, count, tmp1, tmp2); ++ ++ __ BIND(l_tail); ++ __ ble_l(count, l_end); ++ ++ __ BIND(l_tail_2_bytes); ++ __ ldhu(end_src, -2, tmp1); ++ __ sth(tmp1, -2, end_dst); ++ __ subl(end_src, 2, end_src); ++ __ subl(end_dst, 2, end_dst); ++ __ subl(count, 1, count); ++ __ bne_l(count, l_tail_2_bytes); ++ ++ __ BIND(l_end); ++ ++ }else{ ++ __ slll(count, 1, count); ++ __ addl(T3, count, end_src); ++ __ addl(T0, count, end_dst); ++ generate_conjoint_copy(1, end_src, end_dst, count); ++ } ++ ++ inc_counter_np(SharedRuntime::_jshort_array_copy_ctr); // Update counter ++ __ movl(V0, R0); // return 0 ++ __ leave(); ++ __ ret(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ // Side Effects: ++ // disjoint_int_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_int_oop_copy(). ++ // ++ address generate_disjoint_int_oop_copy(bool aligned, bool is_oop, address* entry, const char *name, bool dest_uninitialized = false) {SCOPEMARK_NAME(generate_disjoint_int_oop_copy, _masm) ++ Label l_tail_4_bytes, l_align_dst, l_align_src, l_align_simd, l_misalign, l_misalign_simd, l_tail, l_before_tail, l_end; ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ Register src = T3; ++ Register dst = T0; ++ Register count = T1; ++ Register dword_count = T4; ++ Register tmp1 = rscratch1; ++ Register tmp2 = rscratch2; ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ assert_clean_int(c_rarg2, V0); // Make sure 'count' is clean int. ++ ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ __ block_comment("Entry:"); ++ } ++ ++ __ movl(src, A0); ++ __ movl(dst, A1); ++ __ movl(count, A2); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BasicType type = is_oop ? 
T_OBJECT : T_INT; ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, type, src, dst, count); ++ ++ __ movl(dword_count, count); ++ ++ if(UseSimdForward){ ++ __ cmple(count, 15, tmp1); ++ __ bne_l(tmp1, l_tail); ++ ++ __ BIND(l_align_dst); ++ __ and_ins(dst, 31, tmp1); ++ __ beq_l(tmp1, l_align_src); ++ ++ __ ldw(src, 0, tmp1); ++ __ subl(count, 1, count); ++ __ stw(tmp1, 0, dst); ++ __ addl(src, 4, src); ++ __ addl(dst, 4, dst); ++ __ beq_l(R0, l_align_dst); ++ ++ __ BIND(l_align_src); ++ copy_core_forward(8, src, dst, count, tmp1, tmp2); ++ ++ __ BIND(l_tail); ++ __ ble_l(count, l_end); ++ ++ __ BIND(l_tail_4_bytes); ++ __ ldw(src, 0, tmp2); ++ __ stw(tmp2, 0, dst); ++ __ addl(src, 4, src); ++ __ addl(dst, 4, dst); ++ __ subl(count, 1, count); ++ __ bne_l(count, l_tail_4_bytes); ++ ++ ++ __ BIND(l_end); ++ ++ } else { ++ __ slll(count, 2, count); ++ generate_disjoint_copy(2, src, dst, count); ++ } ++ ++ bs->arraycopy_epilogue(_masm, decorators, type, src, dst, dword_count); ++ inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter ++ __ movl(V0, R0); ++ __ leave(); ++ __ ret(); ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_int_oop_copy(bool aligned, bool is_oop, address nooverlap_target, ++ address* entry, const char *name, bool dest_uninitialized = false) { ++ Label l_2, l_4; ++ Label l_tail_4_bytes, l_align_dst, l_align_src, l_tail, l_end; ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ Register from = c_rarg0; ++ Register to = c_rarg1; ++ Register end_src = T3; ++ Register end_dst = T0; ++ Register count = T1; ++ Register tmp1 = rscratch1; ++ Register tmp2 = rscratch2; ++ Register dword_count = T4; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ __ block_comment("Entry:"); ++ } ++ ++ array_overlap_test(nooverlap_target, Address::times_4); ++ ++ __ movl(count, A2); ++ __ movl(end_src, A0); ++ __ movl(end_dst, A1); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BasicType type = is_oop ? T_OBJECT : T_INT; ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ // no registers are destroyed by this call ++ bs->arraycopy_prologue(_masm, decorators, type, from, to, count); ++ ++ assert_clean_int(count, V0); // Make sure 'count' is clean int. 
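++
++ // dword_count keeps the original element count alive across the copy: the
++ // SIMD and fallback loops below consume count, but arraycopy_epilogue still
++ // needs the full count so the GC barrier covers every copied element.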
++ __ movl(dword_count, count); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ ++ if(UseSimdBackward){ ++ __ beq_l(count, l_end); ++ ++ __ sll(T1, Address::times_4, tmp1); ++ __ addl(T3, tmp1, end_src); ++ __ addl(T0, tmp1, end_dst); ++ ++ __ cmple(count, 15, tmp1); ++ __ bne_l(tmp1, l_tail_4_bytes); //when count <= 15, don't use simd ++ ++ __ BIND(l_align_dst); ++ __ and_ins(end_dst, 31, tmp1); //is dst 0mod32? ++ __ beq_l(tmp1, l_align_src); ++ ++ __ ldw(end_src, -4, tmp1); //grab 4 bytes at a time, until dst is 0mod32 ++ __ stw(tmp1, -4, end_dst); ++ __ subl(count, 1, count); ++ __ subl(end_dst, 4, end_dst); ++ __ subl(end_src, 4, end_src); ++ __ beq_l(R0, l_align_dst); // todo zl check? ++ ++ __ BIND(l_align_src); ++ copy_core_backward(8, end_src, end_dst, count, tmp1, tmp2); ++ ++ __ BIND(l_tail); ++ __ ble_l(count, l_end); ++ ++ __ BIND(l_tail_4_bytes); ++ __ ldw(end_src, -4, tmp1); ++ __ stw(tmp1, -4, end_dst); ++ __ subl(end_src, 4, end_src); ++ __ subl(end_dst, 4, end_dst); ++ __ subl(count, 1, count); ++ __ bne_l(count, l_tail_4_bytes); ++ ++ __ BIND(l_end); ++ ++ }else{ ++ __ slll(count, 2, count); ++ __ addl(end_src, count, end_src); ++ __ addl(end_dst, count, end_dst); ++ generate_conjoint_copy(2, end_src, end_dst, count); ++ } ++ ++ bs->arraycopy_epilogue(_masm, decorators, type, from, to, dword_count); ++ inc_counter_np(SharedRuntime::_jint_array_copy_ctr); // Update counter after rscratch1 is free ++ __ movl(V0, R0); // return 0 ++ ++ __ leave(); ++ __ ret(); ++ return start; ++ } ++ ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // Side Effects: ++ // disjoint_oop_copy_entry or disjoint_long_copy_entry is set to the ++ // no-overlap entry point used by generate_conjoint_long_oop_copy(). ++ // ++ address generate_disjoint_long_oop_copy(bool aligned, bool is_oop, address* entry, const char *name, bool dest_uninitialized = false) {SCOPEMARK_NAME(generate_disjoint_long_oop_copy, _masm) ++ Label l_3, l_4; ++ Label l_tail_8_bytes, l_align_dst, l_align_src, l_tail, l_end; ++ ++ Register src = c_rarg0; ++ Register dst = c_rarg1; ++ Register count = c_rarg2; ++ Register tmp1 = rscratch1; ++ Register tmp2 = rscratch2; ++ //Register saved_count = T11;//special, relate to arraycopy_prologue TODO:refactor, maybe put saved_count as parameter? jzy ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++//__ stop("generate_disjoint_long_oop_copy"); ++ //__ movl(src, A0); ++ //__ movl(dst, A1); ++ //__ movl(count, A2); ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ // Save no-overlap entry point for generate_conjoint_long_oop_copy() ++ assert_clean_int(c_rarg2, rscratch3); // Make sure 'count' is clean int. ++ ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ __ block_comment("Entry:"); ++ } ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BasicType type = is_oop ? 
T_OBJECT : T_LONG; ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, type, src, dst, count); ++ ++ // T3: source array address ++ // T0: destination array address ++ // T1: element count ++ //TODO:refact jzy saved_count(T11) should not changed before arraycopy_epilogue, because count is saved in arraycopy_prologue ++ if(UseSimdForward){ ++ __ align(16); ++ __ beq_l(count, l_end); ++ ++ __ cmple(count, 7, tmp1); ++ __ bne_l(tmp1, l_tail_8_bytes); //when count <= 7, don't use simd ++ ++ __ BIND(l_align_dst); ++ __ and_ins(dst, 31, tmp1); //is dst 0mod32? ++ __ beq_l(tmp1, l_align_src); ++ ++ __ ldl(src, 0, tmp1); //grab 8 bytes at a time, until dst is 0mod32 ++ __ stl(tmp1, 0, dst); ++ __ subl(count, 1, count); ++ __ addl(dst, 8, dst); ++ __ addl(src, 8, src); ++ __ beq_l(R0, l_align_dst); //todo zl check? ++ ++ __ BIND(l_align_src); ++ copy_core_forward(4, src, dst, count, tmp1, tmp2); ++ ++ __ BIND(l_tail); ++ __ ble_l(count, l_end); ++ ++ __ BIND(l_tail_8_bytes); ++ __ ldl(src, 0, tmp1); ++ __ stl(tmp1, 0, dst); ++ __ addl(src, 8, src); ++ __ addl(dst, 8, dst); ++ __ subl(count, 1, count); ++ __ bne_l(count, l_tail_8_bytes); ++ ++ __ BIND(l_end); ++ ++ }else{ ++ __ slll(count, 3, count); ++ generate_disjoint_copy(3, src, dst, count); ++ } ++ ++ bs->arraycopy_epilogue(_masm, decorators, type, src, dst, count); ++ if (is_oop) { ++ inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free ++ } else { ++ inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free ++ } ++ ++ __ movl(V0, R0); //return 0 ++ __ leave(); ++ __ ret(); ++ ++ return start; ++ } ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord boundary == 8 bytes ++ // ignored ++ // is_oop - true => oop array, so generate store check code ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++ // the hardware handle it. The two dwords within qwords that span ++ // cache line boundaries will still be loaded and stored atomicly. ++ // ++ address generate_conjoint_long_oop_copy(bool aligned, bool is_oop, ++ address nooverlap_target, address *entry, ++ const char *name, bool dest_uninitialized = false) {SCOPEMARK_NAME(generate_conjoint_long_oop_copy, _masm) ++ ++ Label l_1, l_2; ++ Label l_tail_8_bytes, l_align_dst, l_align_src, l_tail, l_end; ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ Register end_src = c_rarg0; ++ Register end_dst = c_rarg1; ++ Register count = c_rarg2; ++ Register tmp1 = rscratch1; ++ Register tmp2 = rscratch2; ++ ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ assert_clean_int(c_rarg2, rscratch3); // Make sure 'count' is clean int. 
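++
++ // This stub serves both T_LONG and (uncompressed-oop) T_OBJECT arrays, since
++ // both use 8-byte elements; is_oop only selects the BasicType handed to the
++ // barrier set and which SharedRuntime counter is bumped, while the copy
++ // loops themselves are identical.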
++ ++ if (entry != NULL) { ++ *entry = __ pc(); ++ // caller can pass a 64-bit byte count here (from Unsafe.copyMemory) ++ __ block_comment("Entry:"); ++ } ++ ++ array_overlap_test(nooverlap_target, Address::times_8); ++ //__ movl(end_src, A0); ++ //__ movl(end_dst, A1); ++ //__ movl(count, A2); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ if (aligned) { ++ decorators |= ARRAYCOPY_ALIGNED; ++ } ++ ++ BasicType type = is_oop ? T_OBJECT : T_LONG; ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, type, end_src, end_dst, count); ++ ++ if(UseSimdBackward){ ++ __ align(16); ++ __ beq_l(count, l_end); ++ ++ __ sll(T1, Address::times_8, tmp1); ++ __ addl(T3, tmp1, end_src); ++ __ addl(T0, tmp1, end_dst); ++ ++ __ cmple(count, 7, tmp1); ++ __ bne_l(tmp1, l_tail_8_bytes); //when count <= 7, don't use simd ++ ++ __ BIND(l_align_dst); ++ __ and_ins(end_dst, 31, tmp1); //is dst 0mod32? ++ __ beq_l(tmp1, l_align_src); ++ ++ __ ldl(end_src, -8, tmp1); //grab 8 bytes at a time, until dst is 0mod32 ++ __ stl(tmp1, -8, end_dst); ++ __ subl(count, 1, count); ++ __ subl(end_dst, 8, end_dst); ++ __ subl(end_src, 8, end_src); ++ __ ble_l(count, l_end); ++ __ beq_l(R0, l_align_dst); ++ ++ __ BIND(l_align_src); ++ copy_core_backward(4, end_src, end_dst, count, tmp1, tmp2); ++ ++ __ BIND(l_tail); ++ __ ble_l(count, l_end); ++ ++ __ BIND(l_tail_8_bytes); ++ __ ldl(end_src, -8, tmp1); ++ __ stl(tmp1, -8, end_dst); ++ __ subl(end_src, 8, end_src); ++ __ subl(end_dst, 8, end_dst); ++ __ subl(count, 1, count); ++ __ bne_l(count,l_tail_8_bytes); ++ ++ __ BIND(l_end); ++ ++ }else{ ++ __ slll(count, Address::times_8, count); ++ __ addl(end_src, count, end_src); ++ __ addl(end_dst, count, end_dst); ++ generate_conjoint_copy(3, end_src, end_dst, count); ++ } ++ ++ bs->arraycopy_epilogue(_masm, decorators, type, end_src, end_dst, count); ++ if (is_oop) { ++ inc_counter_np(SharedRuntime::_oop_array_copy_ctr); // Update counter after rscratch1 is free ++ } else { ++ inc_counter_np(SharedRuntime::_jlong_array_copy_ctr); // Update counter after rscratch1 is free ++ } ++ ++ __ movl(V0, R0); //return 0 ++ __ leave(); ++ __ ret(); ++ return start; ++ } ++ ++ ++ // Helper for generating a dynamic type check. ++ // Smashes no registers. ++ void generate_type_check(Register sub_klass, ++ Register super_check_offset, ++ Register super_klass, ++ Label& L_success, Register temp_reg = noreg, Register temp2_reg = noreg) { ++ assert_different_registers(sub_klass, super_check_offset, super_klass); ++ ++ __ block_comment("type_check:"); ++ Label L_miss; ++ ++ __ check_klass_subtype_fast_path(sub_klass, super_klass, temp_reg, &L_success, &L_miss, NULL, ++ super_check_offset); ++ __ check_klass_subtype_slow_path(sub_klass, super_klass, temp_reg, temp2_reg, &L_success, NULL); ++ ++ // Fall through on failure! 
++ __ bind(L_miss); ++ } ++ ++ // ++ // Generate checkcasting array copy stub ++ // ++ // Input: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // c_rarg3 - size_t ckoff (super_check_offset) ++ // not Win64 ++ // c_rarg4 - oop ckval (super_klass) ++ // Win64 ++ // rsp+40 - oop ckval (super_klass) ++ // ++ // Output: ++ // rax == 0 - success ++ // rax == -1^K - failure, where K is partial transfer count ++ // ++ address generate_checkcast_copy(const char *name, address *entry, ++ bool dest_uninitialized = false) {SCOPEMARK_NAME(generate_checkcast_copy, _masm) ++ ++ Label L_load_element, L_store_element, L_do_card_marks, L_done; ++ ++ // Input registers (after setup_arg_regs) ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register length = c_rarg2; // elements count ++ const Register ckoff = c_rarg3; // super_check_offset ++ const Register ckval = c_rarg4; // super_klass ++ ++ // Registers used as temps (r13, r14 are save-on-entry) ++ const Register end_from = from; // source array end address ++ const Register end_to = r13; // destination array end address ++ const Register count = c_rarg2; // -(count_remaining) ++ const Register r14_length = r14; // saved copy of length ++ // End pointers are inclusive, and if length is not zero they point ++ // to the last unit copied: end_to[0] := end_from[0] ++ ++// const Register rax = V0; ++// const Register r13 = end_to; ++// const Register r14 = r14_length; ++ //const Register rcx = ckoff; ++ const Register rax_oop = V0; // actual oop copied ++ const Register r11_klass = T4; // oop._klass ++ ++ //--------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the two arrays are subtypes of Object[] but the ++ // destination array type is not equal to or a supertype ++ // of the source type. Each element must be separately ++ // checked. ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++#ifdef ASSERT ++ // caller guarantees that the arrays really are different ++ // otherwise, we would have to make conjoint checks ++ { Label L; ++ array_overlap_test(L, TIMES_OOP); ++ __ stop("checkcast_copy within a single array"); ++ __ bind(L); ++ } ++#endif //ASSERT ++ ++ setup_arg_regs(4); // from => rdi, to => rsi, length => rdx ++ // ckoff => rcx, ckval => r8 ++ // r9 and r10 may be used to save non-volatile registers ++ ++ // Caller of this entry point must set up the argument registers. ++ if (entry != NULL) { ++ *entry = __ pc(); ++ __ block_comment("Entry:"); ++ } ++ ++ // allocate spill slots for r13, r14 ++ enum { ++ saved_r13_offset, ++ saved_r14_offset, ++ saved_rbp_offset ++ }; ++ __ subptr(rsp, saved_rbp_offset * wordSize,rsp); ++ __ stl(r13, Address(rsp, saved_r13_offset * wordSize)); ++ __ stl(r14, Address(rsp, saved_r14_offset * wordSize)); ++ ++ // check that int operands are properly extended to size_t ++ assert_clean_int(length, rax); ++ assert_clean_int(ckoff, rax); ++ ++#ifdef ASSERT ++ __ block_comment("assert consistent ckoff/ckval"); ++ // The ckoff and ckval must be mutually consistent, ++ // even though caller generates both. 
++ { Label L; ++ int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ cmpw(ckoff, Address(ckval, sco_offset)); ++ __ jcc(Assembler::equal, L); ++ __ stop("super_check_offset inconsistent"); ++ __ bind(L); ++ } ++#endif //ASSERT ++ ++ // Loop-invariant addresses. They are exclusive end pointers. ++ Address end_from_addr(from, length, TIMES_OOP, 0); ++ Address end_to_addr(to, length, TIMES_OOP, 0); ++ // Loop-variant addresses. They assume post-incremented count < 0. ++ Address from_element_addr(end_from, count, TIMES_OOP, 0); ++ Address to_element_addr(end_to, count, TIMES_OOP, 0); ++ ++ DecoratorSet decorators = IN_HEAP | IS_ARRAY | ARRAYCOPY_CHECKCAST | ARRAYCOPY_DISJOINT; ++ if (dest_uninitialized) { ++ decorators |= IS_DEST_UNINITIALIZED; ++ } ++ ++ BasicType type = T_OBJECT; ++ BarrierSetAssembler *bs = BarrierSet::barrier_set()->barrier_set_assembler(); ++ bs->arraycopy_prologue(_masm, decorators, type, from, to, count); ++ ++ // Copy from low to high addresses, indexed from the end of each array. ++ __ lea(end_from, end_from_addr); ++ __ lea(end_to, end_to_addr); ++ __ movl(r14_length, length); // save a copy of the length ++ assert(length == count, ""); // else fix next line: ++ __ subptr(R0, count, count); // negate and test the length ++ __ jcc(Assembler::notZero, L_load_element, count); ++ ++ // Empty array: Nothing to do. ++ __ movl(rax, R0); // return 0 on (trivial) success ++ __ jmp(L_done); ++ ++ // ======== begin loop ======== ++ // (Loop is rotated; its entry is L_load_element.) ++ // Loop control: ++ // for (count = -count; count != 0; count++) ++ // Base pointers src, dst are biased by 8*(count-1),to last element. ++ __ align(OptoLoopAlignment); ++ ++ __ BIND(L_store_element); ++ __ store_heap_oop(to_element_addr, rax_oop, noreg, noreg, AS_RAW); // store the oop ++ __ increment(count); // increment the count toward zero ++ __ jcc(Assembler::zero, L_do_card_marks, count); ++ ++ // ======== loop entry is here ======== ++ __ BIND(L_load_element); ++ __ load_heap_oop(rax_oop, from_element_addr, noreg, noreg, AS_RAW); // load the oop ++ __ testptr(rax_oop, rax_oop); ++ __ jcc(Assembler::zero, L_store_element); ++ ++ __ load_klass(r11_klass, rax_oop);// query the object klass ++ //will kill rscratch1 rscratch2 ++ generate_type_check(r11_klass, ckoff, ckval, L_store_element, rscratch1, rscratch2); ++ // ======== end loop ======== ++ ++ // It was a real error; we must depend on the caller to finish the job. ++ // Register rdx = -1 * number of *remaining* oops, r14 = *total* oops. ++ // Emit GC store barriers for the oops we have copied (r14 + rdx), ++ // and report their number to the caller. ++ assert_different_registers(rax, r14_length, count, to, end_to, rscratch1); ++ Label L_post_barrier; ++ __ addptr(r14_length, count, r14_length); // K = (original - remaining) oops ++ __ movl(rax, r14_length); // save the value ++ __ notptr(rax); // report (-1^K) to caller (does not affect flags) ++ __ jcc(Assembler::notZero, L_post_barrier); ++ __ jmp(L_done); // K == 0, nothing was copied, skip post barrier ++ ++ // Come here on success only. ++ __ BIND(L_do_card_marks); ++ __ movl(rax, R0); // return 0 on success ++ ++ __ BIND(L_post_barrier); ++ bs->arraycopy_epilogue(_masm, decorators, type, from, to, r14_length); ++ ++ // Common exit point (success or failure). 
++ __ BIND(L_done); ++ __ ldptr(r13, Address(rsp, saved_r13_offset * wordSize)); ++ __ ldptr(r14, Address(rsp, saved_r14_offset * wordSize)); ++ restore_arg_regs(); ++ inc_counter_np(SharedRuntime::_checkcast_array_copy_ctr); // Update counter after rscratch1 is free ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret_sw(); ++ ++ return start; ++ } ++ ++ // ++ // Generate 'unsafe' array copy stub ++ // Though just as safe as the other stubs, it takes an unscaled ++ // size_t argument instead of an element count. ++ // ++ // Input: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - byte count, treated as ssize_t, can be zero ++ // ++ // Examines the alignment of the operands and dispatches ++ // to a long, int, short, or byte copy loop. ++ // ++ address generate_unsafe_copy(const char *name, ++ address byte_copy_entry, address short_copy_entry, ++ address int_copy_entry, address long_copy_entry) { ++ SCOPEMARK_NAME(generate_unsafe_copy, _masm) ++ ++ Label L_long_aligned, L_int_aligned, L_short_aligned; ++ ++ // Input registers (before setup_arg_regs) ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register size = c_rarg2; // byte count (size_t) ++ ++ // Register used as a temp ++ const Register bits = V0; // test copy of low bits ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ // bump this on entry, not on exit: ++ inc_counter_np(SharedRuntime::_unsafe_array_copy_ctr); ++ ++ __ movl(bits, from); ++// __ xorptr(bits, to, bits); ++// __ xorptr(bits, size, bits); ++ __ orptr(bits, to, bits); ++ __ orptr(bits, size, bits); ++ ++ __ testb(bits, BytesPerLong-1); ++ __ jcc(Assembler::zero, L_long_aligned); ++ ++ __ testb(bits, BytesPerInt-1); ++ __ jcc(Assembler::zero, L_int_aligned); ++ ++ __ testb(bits, BytesPerShort-1); ++ __ jump_cc(Assembler::notZero, RuntimeAddress(byte_copy_entry)); ++ ++ __ BIND(L_short_aligned); ++ __ srll(size, LogBytesPerShort, size); // size => short_count ++ __ jump(RuntimeAddress(short_copy_entry)); ++ ++ __ BIND(L_int_aligned); ++ __ srll(size, LogBytesPerInt, size); // size => int_count ++ __ jump(RuntimeAddress(int_copy_entry)); ++ ++ __ BIND(L_long_aligned); ++ __ srll(size, LogBytesPerLong, size); // size => qword_count ++ __ jump(RuntimeAddress(long_copy_entry)); ++ ++ return start; ++ } ++ ++ // Perform range checks on the proposed arraycopy. ++ // Kills temp, but nothing else. ++ // Also, clean the sign bits of src_pos and dst_pos. 
++ void arraycopy_range_checks(Register src, // source array oop (c_rarg0) ++ Register src_pos, // source position (c_rarg1) ++ Register dst, // destination array oo (c_rarg2) ++ Register dst_pos, // destination position (c_rarg3) ++ Register length, ++ Register temp, ++ Label& L_failed) { ++ __ block_comment("arraycopy_range_checks:"); ++ ++ // if (src_pos + length > arrayOop(src)->length()) FAIL; ++ __ movw(temp, length); ++ __ addw(temp, src_pos, temp); // src_pos + length ++ __ cmpw(temp, Address(src, arrayOopDesc::length_offset_in_bytes())); ++ __ jcc(Assembler::above, L_failed); ++ ++ // if (dst_pos + length > arrayOop(dst)->length()) FAIL; ++ __ movw(temp, length); ++ __ addw(temp, dst_pos, temp); // dst_pos + length ++ __ cmpw(temp, Address(dst, arrayOopDesc::length_offset_in_bytes())); ++ __ jcc(Assembler::above, L_failed); ++ ++ // Have to clean up high 32-bits of 'src_pos' and 'dst_pos'. ++ // Move with sign extension can be used since they are positive. ++ __ movws(src_pos, src_pos); ++ __ movws(dst_pos, dst_pos); ++ ++ __ block_comment("arraycopy_range_checks done"); ++ } ++ ++ // ++ // Generate generic array copy stubs ++ // ++ // Input: ++ // c_rarg0 - src oop ++ // c_rarg1 - src_pos (32-bits) ++ // c_rarg2 - dst oop ++ // c_rarg3 - dst_pos (32-bits) ++ // not Win64 ++ // c_rarg4 - element count (32-bits) ++ // Win64 ++ // rsp+40 - element count (32-bits) ++ // ++ // Output: ++ // rax == 0 - success ++ // rax == -1^K - failure, where K is partial transfer count ++ // ++ address generate_generic_copy(const char *name, ++ address byte_copy_entry, address short_copy_entry, ++ address int_copy_entry, address oop_copy_entry, ++ address long_copy_entry, address checkcast_copy_entry) { ++ SCOPEMARK_NAME(generate_generic_copy, _masm) ++ ++ Label L_failed, L_failed_0, L_objArray; ++ Label L_copy_bytes, L_copy_shorts, L_copy_ints, L_copy_longs; ++ ++ // Input registers ++ const Register src = c_rarg0; // source array oop ++ const Register src_pos = c_rarg1; // source position ++ const Register dst = c_rarg2; // destination array oop ++ const Register dst_pos = c_rarg3; // destination position ++ const Register length = c_rarg4; ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Short-hop target to L_failed. Makes for denser prologue code. ++ __ BIND(L_failed_0); ++ __ jmp(L_failed); ++ ++ __ align(CodeEntryAlignment); ++ address start = __ pc(); ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ // bump this on entry, not on exit: ++ inc_counter_np(SharedRuntime::_generic_array_copy_ctr); ++ ++ //----------------------------------------------------------------------- ++ // Assembler stub will be used for this call to arraycopy ++ // if the following conditions are met: ++ // ++ // (1) src and dst must not be null. ++ // (2) src_pos must not be negative. ++ // (3) dst_pos must not be negative. ++ // (4) length must not be negative. ++ // (5) src klass and dst klass should be the same and not NULL. ++ // (6) src and dst should be arrays. ++ // (7) src_pos + length must not exceed length of src. ++ // (8) dst_pos + length must not exceed length of dst. 
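++ //
++ // Any failed check jumps to L_failed, which returns -1; by the shared stub
++ // convention this is assumed to tell the caller that nothing was copied and
++ // that the slower runtime arraycopy path must handle the request.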
++ // ++ ++ // if (src == NULL) return -1; ++ __ testptr(src, src); // src oop ++ size_t j1off = __ offset(); ++ __ jcc(Assembler::zero, L_failed_0, src); ++ ++ // if (src_pos < 0) return -1; ++ __ addw(src_pos, R0, rcc); // src_pos (32-bits) ++ __ jcc(Assembler::negative, L_failed_0, rcc); ++ ++ // if (dst == NULL) return -1; ++ __ testptr(dst, dst); // dst oop ++ __ jcc(Assembler::zero, L_failed_0); ++ ++ // if (dst_pos < 0) return -1; ++ __ addw(dst_pos, R0, rcc); // dst_pos (32-bits) ++ size_t j4off = __ offset(); ++ __ jcc(Assembler::negative, L_failed_0, rcc); ++ ++ // The first four tests are very dense code, ++ // but not quite dense enough to put four ++ // jumps in a 16-byte instruction fetch buffer. ++ // That's good, because some branch predicters ++ // do not like jumps so close together. ++ // Make sure of this. ++ guarantee(((j1off ^ j4off) & ~15) != 0, "I$ line of 1st & 4th jumps"); //should sw need this? jzy ++ ++ // registers used as temp ++ const Register r11_length = T0; // elements count to copy ++ const Register r10_src_klass = T1; // array klass ++ const Register r10 = r10_src_klass; ++ const Register r11 = r11_length; ++ const Register rscratch = rscratch1; ++ ++ // if (length < 0) return -1; ++ __ movl(r11_length, length); // length (elements count, 32-bits value) ++ __ jcc(Assembler::negative, L_failed_0, r11_length); ++ ++ __ load_klass(r10_src_klass, src); ++#ifdef ASSERT ++ // assert(src->klass() != NULL); ++ { ++ BLOCK_COMMENT("assert klasses not null {"); ++ Label L1, L2; ++ __ testptr(r10_src_klass, r10_src_klass); ++ __ jcc(Assembler::notZero, L2); // it is broken if klass is NULL ++ __ bind(L1); ++ __ stop("broken null klass"); ++ __ bind(L2); ++ __ load_klass(rax, dst); ++ __ cmpl(rax, 0); ++ __ jcc(Assembler::equal, L1); // this would be broken also ++ BLOCK_COMMENT("} assert klasses not null done"); ++ } ++#endif ++ ++ // Load layout helper (32-bits) ++ // ++ // |array_tag| | header_size | element_type | |log2_element_size| ++ // 32 30 24 16 8 2 0 ++ // ++ // array_tag: typeArray = 0x3, objArray = 0x2, non-array = 0x0 ++ // ++ ++ const int lh_offset = in_bytes(Klass::layout_helper_offset()); ++ ++ // Handle objArrays completely differently... ++ const jint objArray_lh = Klass::array_layout_helper(T_OBJECT); ++// __ mov_immediate32(rscratch, objArray_lh); ++ __ cmpw(Address(r10_src_klass, lh_offset), objArray_lh); ++ __ jcc(Assembler::equal, L_objArray); ++ ++ // if (src->klass() != dst->klass()) return -1; ++ __ load_klass(rax, dst); ++// __ cmpl(r10_src_klass, rax); ++// __ jcc(Assembler::notEqual, L_failed); ++ __ cmpq(rax, r10_src_klass); ++ __ jcc(Assembler::notEqual, L_failed); ++// __ bne_c(rax, r10_src_klass, L_failed); //todo zl check? ++ const Register rax_lh = rax; // layout helper ++ __ ldws(rax_lh, Address(r10_src_klass, lh_offset)); ++ ++ // if (!src->is_Array()) return -1; ++ __ cmpw(rax_lh, Klass::_lh_neutral_value); ++ __ jcc(Assembler::greaterEqual, L_failed); ++ ++ // At this point, it is known to be a typeArray (array_tag 0x3). 
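++
++ // The typeArray path below decodes the layout helper as (sketch; the shift
++ // and mask constants come from Klass):
++ //   array_offset = (lh >> _lh_header_size_shift) & _lh_header_size_mask;
++ //   elsize_log2  =  lh & _lh_log2_element_size_mask;
++ // src/dst are advanced past the header by array_offset, and elsize_log2
++ // (0, 1, 2 or 3) selects the byte, short, int or long copy entry.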
++#ifdef ASSERT ++ { ++ BLOCK_COMMENT("assert primitive array {"); ++ Label L; ++// __ mov_immediate32(rscratch ,(Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); ++ __ cmpw(rax_lh, (Klass::_lh_array_tag_type_value << Klass::_lh_array_tag_shift)); ++ __ jcc(Assembler::greaterEqual, L); ++ __ stop("must be a primitive array"); ++ __ bind(L); ++ BLOCK_COMMENT("} assert primitive array done"); ++ } ++#endif ++ ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, ++ r10, L_failed); ++ ++ // TypeArrayKlass ++ // ++ // src_addr = (src + array_header_in_bytes()) + (src_pos << log2elemsize); ++ // dst_addr = (dst + array_header_in_bytes()) + (dst_pos << log2elemsize); ++ // ++ ++ const Register r10_offset = r10; // array offset ++ const Register rax_elsize = rax_lh; // element size ++ ++ __ movw(r10_offset, rax_lh); ++ __ srll(r10_offset, Klass::_lh_header_size_shift, r10_offset); ++ __ andptr(r10_offset, Klass::_lh_header_size_mask, r10_offset); // array_offset ++ __ addptr(src, r10_offset, src); // src array offset ++ __ addptr(dst, r10_offset, dst); // dst array offset ++ BLOCK_COMMENT("choose copy loop based on element size"); ++ __ andw(rax_lh, Klass::_lh_log2_element_size_mask, rax_lh); // rax_lh -> rax_elsize ++ ++ // next registers should be set before the jump to corresponding stub ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register count = c_rarg2; // elements count ++ ++ // 'from', 'to', 'count' registers should be set in such order ++ // since they are the same as 'src', 'src_pos', 'dst'. ++ ++ __ BIND(L_copy_bytes); ++ __ cmpw(rax_elsize, 0); ++ __ jcc(Assembler::notEqual, L_copy_shorts); ++ __ lea(from, Address(src, src_pos, Address::times_1, 0));// src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_1, 0));// dst_addr ++ __ movws(count, r11_length); // length ++ __ jump(RuntimeAddress(byte_copy_entry)); ++ ++ __ BIND(L_copy_shorts); ++ __ cmpw(rax_elsize, LogBytesPerShort); ++ __ jcc(Assembler::notEqual, L_copy_ints); ++ __ lea(from, Address(src, src_pos, Address::times_2, 0));// src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_2, 0));// dst_addr ++ __ movws(count, r11_length); // length ++ __ jump(RuntimeAddress(short_copy_entry)); ++ ++ __ BIND(L_copy_ints); ++ __ cmpw(rax_elsize, LogBytesPerInt); ++ __ jcc(Assembler::notEqual, L_copy_longs); ++ __ lea(from, Address(src, src_pos, Address::times_4, 0));// src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_4, 0));// dst_addr ++ __ movws(count, r11_length); // length ++ __ jump(RuntimeAddress(int_copy_entry)); ++ ++ __ BIND(L_copy_longs); ++#ifdef ASSERT ++ { ++ BLOCK_COMMENT("assert long copy {"); ++ Label L; ++ __ cmpw(rax_elsize, LogBytesPerLong); ++ __ jcc(Assembler::equal, L); ++ __ stop("must be long copy, but elsize is wrong"); ++ __ bind(L); ++ BLOCK_COMMENT("} assert long copy done"); ++ } ++#endif ++ __ lea(from, Address(src, src_pos, Address::times_8, 0));// src_addr ++ __ lea(to, Address(dst, dst_pos, Address::times_8, 0));// dst_addr ++ __ movws(count, r11_length); // length ++ __ jump(RuntimeAddress(long_copy_entry)); ++ ++ // ObjArrayKlass ++ __ BIND(L_objArray); ++ // live at this point: r10_src_klass, r11_length, src[_pos], dst[_pos] ++ ++ Label L_plain_copy, L_checkcast_copy; ++ // test array classes for subtyping ++ __ load_klass(rax, dst); ++// __ cmpl(r10_src_klass, rax); // usual case is exact equality ++// __ jcc(Assembler::notEqual, L_checkcast_copy); ++// __ 
bne_c(r10_src_klass, rax, L_checkcast_copy); ++ __ cmpq(rax, r10_src_klass); ++ __ jcc(Assembler::notEqual, L_checkcast_copy); ++ ++ // Identically typed arrays can be copied without element-wise checks. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, ++ r10, L_failed); ++ ++ __ lea(from, Address(src, src_pos, TIMES_OOP, ++ arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // src_addr ++ __ lea(to, Address(dst, dst_pos, TIMES_OOP, ++ arrayOopDesc::base_offset_in_bytes(T_OBJECT))); // dst_addr ++ __ movws(count, r11_length); // length ++ __ BIND(L_plain_copy); ++ __ jump(RuntimeAddress(oop_copy_entry)); ++ ++ __ BIND(L_checkcast_copy); ++ // live at this point: r10_src_klass, r11_length, rax (dst_klass) ++ { ++ // Before looking at dst.length, make sure dst is also an objArray. ++ __ mov_immediate32(rscratch, objArray_lh); ++ __ cmpw(Address(rax, lh_offset), rscratch); ++ __ jcc(Assembler::notEqual, L_failed); ++ ++ // It is safe to examine both src.length and dst.length. ++ arraycopy_range_checks(src, src_pos, dst, dst_pos, r11_length, ++ rax, L_failed); ++ ++ const Register r11_dst_klass = r11; ++ __ load_klass(r11_dst_klass, dst); // reload ++ ++ // Marshal the base address arguments now, freeing registers. ++ __ lea(from, Address(src, src_pos, TIMES_OOP, ++ arrayOopDesc::base_offset_in_bytes(T_OBJECT))); ++ __ lea(to, Address(dst, dst_pos, TIMES_OOP, ++ arrayOopDesc::base_offset_in_bytes(T_OBJECT))); ++ __ movw(count, length); // length (reloaded) ++ Register sco_temp = c_rarg3; // this register is free now ++ assert_different_registers(from, to, count, sco_temp, ++ r11_dst_klass, r10_src_klass); ++ assert_clean_int(count, sco_temp); ++ ++ // Generate the type check. ++ const int sco_offset = in_bytes(Klass::super_check_offset_offset()); ++ __ ldws(sco_temp, Address(r11_dst_klass, sco_offset)); ++ assert_clean_int(sco_temp, rax); ++ //will kill rscratch1 rscratch2 ++ generate_type_check(r10_src_klass, sco_temp, r11_dst_klass, L_plain_copy, rscratch1, rscratch2); ++ ++ // Fetch destination element klass from the ObjArrayKlass header. ++ int ek_offset = in_bytes(ObjArrayKlass::element_klass_offset()); ++ __ ldptr(r11_dst_klass, Address(r11_dst_klass, ek_offset)); ++ __ ldws( sco_temp, Address(r11_dst_klass, sco_offset)); ++ assert_clean_int(sco_temp, rax); ++ ++ // the checkcast_copy loop needs two extra arguments: ++ assert(c_rarg3 == sco_temp, "#3 already in place"); ++ // Set up arguments for checkcast_copy_entry. 
++ setup_arg_regs(4); ++ __ movl(c_rarg4, r11_dst_klass); // dst.klass.element_klass, r8 is c_rarg4 on Linux/Solaris ++ __ jump(RuntimeAddress(checkcast_copy_entry)); ++ } ++ ++ __ BIND(L_failed); ++ __ ldi(rax, -1, R0);// return -1 ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret_sw(); ++ ++ return start; ++ } ++ ++ void generate_arraycopy_stubs() { ++ address entry; ++ address entry_jbyte_arraycopy; ++ address entry_jshort_arraycopy; ++ address entry_jint_arraycopy; ++ address entry_oop_arraycopy; ++ address entry_jlong_arraycopy; ++ address entry_checkcast_arraycopy; ++//TODO:jzy fast path to arraycopy ++ StubRoutines::_jbyte_disjoint_arraycopy = generate_disjoint_byte_copy(false, &entry, ++ "jbyte_disjoint_arraycopy"); ++ StubRoutines::_jbyte_arraycopy = generate_conjoint_byte_copy(false, entry, &entry_jbyte_arraycopy, ++ "jbyte_arraycopy"); ++ ++ StubRoutines::_jshort_disjoint_arraycopy = generate_disjoint_short_copy(false, &entry, ++ "jshort_disjoint_arraycopy"); ++ StubRoutines::_jshort_arraycopy = generate_conjoint_short_copy(false, entry, &entry_jshort_arraycopy, ++ "jshort_arraycopy"); ++ ++ StubRoutines::_jint_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, false, &entry, ++ "jint_disjoint_arraycopy"); ++ StubRoutines::_jint_arraycopy = generate_conjoint_int_oop_copy(false, false, entry, ++ &entry_jint_arraycopy, "jint_arraycopy"); ++ ++ StubRoutines::_jlong_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, false, &entry, ++ "jlong_disjoint_arraycopy"); ++ StubRoutines::_jlong_arraycopy = generate_conjoint_long_oop_copy(false, false, entry, ++ &entry_jlong_arraycopy, "jlong_arraycopy"); ++ ++ ++ if (UseCompressedOops) { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_int_oop_copy(false, true, &entry, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_int_oop_copy(false, true, entry, ++ &entry_oop_arraycopy, "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_int_oop_copy(false, true, &entry, ++ "oop_disjoint_arraycopy_uninit", ++ /*dest_uninitialized*/true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_int_oop_copy(false, true, entry, ++ NULL, "oop_arraycopy_uninit", ++ /*dest_uninitialized*/true); ++ } else { ++ StubRoutines::_oop_disjoint_arraycopy = generate_disjoint_long_oop_copy(false, true, &entry, ++ "oop_disjoint_arraycopy"); ++ StubRoutines::_oop_arraycopy = generate_conjoint_long_oop_copy(false, true, entry, ++ &entry_oop_arraycopy, "oop_arraycopy"); ++ StubRoutines::_oop_disjoint_arraycopy_uninit = generate_disjoint_long_oop_copy(false, true, &entry, ++ "oop_disjoint_arraycopy_uninit", ++ /*dest_uninitialized*/true); ++ StubRoutines::_oop_arraycopy_uninit = generate_conjoint_long_oop_copy(false, true, entry, ++ NULL, "oop_arraycopy_uninit", ++ /*dest_uninitialized*/true); ++ } ++ //TODO:jzy fast path to checkcast ++ StubRoutines::_checkcast_arraycopy = generate_checkcast_copy("checkcast_arraycopy", &entry_checkcast_arraycopy); ++ StubRoutines::_checkcast_arraycopy_uninit = generate_checkcast_copy("checkcast_arraycopy_uninit", NULL, ++ /*dest_uninitialized*/true); ++ ++ StubRoutines::_unsafe_arraycopy = generate_unsafe_copy("unsafe_arraycopy", ++ entry_jbyte_arraycopy, ++ entry_jshort_arraycopy, ++ entry_jint_arraycopy, ++ entry_jlong_arraycopy); ++ StubRoutines::_generic_arraycopy = generate_generic_copy("generic_arraycopy", ++ entry_jbyte_arraycopy, ++ entry_jshort_arraycopy, ++ entry_jint_arraycopy, ++ entry_oop_arraycopy, 
++ entry_jlong_arraycopy, ++ entry_checkcast_arraycopy); ++ //TODO:fast path jzy ++ StubRoutines::_jbyte_fill = generate_fill(T_BYTE, false, "jbyte_fill"); ++ StubRoutines::_jshort_fill = generate_fill(T_SHORT, false, "jshort_fill"); ++ StubRoutines::_jint_fill = generate_fill(T_INT, false, "jint_fill"); ++ StubRoutines::_arrayof_jbyte_fill = generate_fill(T_BYTE, true, "arrayof_jbyte_fill"); ++ StubRoutines::_arrayof_jshort_fill = generate_fill(T_SHORT, true, "arrayof_jshort_fill"); ++ StubRoutines::_arrayof_jint_fill = generate_fill(T_INT, true, "arrayof_jint_fill"); ++ ++ // We don't generate specialized code for HeapWord-aligned source ++ // arrays, so just use the code we've already generated ++ StubRoutines::_arrayof_jbyte_disjoint_arraycopy = StubRoutines::_jbyte_disjoint_arraycopy; ++ StubRoutines::_arrayof_jbyte_arraycopy = StubRoutines::_jbyte_arraycopy; ++ ++ StubRoutines::_arrayof_jshort_disjoint_arraycopy = StubRoutines::_jshort_disjoint_arraycopy; ++ StubRoutines::_arrayof_jshort_arraycopy = StubRoutines::_jshort_arraycopy; ++ ++ StubRoutines::_arrayof_jint_disjoint_arraycopy = StubRoutines::_jint_disjoint_arraycopy; ++ StubRoutines::_arrayof_jint_arraycopy = StubRoutines::_jint_arraycopy; ++ ++ StubRoutines::_arrayof_jlong_disjoint_arraycopy = StubRoutines::_jlong_disjoint_arraycopy; ++ StubRoutines::_arrayof_jlong_arraycopy = StubRoutines::_jlong_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy = StubRoutines::_oop_disjoint_arraycopy; ++ StubRoutines::_arrayof_oop_arraycopy = StubRoutines::_oop_arraycopy; ++ ++ StubRoutines::_arrayof_oop_disjoint_arraycopy_uninit = StubRoutines::_oop_disjoint_arraycopy_uninit; ++ StubRoutines::_arrayof_oop_arraycopy_uninit = StubRoutines::_oop_arraycopy_uninit; ++ } ++ ++ // AES intrinsic stubs ++ enum {AESBlockSize = 16}; ++ ++ address generate_key_shuffle_mask() { ++ __ align(16); ++ StubCodeMark mark(this, "StubRoutines", "key_shuffle_mask"); ++ address start = __ pc();ShouldNotReachHere(); ++// __ emit_data64( 0x0405060700010203, relocInfo::none ); ++// __ emit_data64( 0x0c0d0e0f08090a0b, relocInfo::none ); ++ return start; ++ } ++ ++ address generate_counter_shuffle_mask() { ++ __ align(16); ++ StubCodeMark mark(this, "StubRoutines", "counter_shuffle_mask"); ++ address start = __ pc();ShouldNotReachHere(); ++// __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none); ++// __ emit_data64(0x0001020304050607, relocInfo::none); ++ return start; ++ } ++ ++ // Utility routine for loading a 128-bit key word in little endian format ++ // can optionally specify that the shuffle mask is already in an xmmregister ++ /*void load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask=NULL) { ++ __ movdqu(xmmdst, Address(key, offset)); ++ if (xmm_shuf_mask != NULL) { ++ __ pshufb(xmmdst, xmm_shuf_mask); ++ } else { ++ __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); ++ } ++ }*/ ++ ++ // Utility routine for increase 128bit counter (iv in CTR mode) ++ /*void inc_counter(Register reg, XMMRegister xmmdst, int inc_delta, Label& next_block) { ++ __ pextrq(reg, xmmdst, 0x0); ++ __ addq(reg, inc_delta); ++ __ pinsrq(xmmdst, reg, 0x0); ++ __ jcc(Assembler::carryClear, next_block); // jump if no carry ++ __ pextrq(reg, xmmdst, 0x01); // Carry ++ __ addq(reg, 0x01); ++ __ pinsrq(xmmdst, reg, 0x01); //Carry end ++ __ BIND(next_block); // next instruction ++ }*/ ++ ++ // Arguments: ++ // ++ // Inputs: ++ // c_rarg0 - source byte array address ++ // c_rarg1 - destination byte array address ++ // 
c_rarg2 - K (key) in little endian int array ++ // ++ address generate_aescrypt_encryptBlock() { ++ assert(UseAES, "need AES instructions and misaligned SSE support"); ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "aescrypt_encryptBlock"); ++ Label L_doLast; ++ address start = __ pc();ShouldNotReachHere(); ++/* ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register key = c_rarg2; // key array address ++ const Register keylen = rax; ++ ++ const XMMRegister xmm_result = xmm0; ++ const XMMRegister xmm_key_shuf_mask = xmm1; ++ // On win64 xmm6-xmm15 must be preserved so don't use them. ++ const XMMRegister xmm_temp1 = xmm2; ++ const XMMRegister xmm_temp2 = xmm3; ++ const XMMRegister xmm_temp3 = xmm4; ++ const XMMRegister xmm_temp4 = xmm5; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} ++ __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); ++ ++ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); ++ __ movdqu(xmm_result, Address(from, 0)); // get 16 bytes of input ++ ++ // For encryption, the java expanded key ordering is just what we need ++ // we don't know if the key is aligned, hence not using load-execute form ++ ++ load_key(xmm_temp1, key, 0x00, xmm_key_shuf_mask); ++ __ pxor(xmm_result, xmm_temp1); ++ ++ load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); ++ load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); ++ load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); ++ load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); ++ ++ __ aesenc(xmm_result, xmm_temp1); ++ __ aesenc(xmm_result, xmm_temp2); ++ __ aesenc(xmm_result, xmm_temp3); ++ __ aesenc(xmm_result, xmm_temp4); ++ ++ load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); ++ load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); ++ load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); ++ load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); ++ ++ __ aesenc(xmm_result, xmm_temp1); ++ __ aesenc(xmm_result, xmm_temp2); ++ __ aesenc(xmm_result, xmm_temp3); ++ __ aesenc(xmm_result, xmm_temp4); ++ ++ load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); ++ load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); ++ ++ __ cmpl(keylen, 44); ++ __ jccb(Assembler::equal, L_doLast); ++ ++ __ aesenc(xmm_result, xmm_temp1); ++ __ aesenc(xmm_result, xmm_temp2); ++ ++ load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); ++ load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); ++ ++ __ cmpl(keylen, 52); ++ __ jccb(Assembler::equal, L_doLast); ++ ++ __ aesenc(xmm_result, xmm_temp1); ++ __ aesenc(xmm_result, xmm_temp2); ++ ++ load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); ++ load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); ++ ++ __ BIND(L_doLast); ++ __ aesenc(xmm_result, xmm_temp1); ++ __ aesenclast(xmm_result, xmm_temp2); ++ __ movdqu(Address(to, 0), xmm_result); // store the result ++ __ xorptr(rax, rax); // return 0 ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0); ++*/ ++ return start; ++ } ++ ++ ++ // Arguments: ++ // ++ // Inputs: ++ // c_rarg0 - source byte array address ++ // c_rarg1 - destination byte array address ++ // c_rarg2 - K (key) in little endian int array ++ // ++ address generate_aescrypt_decryptBlock() { ++ assert(UseAES, "need AES instructions and misaligned SSE support"); ++ __ align(CodeEntryAlignment); ++ StubCodeMark 
mark(this, "StubRoutines", "aescrypt_decryptBlock"); ++ Label L_doLast; ++ address start = __ pc();ShouldNotReachHere(); ++/* ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register key = c_rarg2; // key array address ++ const Register keylen = rax; ++ ++ const XMMRegister xmm_result = xmm0; ++ const XMMRegister xmm_key_shuf_mask = xmm1; ++ // On win64 xmm6-xmm15 must be preserved so don't use them. ++ const XMMRegister xmm_temp1 = xmm2; ++ const XMMRegister xmm_temp2 = xmm3; ++ const XMMRegister xmm_temp3 = xmm4; ++ const XMMRegister xmm_temp4 = xmm5; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ // keylen could be only {11, 13, 15} * 4 = {44, 52, 60} ++ __ movl(keylen, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); ++ ++ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); ++ __ movdqu(xmm_result, Address(from, 0)); ++ ++ // for decryption java expanded key ordering is rotated one position from what we want ++ // so we start from 0x10 here and hit 0x00 last ++ // we don't know if the key is aligned, hence not using load-execute form ++ load_key(xmm_temp1, key, 0x10, xmm_key_shuf_mask); ++ load_key(xmm_temp2, key, 0x20, xmm_key_shuf_mask); ++ load_key(xmm_temp3, key, 0x30, xmm_key_shuf_mask); ++ load_key(xmm_temp4, key, 0x40, xmm_key_shuf_mask); ++ ++ __ pxor (xmm_result, xmm_temp1); ++ __ aesdec(xmm_result, xmm_temp2); ++ __ aesdec(xmm_result, xmm_temp3); ++ __ aesdec(xmm_result, xmm_temp4); ++ ++ load_key(xmm_temp1, key, 0x50, xmm_key_shuf_mask); ++ load_key(xmm_temp2, key, 0x60, xmm_key_shuf_mask); ++ load_key(xmm_temp3, key, 0x70, xmm_key_shuf_mask); ++ load_key(xmm_temp4, key, 0x80, xmm_key_shuf_mask); ++ ++ __ aesdec(xmm_result, xmm_temp1); ++ __ aesdec(xmm_result, xmm_temp2); ++ __ aesdec(xmm_result, xmm_temp3); ++ __ aesdec(xmm_result, xmm_temp4); ++ ++ load_key(xmm_temp1, key, 0x90, xmm_key_shuf_mask); ++ load_key(xmm_temp2, key, 0xa0, xmm_key_shuf_mask); ++ load_key(xmm_temp3, key, 0x00, xmm_key_shuf_mask); ++ ++ __ cmpl(keylen, 44); ++ __ jccb(Assembler::equal, L_doLast); ++ ++ __ aesdec(xmm_result, xmm_temp1); ++ __ aesdec(xmm_result, xmm_temp2); ++ ++ load_key(xmm_temp1, key, 0xb0, xmm_key_shuf_mask); ++ load_key(xmm_temp2, key, 0xc0, xmm_key_shuf_mask); ++ ++ __ cmpl(keylen, 52); ++ __ jccb(Assembler::equal, L_doLast); ++ ++ __ aesdec(xmm_result, xmm_temp1); ++ __ aesdec(xmm_result, xmm_temp2); ++ ++ load_key(xmm_temp1, key, 0xd0, xmm_key_shuf_mask); ++ load_key(xmm_temp2, key, 0xe0, xmm_key_shuf_mask); ++ ++ __ BIND(L_doLast); ++ __ aesdec(xmm_result, xmm_temp1); ++ __ aesdec(xmm_result, xmm_temp2); ++ ++ // for decryption the aesdeclast operation is always on key+0x00 ++ __ aesdeclast(xmm_result, xmm_temp3); ++ __ movdqu(Address(to, 0), xmm_result); // store the result ++ __ xorptr(rax, rax); // return 0 ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0); ++*/ ++ return start; ++ } ++ ++ ++ // Arguments: ++ // ++ // Inputs: ++ // c_rarg0 - source byte array address ++ // c_rarg1 - destination byte array address ++ // c_rarg2 - K (key) in little endian int array ++ // c_rarg3 - r vector byte array address ++ // c_rarg4 - input length ++ // ++ // Output: ++ // rax - input length ++ // ++ address generate_cipherBlockChaining_encryptAESCrypt() { ++ assert(UseAES, "need AES instructions and misaligned SSE support"); ++ __ 
align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_encryptAESCrypt"); ++ address start = __ pc();ShouldNotReachHere(); ++/* ++ Label L_exit, L_key_192_256, L_key_256, L_loopTop_128, L_loopTop_192, L_loopTop_256; ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register key = c_rarg2; // key array address ++ const Register rvec = c_rarg3; // r byte array initialized from initvector array address ++ // and left with the results of the last encryption block ++#ifndef _WIN64 ++ const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) ++#else ++ const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 ++ const Register len_reg = r11; // pick the volatile windows register ++#endif ++ const Register pos = rax; ++ ++ // xmm register assignments for the loops below ++ const XMMRegister xmm_result = xmm0; ++ const XMMRegister xmm_temp = xmm1; ++ // keys 0-10 preloaded into xmm2-xmm12 ++ const int XMM_REG_NUM_KEY_FIRST = 2; ++ const int XMM_REG_NUM_KEY_LAST = 15; ++ const XMMRegister xmm_key0 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); ++ const XMMRegister xmm_key10 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+10); ++ const XMMRegister xmm_key11 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+11); ++ const XMMRegister xmm_key12 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+12); ++ const XMMRegister xmm_key13 = as_XMMRegister(XMM_REG_NUM_KEY_FIRST+13); ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++#ifdef _WIN64 ++ // on win64, fill len_reg from stack position ++ __ movl(len_reg, len_mem); ++#else ++ __ push(len_reg); // Save ++#endif ++ ++ const XMMRegister xmm_key_shuf_mask = xmm_temp; // used temporarily to swap key bytes up front ++ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); ++ // load up xmm regs xmm2 thru xmm12 with key 0x00 - 0xa0 ++ for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x00; rnum <= XMM_REG_NUM_KEY_FIRST+10; rnum++) { ++ load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); ++ offset += 0x10; ++ } ++ __ movdqu(xmm_result, Address(rvec, 0x00)); // initialize xmm_result with r vec ++ ++ // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) ++ __ movl(rax, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); ++ __ cmpl(rax, 44); ++ __ jcc(Assembler::notEqual, L_key_192_256); ++ ++ // 128 bit code follows here ++ __ movptr(pos, 0); ++ __ align(OptoLoopAlignment); ++ ++ __ BIND(L_loopTop_128); ++ __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input ++ __ pxor (xmm_result, xmm_temp); // xor with the current r vector ++ __ pxor (xmm_result, xmm_key0); // do the aes rounds ++ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 9; rnum++) { ++ __ aesenc(xmm_result, as_XMMRegister(rnum)); ++ } ++ __ aesenclast(xmm_result, xmm_key10); ++ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output ++ // no need to store r to memory until we exit ++ __ addptr(pos, AESBlockSize); ++ __ subptr(len_reg, AESBlockSize); ++ __ jcc(Assembler::notEqual, L_loopTop_128); ++ ++ __ BIND(L_exit); ++ __ movdqu(Address(rvec, 0), xmm_result); // final value of r stored in rvec of CipherBlockChaining object ++ ++#ifdef _WIN64 ++ __ movl(rax, len_mem); ++#else ++ __ pop(rax); // return 
length ++#endif ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0); ++ ++ __ BIND(L_key_192_256); ++ // here rax = len in ints of AESCrypt.KLE array (52=192, or 60=256) ++ load_key(xmm_key11, key, 0xb0, xmm_key_shuf_mask); ++ load_key(xmm_key12, key, 0xc0, xmm_key_shuf_mask); ++ __ cmpl(rax, 52); ++ __ jcc(Assembler::notEqual, L_key_256); ++ ++ // 192-bit code follows here (could be changed to use more xmm registers) ++ __ movptr(pos, 0); ++ __ align(OptoLoopAlignment); ++ ++ __ BIND(L_loopTop_192); ++ __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input ++ __ pxor (xmm_result, xmm_temp); // xor with the current r vector ++ __ pxor (xmm_result, xmm_key0); // do the aes rounds ++ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 11; rnum++) { ++ __ aesenc(xmm_result, as_XMMRegister(rnum)); ++ } ++ __ aesenclast(xmm_result, xmm_key12); ++ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output ++ // no need to store r to memory until we exit ++ __ addptr(pos, AESBlockSize); ++ __ subptr(len_reg, AESBlockSize); ++ __ jcc(Assembler::notEqual, L_loopTop_192); ++ __ jmp(L_exit); ++ ++ __ BIND(L_key_256); ++ // 256-bit code follows here (could be changed to use more xmm registers) ++ load_key(xmm_key13, key, 0xd0, xmm_key_shuf_mask); ++ __ movptr(pos, 0); ++ __ align(OptoLoopAlignment); ++ ++ __ BIND(L_loopTop_256); ++ __ movdqu(xmm_temp, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of input ++ __ pxor (xmm_result, xmm_temp); // xor with the current r vector ++ __ pxor (xmm_result, xmm_key0); // do the aes rounds ++ for (int rnum = XMM_REG_NUM_KEY_FIRST + 1; rnum <= XMM_REG_NUM_KEY_FIRST + 13; rnum++) { ++ __ aesenc(xmm_result, as_XMMRegister(rnum)); ++ } ++ load_key(xmm_temp, key, 0xe0); ++ __ aesenclast(xmm_result, xmm_temp); ++ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output ++ // no need to store r to memory until we exit ++ __ addptr(pos, AESBlockSize); ++ __ subptr(len_reg, AESBlockSize); ++ __ jcc(Assembler::notEqual, L_loopTop_256); ++ __ jmp(L_exit); ++*/ ++ return start; ++ } ++ ++ // Safefetch stubs. ++ void generate_safefetch(const char* name, int size, address* entry, ++ address* fault_pc, address* continuation_pc) { ++ // safefetch signatures: ++ // int SafeFetch32(int* adr, int errValue); ++ // intptr_t SafeFetchN (intptr_t* adr, intptr_t errValue); ++ // ++ // arguments: ++ // c_rarg0 = adr ++ // c_rarg1 = errValue ++ // ++ // result: ++ // PPC_RET = *adr or errValue ++ ++ StubCodeMark mark(this, "StubRoutines", name); ++ ++ // Entry point, pc or function descriptor. ++ *entry = __ pc(); ++ ++ // Load *adr into c_rarg1, may fault. ++ *fault_pc = __ pc(); ++ switch (size) { ++ case 4: ++ // int32_t ++ __ ldws(c_rarg1, Address(c_rarg0, 0)); //__ stop("here should extend? jzy");//here should extend? 
jzy ++ break; ++ case 8: ++ // int64_t ++ __ ldl(c_rarg1, Address(c_rarg0, 0)); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ // return errValue or *adr ++ *continuation_pc = __ pc(); ++ __ movl(V0, c_rarg1); ++ __ ret_sw(); ++ } ++ ++ // This is a version of CBC/AES Decrypt which does 4 blocks in a loop at a time ++ // to hide instruction latency ++ // ++ // Arguments: ++ // ++ // Inputs: ++ // c_rarg0 - source byte array address ++ // c_rarg1 - destination byte array address ++ // c_rarg2 - K (key) in little endian int array ++ // c_rarg3 - r vector byte array address ++ // c_rarg4 - input length ++ // ++ // Output: ++ // rax - input length ++ // ++ address generate_cipherBlockChaining_decryptAESCrypt_Parallel() { ++ assert(UseAES, "need AES instructions and misaligned SSE support"); ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt"); ++ address start = __ pc();ShouldNotReachHere(); ++/* ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register key = c_rarg2; // key array address ++ const Register rvec = c_rarg3; // r byte array initialized from initvector array address ++ // and left with the results of the last encryption block ++#ifndef _WIN64 ++ const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) ++#else ++ const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 ++ const Register len_reg = r11; // pick the volatile windows register ++#endif ++ const Register pos = rax; ++ ++ const int PARALLEL_FACTOR = 4; ++ const int ROUNDS[3] = { 10, 12, 14 }; // aes rounds for key128, key192, key256 ++ ++ Label L_exit; ++ Label L_singleBlock_loopTopHead[3]; // 128, 192, 256 ++ Label L_singleBlock_loopTopHead2[3]; // 128, 192, 256 ++ Label L_singleBlock_loopTop[3]; // 128, 192, 256 ++ Label L_multiBlock_loopTopHead[3]; // 128, 192, 256 ++ Label L_multiBlock_loopTop[3]; // 128, 192, 256 ++ ++ // keys 0-10 preloaded into xmm5-xmm15 ++ const int XMM_REG_NUM_KEY_FIRST = 5; ++ const int XMM_REG_NUM_KEY_LAST = 15; ++ const XMMRegister xmm_key_first = as_XMMRegister(XMM_REG_NUM_KEY_FIRST); ++ const XMMRegister xmm_key_last = as_XMMRegister(XMM_REG_NUM_KEY_LAST); ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++#ifdef _WIN64 ++ // on win64, fill len_reg from stack position ++ __ movl(len_reg, len_mem); ++#else ++ __ push(len_reg); // Save ++#endif ++ __ push(rbx); ++ // the java expanded key ordering is rotated one position from what we want ++ // so we start from 0x10 here and hit 0x00 last ++ const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front ++ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); ++ // load up xmm regs 5 thru 15 with key 0x10 - 0xa0 - 0x00 ++ for (int rnum = XMM_REG_NUM_KEY_FIRST, offset = 0x10; rnum < XMM_REG_NUM_KEY_LAST; rnum++) { ++ load_key(as_XMMRegister(rnum), key, offset, xmm_key_shuf_mask); ++ offset += 0x10; ++ } ++ load_key(xmm_key_last, key, 0x00, xmm_key_shuf_mask); ++ ++ const XMMRegister xmm_prev_block_cipher = xmm1; // holds cipher of previous block ++ ++ // registers holding the four results in the parallelized loop ++ const XMMRegister xmm_result0 = xmm0; ++ const XMMRegister xmm_result1 = xmm2; ++ const XMMRegister xmm_result2 = xmm3; ++ const XMMRegister xmm_result3 = xmm4; ++ ++ __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // initialize with initial 
rvec ++ ++ __ xorptr(pos, pos); ++ ++ // now split to different paths depending on the keylen (len in ints of AESCrypt.KLE array (52=192, or 60=256)) ++ __ movl(rbx, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); ++ __ cmpl(rbx, 52); ++ __ jcc(Assembler::equal, L_multiBlock_loopTopHead[1]); ++ __ cmpl(rbx, 60); ++ __ jcc(Assembler::equal, L_multiBlock_loopTopHead[2]); ++ ++#define DoFour(opc, src_reg) \ ++ __ opc(xmm_result0, src_reg); \ ++ __ opc(xmm_result1, src_reg); \ ++ __ opc(xmm_result2, src_reg); \ ++ __ opc(xmm_result3, src_reg); \ ++ ++ for (int k = 0; k < 3; ++k) { ++ __ BIND(L_multiBlock_loopTopHead[k]); ++ if (k != 0) { ++ __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least 4 blocks left ++ __ jcc(Assembler::less, L_singleBlock_loopTopHead2[k]); ++ } ++ if (k == 1) { ++ __ subptr(rsp, 6 * wordSize); ++ __ movdqu(Address(rsp, 0), xmm15); //save last_key from xmm15 ++ load_key(xmm15, key, 0xb0); // 0xb0; 192-bit key goes up to 0xc0 ++ __ movdqu(Address(rsp, 2 * wordSize), xmm15); ++ load_key(xmm1, key, 0xc0); // 0xc0; ++ __ movdqu(Address(rsp, 4 * wordSize), xmm1); ++ } else if (k == 2) { ++ __ subptr(rsp, 10 * wordSize); ++ __ movdqu(Address(rsp, 0), xmm15); //save last_key from xmm15 ++ load_key(xmm15, key, 0xd0); // 0xd0; 256-bit key goes upto 0xe0 ++ __ movdqu(Address(rsp, 6 * wordSize), xmm15); ++ load_key(xmm1, key, 0xe0); // 0xe0; ++ __ movdqu(Address(rsp, 8 * wordSize), xmm1); ++ load_key(xmm15, key, 0xb0); // 0xb0; ++ __ movdqu(Address(rsp, 2 * wordSize), xmm15); ++ load_key(xmm1, key, 0xc0); // 0xc0; ++ __ movdqu(Address(rsp, 4 * wordSize), xmm1); ++ } ++ __ align(OptoLoopAlignment); ++ __ BIND(L_multiBlock_loopTop[k]); ++ __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least 4 blocks left ++ __ jcc(Assembler::less, L_singleBlock_loopTopHead[k]); ++ ++ if (k != 0) { ++ __ movdqu(xmm15, Address(rsp, 2 * wordSize)); ++ __ movdqu(xmm1, Address(rsp, 4 * wordSize)); ++ } ++ ++ __ movdqu(xmm_result0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); // get next 4 blocks into xmmresult registers ++ __ movdqu(xmm_result1, Address(from, pos, Address::times_1, 1 * AESBlockSize)); ++ __ movdqu(xmm_result2, Address(from, pos, Address::times_1, 2 * AESBlockSize)); ++ __ movdqu(xmm_result3, Address(from, pos, Address::times_1, 3 * AESBlockSize)); ++ ++ DoFour(pxor, xmm_key_first); ++ if (k == 0) { ++ for (int rnum = 1; rnum < ROUNDS[k]; rnum++) { ++ DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST)); ++ } ++ DoFour(aesdeclast, xmm_key_last); ++ } else if (k == 1) { ++ for (int rnum = 1; rnum <= ROUNDS[k]-2; rnum++) { ++ DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST)); ++ } ++ __ movdqu(xmm_key_last, Address(rsp, 0)); // xmm15 needs to be loaded again. ++ DoFour(aesdec, xmm1); // key : 0xc0 ++ __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // xmm1 needs to be loaded again ++ DoFour(aesdeclast, xmm_key_last); ++ } else if (k == 2) { ++ for (int rnum = 1; rnum <= ROUNDS[k] - 4; rnum++) { ++ DoFour(aesdec, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST)); ++ } ++ DoFour(aesdec, xmm1); // key : 0xc0 ++ __ movdqu(xmm15, Address(rsp, 6 * wordSize)); ++ __ movdqu(xmm1, Address(rsp, 8 * wordSize)); ++ DoFour(aesdec, xmm15); // key : 0xd0 ++ __ movdqu(xmm_key_last, Address(rsp, 0)); // xmm15 needs to be loaded again. 
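++        // For the 192/256-bit schedules the extra round keys (0xb0..0xe0) do not fit in the xmm5-xmm15
++        // bank that already holds keys 0x10..0xa0 and 0x00, so they were spilled to the stack above and
++        // are reloaded here between rounds before the final aesdeclast.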
++ DoFour(aesdec, xmm1); // key : 0xe0 ++ __ movdqu(xmm_prev_block_cipher, Address(rvec, 0x00)); // xmm1 needs to be loaded again ++ DoFour(aesdeclast, xmm_key_last); ++ } ++ ++ // for each result, xor with the r vector of previous cipher block ++ __ pxor(xmm_result0, xmm_prev_block_cipher); ++ __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 0 * AESBlockSize)); ++ __ pxor(xmm_result1, xmm_prev_block_cipher); ++ __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 1 * AESBlockSize)); ++ __ pxor(xmm_result2, xmm_prev_block_cipher); ++ __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 2 * AESBlockSize)); ++ __ pxor(xmm_result3, xmm_prev_block_cipher); ++ __ movdqu(xmm_prev_block_cipher, Address(from, pos, Address::times_1, 3 * AESBlockSize)); // this will carry over to next set of blocks ++ if (k != 0) { ++ __ movdqu(Address(rvec, 0x00), xmm_prev_block_cipher); ++ } ++ ++ __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); // store 4 results into the next 64 bytes of output ++ __ movdqu(Address(to, pos, Address::times_1, 1 * AESBlockSize), xmm_result1); ++ __ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2); ++ __ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3); ++ ++ __ addptr(pos, PARALLEL_FACTOR * AESBlockSize); ++ __ subptr(len_reg, PARALLEL_FACTOR * AESBlockSize); ++ __ jmp(L_multiBlock_loopTop[k]); ++ ++ // registers used in the non-parallelized loops ++ // xmm register assignments for the loops below ++ const XMMRegister xmm_result = xmm0; ++ const XMMRegister xmm_prev_block_cipher_save = xmm2; ++ const XMMRegister xmm_key11 = xmm3; ++ const XMMRegister xmm_key12 = xmm4; ++ const XMMRegister key_tmp = xmm4; ++ ++ __ BIND(L_singleBlock_loopTopHead[k]); ++ if (k == 1) { ++ __ addptr(rsp, 6 * wordSize); ++ } else if (k == 2) { ++ __ addptr(rsp, 10 * wordSize); ++ } ++ __ cmpptr(len_reg, 0); // any blocks left?? 
++ __ jcc(Assembler::equal, L_exit); ++ __ BIND(L_singleBlock_loopTopHead2[k]); ++ if (k == 1) { ++ load_key(xmm_key11, key, 0xb0); // 0xb0; 192-bit key goes upto 0xc0 ++ load_key(xmm_key12, key, 0xc0); // 0xc0; 192-bit key goes upto 0xc0 ++ } ++ if (k == 2) { ++ load_key(xmm_key11, key, 0xb0); // 0xb0; 256-bit key goes upto 0xe0 ++ } ++ __ align(OptoLoopAlignment); ++ __ BIND(L_singleBlock_loopTop[k]); ++ __ movdqu(xmm_result, Address(from, pos, Address::times_1, 0)); // get next 16 bytes of cipher input ++ __ movdqa(xmm_prev_block_cipher_save, xmm_result); // save for next r vector ++ __ pxor(xmm_result, xmm_key_first); // do the aes dec rounds ++ for (int rnum = 1; rnum <= 9 ; rnum++) { ++ __ aesdec(xmm_result, as_XMMRegister(rnum + XMM_REG_NUM_KEY_FIRST)); ++ } ++ if (k == 1) { ++ __ aesdec(xmm_result, xmm_key11); ++ __ aesdec(xmm_result, xmm_key12); ++ } ++ if (k == 2) { ++ __ aesdec(xmm_result, xmm_key11); ++ load_key(key_tmp, key, 0xc0); ++ __ aesdec(xmm_result, key_tmp); ++ load_key(key_tmp, key, 0xd0); ++ __ aesdec(xmm_result, key_tmp); ++ load_key(key_tmp, key, 0xe0); ++ __ aesdec(xmm_result, key_tmp); ++ } ++ ++ __ aesdeclast(xmm_result, xmm_key_last); // xmm15 always came from key+0 ++ __ pxor(xmm_result, xmm_prev_block_cipher); // xor with the current r vector ++ __ movdqu(Address(to, pos, Address::times_1, 0), xmm_result); // store into the next 16 bytes of output ++ // no need to store r to memory until we exit ++ __ movdqa(xmm_prev_block_cipher, xmm_prev_block_cipher_save); // set up next r vector with cipher input from this block ++ __ addptr(pos, AESBlockSize); ++ __ subptr(len_reg, AESBlockSize); ++ __ jcc(Assembler::notEqual, L_singleBlock_loopTop[k]); ++ if (k != 2) { ++ __ jmp(L_exit); ++ } ++ } //for 128/192/256 ++ ++ __ BIND(L_exit); ++ __ movdqu(Address(rvec, 0), xmm_prev_block_cipher); // final value of r stored in rvec of CipherBlockChaining object ++ __ pop(rbx); ++#ifdef _WIN64 ++ __ movl(rax, len_mem); ++#else ++ __ pop(rax); // return length ++#endif ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0); ++ return start; ++} ++ ++ address generate_upper_word_mask() { ++ __ align(64); ++ StubCodeMark mark(this, "StubRoutines", "upper_word_mask"); ++ address start = __ pc(); ++ __ emit_data64(0x0000000000000000, relocInfo::none); ++ __ emit_data64(0xFFFFFFFF00000000, relocInfo::none); ++ return start; ++ } ++ ++ address generate_shuffle_byte_flip_mask() { ++ __ align(64); ++ StubCodeMark mark(this, "StubRoutines", "shuffle_byte_flip_mask"); ++ address start = __ pc(); ++ __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none); ++ __ emit_data64(0x0001020304050607, relocInfo::none); ++ return start; ++ } ++ ++ // ofs and limit are use for multi-block byte array. 
++ // int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) ++ address generate_sha1_implCompress(bool multi_block, const char *name) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name); ++ address start = __ pc(); ++ ++ Register buf = c_rarg0; ++ Register state = c_rarg1; ++ Register ofs = c_rarg2; ++ Register limit = c_rarg3; ++ ++ const XMMRegister abcd = xmm0; ++ const XMMRegister e0 = xmm1; ++ const XMMRegister e1 = xmm2; ++ const XMMRegister msg0 = xmm3; ++ ++ const XMMRegister msg1 = xmm4; ++ const XMMRegister msg2 = xmm5; ++ const XMMRegister msg3 = xmm6; ++ const XMMRegister shuf_mask = xmm7; ++ ++ __ enter(); ++ ++ __ subptr(rsp, 4 * wordSize); ++ ++ __ fast_sha1(abcd, e0, e1, msg0, msg1, msg2, msg3, shuf_mask, ++ buf, state, ofs, limit, rsp, multi_block); ++ ++ __ addptr(rsp, 4 * wordSize); ++ ++ __ leave(); ++ __ ret(0);*/ ++ return start; ++ } ++ ++ address generate_pshuffle_byte_flip_mask() { ++ __ align(64); ++ StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask");ShouldNotReachHere(); ++ address start = __ pc();/* ++ __ emit_data64(0x0405060700010203, relocInfo::none); ++ __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none); ++ ++ if (VM_Version::supports_avx2()) { ++ __ emit_data64(0x0405060700010203, relocInfo::none); // second copy ++ __ emit_data64(0x0c0d0e0f08090a0b, relocInfo::none); ++ // _SHUF_00BA ++ __ emit_data64(0x0b0a090803020100, relocInfo::none); ++ __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none); ++ __ emit_data64(0x0b0a090803020100, relocInfo::none); ++ __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none); ++ // _SHUF_DC00 ++ __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none); ++ __ emit_data64(0x0b0a090803020100, relocInfo::none); ++ __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none); ++ __ emit_data64(0x0b0a090803020100, relocInfo::none); ++ } ++*/ ++ return start; ++ } ++ ++ //Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. ++ address generate_pshuffle_byte_flip_mask_sha512() { ++ __ align(32); ++ StubCodeMark mark(this, "StubRoutines", "pshuffle_byte_flip_mask_sha512");ShouldNotReachHere(); ++ address start = __ pc();/* ++ if (VM_Version::supports_avx2()) { ++ __ emit_data64(0x0001020304050607, relocInfo::none); // PSHUFFLE_BYTE_FLIP_MASK ++ __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none); ++ __ emit_data64(0x1011121314151617, relocInfo::none); ++ __ emit_data64(0x18191a1b1c1d1e1f, relocInfo::none); ++ __ emit_data64(0x0000000000000000, relocInfo::none); //MASK_YMM_LO ++ __ emit_data64(0x0000000000000000, relocInfo::none); ++ __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none); ++ __ emit_data64(0xFFFFFFFFFFFFFFFF, relocInfo::none); ++ } ++*/ ++ return start; ++ } ++ ++// ofs and limit are use for multi-block byte array. 
++// int com.sun.security.provider.DigestBase.implCompressMultiBlock(byte[] b, int ofs, int limit) ++ address generate_sha256_implCompress(bool multi_block, const char *name) { ++ //assert(VM_Version::supports_sha() || VM_Version::supports_avx2(), ""); ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name);ShouldNotReachHere(); ++ address start = __ pc();/* ++ ++ Register buf = c_rarg0; ++ Register state = c_rarg1; ++ Register ofs = c_rarg2; ++ Register limit = c_rarg3; ++ ++ const XMMRegister msg = xmm0; ++ const XMMRegister state0 = xmm1; ++ const XMMRegister state1 = xmm2; ++ const XMMRegister msgtmp0 = xmm3; ++ ++ const XMMRegister msgtmp1 = xmm4; ++ const XMMRegister msgtmp2 = xmm5; ++ const XMMRegister msgtmp3 = xmm6; ++ const XMMRegister msgtmp4 = xmm7; ++ ++ const XMMRegister shuf_mask = xmm8; ++ ++ __ enter(); ++ ++ __ subptr(rsp, 4 * wordSize); ++ ++ if (VM_Version::supports_sha()) { ++ __ fast_sha256(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, ++ buf, state, ofs, limit, rsp, multi_block, shuf_mask); ++ } else if (VM_Version::supports_avx2()) { ++ __ sha256_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, ++ buf, state, ofs, limit, rsp, multi_block, shuf_mask); ++ } ++ __ addptr(rsp, 4 * wordSize); ++ __ vzeroupper(); ++ __ leave(); ++ __ ret(0);*/ ++ return start; ++ } ++ ++ address generate_sha512_implCompress(bool multi_block, const char *name) { ++ //assert(VM_Version::supports_avx2(), ""); ++ //assert(VM_Version::supports_bmi2(), ""); ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", name);ShouldNotReachHere(); ++ address start = __ pc();/* ++ ++ Register buf = c_rarg0; ++ Register state = c_rarg1; ++ Register ofs = c_rarg2; ++ Register limit = c_rarg3; ++ ++ const XMMRegister msg = xmm0; ++ const XMMRegister state0 = xmm1; ++ const XMMRegister state1 = xmm2; ++ const XMMRegister msgtmp0 = xmm3; ++ const XMMRegister msgtmp1 = xmm4; ++ const XMMRegister msgtmp2 = xmm5; ++ const XMMRegister msgtmp3 = xmm6; ++ const XMMRegister msgtmp4 = xmm7; ++ ++ const XMMRegister shuf_mask = xmm8; ++ ++ __ enter(); ++ ++ __ sha512_AVX2(msg, state0, state1, msgtmp0, msgtmp1, msgtmp2, msgtmp3, msgtmp4, ++ buf, state, ofs, limit, rsp, multi_block, shuf_mask); ++ ++ __ vzeroupper(); ++ __ leave(); ++ __ ret(0);*/ ++ return start; ++ } ++ ++ // This is a version of CTR/AES crypt which does 6 blocks in a loop at a time ++ // to hide instruction latency ++ // ++ // Arguments: ++ // ++ // Inputs: ++ // c_rarg0 - source byte array address ++ // c_rarg1 - destination byte array address ++ // c_rarg2 - K (key) in little endian int array ++ // c_rarg3 - counter vector byte array address ++ // Linux ++ // c_rarg4 - input length ++ // c_rarg5 - saved encryptedCounter start ++ // rbp + 6 * wordSize - saved used length ++ // Windows ++ // rbp + 6 * wordSize - input length ++ // rbp + 7 * wordSize - saved encryptedCounter start ++ // rbp + 8 * wordSize - saved used length ++ // ++ // Output: ++ // rax - input length ++ // ++ address generate_counterMode_AESCrypt_Parallel() { ++ assert(UseAES, "need AES instructions and misaligned SSE support"); ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "counterMode_AESCrypt");ShouldNotReachHere(); ++ address start = __ pc();/* ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register key = c_rarg2; // key array address ++ const Register counter = c_rarg3; // counter 
byte array initialized from counter array address ++ // and updated with the incremented counter in the end ++#ifndef _WIN64 ++ const Register len_reg = c_rarg4; ++ const Register saved_encCounter_start = c_rarg5; ++ const Register used_addr = r10; ++ const Address used_mem(rbp, 2 * wordSize); ++ const Register used = r11; ++#else ++ const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 ++ const Address saved_encCounter_mem(rbp, 7 * wordSize); // length is on stack on Win64 ++ const Address used_mem(rbp, 8 * wordSize); // length is on stack on Win64 ++ const Register len_reg = r10; // pick the first volatile windows register ++ const Register saved_encCounter_start = r11; ++ const Register used_addr = r13; ++ const Register used = r14; ++#endif ++ const Register pos = rax; ++ ++ const int PARALLEL_FACTOR = 6; ++ const XMMRegister xmm_counter_shuf_mask = xmm0; ++ const XMMRegister xmm_key_shuf_mask = xmm1; // used temporarily to swap key bytes up front ++ const XMMRegister xmm_curr_counter = xmm2; ++ ++ const XMMRegister xmm_key_tmp0 = xmm3; ++ const XMMRegister xmm_key_tmp1 = xmm4; ++ ++ // registers holding the four results in the parallelized loop ++ const XMMRegister xmm_result0 = xmm5; ++ const XMMRegister xmm_result1 = xmm6; ++ const XMMRegister xmm_result2 = xmm7; ++ const XMMRegister xmm_result3 = xmm8; ++ const XMMRegister xmm_result4 = xmm9; ++ const XMMRegister xmm_result5 = xmm10; ++ ++ const XMMRegister xmm_from0 = xmm11; ++ const XMMRegister xmm_from1 = xmm12; ++ const XMMRegister xmm_from2 = xmm13; ++ const XMMRegister xmm_from3 = xmm14; //the last one is xmm14. we have to preserve it on WIN64. ++ const XMMRegister xmm_from4 = xmm3; //reuse xmm3~4. Because xmm_key_tmp0~1 are useless when loading input text ++ const XMMRegister xmm_from5 = xmm4; ++ ++ //for key_128, key_192, key_256 ++ const int rounds[3] = {10, 12, 14}; ++ Label L_exit_preLoop, L_preLoop_start; ++ Label L_multiBlock_loopTop[3]; ++ Label L_singleBlockLoopTop[3]; ++ Label L__incCounter[3][6]; //for 6 blocks ++ Label L__incCounter_single[3]; //for single block, key128, key192, key256 ++ Label L_processTail_insr[3], L_processTail_4_insr[3], L_processTail_2_insr[3], L_processTail_1_insr[3], L_processTail_exit_insr[3]; ++ Label L_processTail_extr[3], L_processTail_4_extr[3], L_processTail_2_extr[3], L_processTail_1_extr[3], L_processTail_exit_extr[3]; ++ ++ Label L_exit; ++ ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++#ifdef _WIN64 ++ // allocate spill slots for r13, r14 ++ enum { ++ saved_r13_offset, ++ saved_r14_offset ++ }; ++ __ subptr(rsp, 2 * wordSize); ++ __ movptr(Address(rsp, saved_r13_offset * wordSize), r13); ++ __ movptr(Address(rsp, saved_r14_offset * wordSize), r14); ++ ++ // on win64, fill len_reg from stack position ++ __ movl(len_reg, len_mem); ++ __ movptr(saved_encCounter_start, saved_encCounter_mem); ++ __ movptr(used_addr, used_mem); ++ __ movl(used, Address(used_addr, 0)); ++#else ++ __ push(len_reg); // Save ++ __ movptr(used_addr, used_mem); ++ __ movl(used, Address(used_addr, 0)); ++#endif ++ ++ __ push(rbx); // Save RBX ++ __ movdqu(xmm_curr_counter, Address(counter, 0x00)); // initialize counter with initial counter ++ __ movdqu(xmm_counter_shuf_mask, ExternalAddress(StubRoutines::x86::counter_shuffle_mask_addr()), pos); // pos as scratch ++ __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled ++ __ movptr(pos, 0); ++ ++ // Use the partially used encrpyted counter from last invocation ++ __ BIND(L_preLoop_start); ++ 
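   // The pre-loop below consumes keystream bytes left over from the previous call: while fewer than
++    // 16 bytes of the saved encrypted counter block have been used and input remains, it XORs one
++    // byte at a time. Roughly, in C:
++    //   while (used < 16 && len > 0) { to[pos] = from[pos] ^ saved_encCounter_start[used]; pos++; used++; len--; }
++    // Only after that does the block-aligned path below run.
++   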
__ cmpptr(used, 16); ++ __ jcc(Assembler::aboveEqual, L_exit_preLoop); ++ __ cmpptr(len_reg, 0); ++ __ jcc(Assembler::lessEqual, L_exit_preLoop); ++ __ movb(rbx, Address(saved_encCounter_start, used)); ++ __ xorb(rbx, Address(from, pos)); ++ __ movb(Address(to, pos), rbx); ++ __ addptr(pos, 1); ++ __ addptr(used, 1); ++ __ subptr(len_reg, 1); ++ ++ __ jmp(L_preLoop_start); ++ ++ __ BIND(L_exit_preLoop); ++ __ movl(Address(used_addr, 0), used); ++ ++ // key length could be only {11, 13, 15} * 4 = {44, 52, 60} ++ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr()), rbx); // rbx as scratch ++ __ movl(rbx, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); ++ __ cmpl(rbx, 52); ++ __ jcc(Assembler::equal, L_multiBlock_loopTop[1]); ++ __ cmpl(rbx, 60); ++ __ jcc(Assembler::equal, L_multiBlock_loopTop[2]); ++ ++#define CTR_DoSix(opc, src_reg) \ ++ __ opc(xmm_result0, src_reg); \ ++ __ opc(xmm_result1, src_reg); \ ++ __ opc(xmm_result2, src_reg); \ ++ __ opc(xmm_result3, src_reg); \ ++ __ opc(xmm_result4, src_reg); \ ++ __ opc(xmm_result5, src_reg); ++ ++ // k == 0 : generate code for key_128 ++ // k == 1 : generate code for key_192 ++ // k == 2 : generate code for key_256 ++ for (int k = 0; k < 3; ++k) { ++ //multi blocks starts here ++ __ align(OptoLoopAlignment); ++ __ BIND(L_multiBlock_loopTop[k]); ++ __ cmpptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // see if at least PARALLEL_FACTOR blocks left ++ __ jcc(Assembler::less, L_singleBlockLoopTop[k]); ++ load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask); ++ ++ //load, then increase counters ++ CTR_DoSix(movdqa, xmm_curr_counter); ++ inc_counter(rbx, xmm_result1, 0x01, L__incCounter[k][0]); ++ inc_counter(rbx, xmm_result2, 0x02, L__incCounter[k][1]); ++ inc_counter(rbx, xmm_result3, 0x03, L__incCounter[k][2]); ++ inc_counter(rbx, xmm_result4, 0x04, L__incCounter[k][3]); ++ inc_counter(rbx, xmm_result5, 0x05, L__incCounter[k][4]); ++ inc_counter(rbx, xmm_curr_counter, 0x06, L__incCounter[k][5]); ++ CTR_DoSix(pshufb, xmm_counter_shuf_mask); // after increased, shuffled counters back for PXOR ++ CTR_DoSix(pxor, xmm_key_tmp0); //PXOR with Round 0 key ++ ++ //load two ROUND_KEYs at a time ++ for (int i = 1; i < rounds[k]; ) { ++ load_key(xmm_key_tmp1, key, (0x10 * i), xmm_key_shuf_mask); ++ load_key(xmm_key_tmp0, key, (0x10 * (i+1)), xmm_key_shuf_mask); ++ CTR_DoSix(aesenc, xmm_key_tmp1); ++ i++; ++ if (i != rounds[k]) { ++ CTR_DoSix(aesenc, xmm_key_tmp0); ++ } else { ++ CTR_DoSix(aesenclast, xmm_key_tmp0); ++ } ++ i++; ++ } ++ ++ // get next PARALLEL_FACTOR blocks into xmm_result registers ++ __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); ++ __ movdqu(xmm_from1, Address(from, pos, Address::times_1, 1 * AESBlockSize)); ++ __ movdqu(xmm_from2, Address(from, pos, Address::times_1, 2 * AESBlockSize)); ++ __ movdqu(xmm_from3, Address(from, pos, Address::times_1, 3 * AESBlockSize)); ++ __ movdqu(xmm_from4, Address(from, pos, Address::times_1, 4 * AESBlockSize)); ++ __ movdqu(xmm_from5, Address(from, pos, Address::times_1, 5 * AESBlockSize)); ++ ++ __ pxor(xmm_result0, xmm_from0); ++ __ pxor(xmm_result1, xmm_from1); ++ __ pxor(xmm_result2, xmm_from2); ++ __ pxor(xmm_result3, xmm_from3); ++ __ pxor(xmm_result4, xmm_from4); ++ __ pxor(xmm_result5, xmm_from5); ++ ++ // store 6 results into the next 64 bytes of output ++ __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); ++ __ movdqu(Address(to, pos, 
Address::times_1, 1 * AESBlockSize), xmm_result1); ++ __ movdqu(Address(to, pos, Address::times_1, 2 * AESBlockSize), xmm_result2); ++ __ movdqu(Address(to, pos, Address::times_1, 3 * AESBlockSize), xmm_result3); ++ __ movdqu(Address(to, pos, Address::times_1, 4 * AESBlockSize), xmm_result4); ++ __ movdqu(Address(to, pos, Address::times_1, 5 * AESBlockSize), xmm_result5); ++ ++ __ addptr(pos, PARALLEL_FACTOR * AESBlockSize); // increase the length of crypt text ++ __ subptr(len_reg, PARALLEL_FACTOR * AESBlockSize); // decrease the remaining length ++ __ jmp(L_multiBlock_loopTop[k]); ++ ++ // singleBlock starts here ++ __ align(OptoLoopAlignment); ++ __ BIND(L_singleBlockLoopTop[k]); ++ __ cmpptr(len_reg, 0); ++ __ jcc(Assembler::lessEqual, L_exit); ++ load_key(xmm_key_tmp0, key, 0x00, xmm_key_shuf_mask); ++ __ movdqa(xmm_result0, xmm_curr_counter); ++ inc_counter(rbx, xmm_curr_counter, 0x01, L__incCounter_single[k]); ++ __ pshufb(xmm_result0, xmm_counter_shuf_mask); ++ __ pxor(xmm_result0, xmm_key_tmp0); ++ for (int i = 1; i < rounds[k]; i++) { ++ load_key(xmm_key_tmp0, key, (0x10 * i), xmm_key_shuf_mask); ++ __ aesenc(xmm_result0, xmm_key_tmp0); ++ } ++ load_key(xmm_key_tmp0, key, (rounds[k] * 0x10), xmm_key_shuf_mask); ++ __ aesenclast(xmm_result0, xmm_key_tmp0); ++ __ cmpptr(len_reg, AESBlockSize); ++ __ jcc(Assembler::less, L_processTail_insr[k]); ++ __ movdqu(xmm_from0, Address(from, pos, Address::times_1, 0 * AESBlockSize)); ++ __ pxor(xmm_result0, xmm_from0); ++ __ movdqu(Address(to, pos, Address::times_1, 0 * AESBlockSize), xmm_result0); ++ __ addptr(pos, AESBlockSize); ++ __ subptr(len_reg, AESBlockSize); ++ __ jmp(L_singleBlockLoopTop[k]); ++ __ BIND(L_processTail_insr[k]); // Process the tail part of the input array ++ __ addptr(pos, len_reg); // 1. Insert bytes from src array into xmm_from0 register ++ __ testptr(len_reg, 8); ++ __ jcc(Assembler::zero, L_processTail_4_insr[k]); ++ __ subptr(pos,8); ++ __ pinsrq(xmm_from0, Address(from, pos), 0); ++ __ BIND(L_processTail_4_insr[k]); ++ __ testptr(len_reg, 4); ++ __ jcc(Assembler::zero, L_processTail_2_insr[k]); ++ __ subptr(pos,4); ++ __ pslldq(xmm_from0, 4); ++ __ pinsrd(xmm_from0, Address(from, pos), 0); ++ __ BIND(L_processTail_2_insr[k]); ++ __ testptr(len_reg, 2); ++ __ jcc(Assembler::zero, L_processTail_1_insr[k]); ++ __ subptr(pos, 2); ++ __ pslldq(xmm_from0, 2); ++ __ pinsrw(xmm_from0, Address(from, pos), 0); ++ __ BIND(L_processTail_1_insr[k]); ++ __ testptr(len_reg, 1); ++ __ jcc(Assembler::zero, L_processTail_exit_insr[k]); ++ __ subptr(pos, 1); ++ __ pslldq(xmm_from0, 1); ++ __ pinsrb(xmm_from0, Address(from, pos), 0); ++ __ BIND(L_processTail_exit_insr[k]); ++ ++ __ movdqu(Address(saved_encCounter_start, 0), xmm_result0); // 2. Perform pxor of the encrypted counter and plaintext Bytes. ++ __ pxor(xmm_result0, xmm_from0); // Also the encrypted counter is saved for next invocation. ++ ++ __ testptr(len_reg, 8); ++ __ jcc(Assembler::zero, L_processTail_4_extr[k]); // 3. Extract bytes from xmm_result0 into the dest. 
array ++ __ pextrq(Address(to, pos), xmm_result0, 0); ++ __ psrldq(xmm_result0, 8); ++ __ addptr(pos, 8); ++ __ BIND(L_processTail_4_extr[k]); ++ __ testptr(len_reg, 4); ++ __ jcc(Assembler::zero, L_processTail_2_extr[k]); ++ __ pextrd(Address(to, pos), xmm_result0, 0); ++ __ psrldq(xmm_result0, 4); ++ __ addptr(pos, 4); ++ __ BIND(L_processTail_2_extr[k]); ++ __ testptr(len_reg, 2); ++ __ jcc(Assembler::zero, L_processTail_1_extr[k]); ++ __ pextrw(Address(to, pos), xmm_result0, 0); ++ __ psrldq(xmm_result0, 2); ++ __ addptr(pos, 2); ++ __ BIND(L_processTail_1_extr[k]); ++ __ testptr(len_reg, 1); ++ __ jcc(Assembler::zero, L_processTail_exit_extr[k]); ++ __ pextrb(Address(to, pos), xmm_result0, 0); ++ ++ __ BIND(L_processTail_exit_extr[k]); ++ __ movl(Address(used_addr, 0), len_reg); ++ __ jmp(L_exit); ++ ++ } ++ ++ __ BIND(L_exit); ++ __ pshufb(xmm_curr_counter, xmm_counter_shuf_mask); //counter is shuffled back. ++ __ movdqu(Address(counter, 0), xmm_curr_counter); //save counter back ++ __ pop(rbx); // pop the saved RBX. ++#ifdef _WIN64 ++ __ movl(rax, len_mem); ++ __ movptr(r13, Address(rsp, saved_r13_offset * wordSize)); ++ __ movptr(r14, Address(rsp, saved_r14_offset * wordSize)); ++ __ addptr(rsp, 2 * wordSize); ++#else ++ __ pop(rax); // return 'len' ++#endif ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0);*/ ++ return start; ++ } ++ ++/*void roundDec(XMMRegister xmm_reg) { ++ __ vaesdec(xmm1, xmm1, xmm_reg, Assembler::AVX_512bit); ++ __ vaesdec(xmm2, xmm2, xmm_reg, Assembler::AVX_512bit); ++ __ vaesdec(xmm3, xmm3, xmm_reg, Assembler::AVX_512bit); ++ __ vaesdec(xmm4, xmm4, xmm_reg, Assembler::AVX_512bit); ++ __ vaesdec(xmm5, xmm5, xmm_reg, Assembler::AVX_512bit); ++ __ vaesdec(xmm6, xmm6, xmm_reg, Assembler::AVX_512bit); ++ __ vaesdec(xmm7, xmm7, xmm_reg, Assembler::AVX_512bit); ++ __ vaesdec(xmm8, xmm8, xmm_reg, Assembler::AVX_512bit); ++}*/ ++ ++/*void roundDeclast(XMMRegister xmm_reg) { ++ __ vaesdeclast(xmm1, xmm1, xmm_reg, Assembler::AVX_512bit); ++ __ vaesdeclast(xmm2, xmm2, xmm_reg, Assembler::AVX_512bit); ++ __ vaesdeclast(xmm3, xmm3, xmm_reg, Assembler::AVX_512bit); ++ __ vaesdeclast(xmm4, xmm4, xmm_reg, Assembler::AVX_512bit); ++ __ vaesdeclast(xmm5, xmm5, xmm_reg, Assembler::AVX_512bit); ++ __ vaesdeclast(xmm6, xmm6, xmm_reg, Assembler::AVX_512bit); ++ __ vaesdeclast(xmm7, xmm7, xmm_reg, Assembler::AVX_512bit); ++ __ vaesdeclast(xmm8, xmm8, xmm_reg, Assembler::AVX_512bit); ++}*/ ++ ++/*void ev_load_key(XMMRegister xmmdst, Register key, int offset, XMMRegister xmm_shuf_mask = NULL) { ++ __ movdqu(xmmdst, Address(key, offset)); ++ if (xmm_shuf_mask != NULL) { ++ __ pshufb(xmmdst, xmm_shuf_mask); ++ } else { ++ __ pshufb(xmmdst, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); ++ } ++ __ evshufi64x2(xmmdst, xmmdst, xmmdst, 0x0, Assembler::AVX_512bit); ++ ++ }*/ ++ ++address generate_cipherBlockChaining_decryptVectorAESCrypt() { ++ //assert(VM_Version::supports_vaes(), "need AES instructions and misaligned SSE support"); ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "cipherBlockChaining_decryptAESCrypt");ShouldNotReachHere(); ++ address start = __ pc();/* ++ ++ const Register from = c_rarg0; // source array address ++ const Register to = c_rarg1; // destination array address ++ const Register key = c_rarg2; // key array address ++ const Register rvec = c_rarg3; // r byte array initialized from initvector array address ++ // and left with the results of the last encryption block ++#ifndef _WIN64 ++ 
const Register len_reg = c_rarg4; // src len (must be multiple of blocksize 16) ++#else ++ const Address len_mem(rbp, 6 * wordSize); // length is on stack on Win64 ++ const Register len_reg = r11; // pick the volatile windows register ++#endif ++ ++ Label Loop, Loop1, L_128, L_256, L_192, KEY_192, KEY_256, Loop2, Lcbc_dec_rem_loop, ++ Lcbc_dec_rem_last, Lcbc_dec_ret, Lcbc_dec_rem, Lcbc_exit; ++ ++ __ enter(); ++ ++#ifdef _WIN64 ++ // on win64, fill len_reg from stack position ++ __ movl(len_reg, len_mem); ++#else ++ __ push(len_reg); // Save ++#endif ++ __ push(rbx); ++ __ vzeroupper(); ++ ++ // Temporary variable declaration for swapping key bytes ++ const XMMRegister xmm_key_shuf_mask = xmm1; ++ __ movdqu(xmm_key_shuf_mask, ExternalAddress(StubRoutines::x86::key_shuffle_mask_addr())); ++ ++ // Calculate number of rounds from key size: 44 for 10-rounds, 52 for 12-rounds, 60 for 14-rounds ++ const Register rounds = rbx; ++ __ movl(rounds, Address(key, arrayOopDesc::length_offset_in_bytes() - arrayOopDesc::base_offset_in_bytes(T_INT))); ++ ++ const XMMRegister IV = xmm0; ++ // Load IV and broadcast value to 512-bits ++ __ evbroadcasti64x2(IV, Address(rvec, 0), Assembler::AVX_512bit); ++ ++ // Temporary variables for storing round keys ++ const XMMRegister RK0 = xmm30; ++ const XMMRegister RK1 = xmm9; ++ const XMMRegister RK2 = xmm18; ++ const XMMRegister RK3 = xmm19; ++ const XMMRegister RK4 = xmm20; ++ const XMMRegister RK5 = xmm21; ++ const XMMRegister RK6 = xmm22; ++ const XMMRegister RK7 = xmm23; ++ const XMMRegister RK8 = xmm24; ++ const XMMRegister RK9 = xmm25; ++ const XMMRegister RK10 = xmm26; ++ ++ // Load and shuffle key ++ // the java expanded key ordering is rotated one position from what we want ++ // so we start from 1*16 here and hit 0*16 last ++ ev_load_key(RK1, key, 1 * 16, xmm_key_shuf_mask); ++ ev_load_key(RK2, key, 2 * 16, xmm_key_shuf_mask); ++ ev_load_key(RK3, key, 3 * 16, xmm_key_shuf_mask); ++ ev_load_key(RK4, key, 4 * 16, xmm_key_shuf_mask); ++ ev_load_key(RK5, key, 5 * 16, xmm_key_shuf_mask); ++ ev_load_key(RK6, key, 6 * 16, xmm_key_shuf_mask); ++ ev_load_key(RK7, key, 7 * 16, xmm_key_shuf_mask); ++ ev_load_key(RK8, key, 8 * 16, xmm_key_shuf_mask); ++ ev_load_key(RK9, key, 9 * 16, xmm_key_shuf_mask); ++ ev_load_key(RK10, key, 10 * 16, xmm_key_shuf_mask); ++ ev_load_key(RK0, key, 0*16, xmm_key_shuf_mask); ++ ++ // Variables for storing source cipher text ++ const XMMRegister S0 = xmm10; ++ const XMMRegister S1 = xmm11; ++ const XMMRegister S2 = xmm12; ++ const XMMRegister S3 = xmm13; ++ const XMMRegister S4 = xmm14; ++ const XMMRegister S5 = xmm15; ++ const XMMRegister S6 = xmm16; ++ const XMMRegister S7 = xmm17; ++ ++ // Variables for storing decrypted text ++ const XMMRegister B0 = xmm1; ++ const XMMRegister B1 = xmm2; ++ const XMMRegister B2 = xmm3; ++ const XMMRegister B3 = xmm4; ++ const XMMRegister B4 = xmm5; ++ const XMMRegister B5 = xmm6; ++ const XMMRegister B6 = xmm7; ++ const XMMRegister B7 = xmm8; ++ ++ __ cmpl(rounds, 44); ++ __ jcc(Assembler::greater, KEY_192); ++ __ jmp(Loop); ++ ++ __ BIND(KEY_192); ++ const XMMRegister RK11 = xmm27; ++ const XMMRegister RK12 = xmm28; ++ ev_load_key(RK11, key, 11*16, xmm_key_shuf_mask); ++ ev_load_key(RK12, key, 12*16, xmm_key_shuf_mask); ++ ++ __ cmpl(rounds, 52); ++ __ jcc(Assembler::greater, KEY_256); ++ __ jmp(Loop); ++ ++ __ BIND(KEY_256); ++ const XMMRegister RK13 = xmm29; ++ const XMMRegister RK14 = xmm31; ++ ev_load_key(RK13, key, 13*16, xmm_key_shuf_mask); ++ ev_load_key(RK14, key, 14*16, xmm_key_shuf_mask); 
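++  // Main loop: while at least 512 bytes remain, eight 64-byte ZMM loads (S0..S7, four AES blocks each)
++  // are decrypted per iteration; the evalignq shuffles below rebuild the previous-ciphertext vectors
++  // needed for the CBC XOR from registers rather than reloading them from memory.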
++ ++ __ BIND(Loop); ++ __ cmpl(len_reg, 512); ++ __ jcc(Assembler::below, Lcbc_dec_rem); ++ __ BIND(Loop1); ++ __ subl(len_reg, 512); ++ __ evmovdquq(S0, Address(from, 0 * 64), Assembler::AVX_512bit); ++ __ evmovdquq(S1, Address(from, 1 * 64), Assembler::AVX_512bit); ++ __ evmovdquq(S2, Address(from, 2 * 64), Assembler::AVX_512bit); ++ __ evmovdquq(S3, Address(from, 3 * 64), Assembler::AVX_512bit); ++ __ evmovdquq(S4, Address(from, 4 * 64), Assembler::AVX_512bit); ++ __ evmovdquq(S5, Address(from, 5 * 64), Assembler::AVX_512bit); ++ __ evmovdquq(S6, Address(from, 6 * 64), Assembler::AVX_512bit); ++ __ evmovdquq(S7, Address(from, 7 * 64), Assembler::AVX_512bit); ++ __ leaq(from, Address(from, 8 * 64)); ++ ++ __ evpxorq(B0, S0, RK1, Assembler::AVX_512bit); ++ __ evpxorq(B1, S1, RK1, Assembler::AVX_512bit); ++ __ evpxorq(B2, S2, RK1, Assembler::AVX_512bit); ++ __ evpxorq(B3, S3, RK1, Assembler::AVX_512bit); ++ __ evpxorq(B4, S4, RK1, Assembler::AVX_512bit); ++ __ evpxorq(B5, S5, RK1, Assembler::AVX_512bit); ++ __ evpxorq(B6, S6, RK1, Assembler::AVX_512bit); ++ __ evpxorq(B7, S7, RK1, Assembler::AVX_512bit); ++ ++ __ evalignq(IV, S0, IV, 0x06); ++ __ evalignq(S0, S1, S0, 0x06); ++ __ evalignq(S1, S2, S1, 0x06); ++ __ evalignq(S2, S3, S2, 0x06); ++ __ evalignq(S3, S4, S3, 0x06); ++ __ evalignq(S4, S5, S4, 0x06); ++ __ evalignq(S5, S6, S5, 0x06); ++ __ evalignq(S6, S7, S6, 0x06); ++ ++ roundDec(RK2); ++ roundDec(RK3); ++ roundDec(RK4); ++ roundDec(RK5); ++ roundDec(RK6); ++ roundDec(RK7); ++ roundDec(RK8); ++ roundDec(RK9); ++ roundDec(RK10); ++ ++ __ cmpl(rounds, 44); ++ __ jcc(Assembler::belowEqual, L_128); ++ roundDec(RK11); ++ roundDec(RK12); ++ ++ __ cmpl(rounds, 52); ++ __ jcc(Assembler::belowEqual, L_192); ++ roundDec(RK13); ++ roundDec(RK14); ++ ++ __ BIND(L_256); ++ roundDeclast(RK0); ++ __ jmp(Loop2); ++ ++ __ BIND(L_128); ++ roundDeclast(RK0); ++ __ jmp(Loop2); ++ ++ __ BIND(L_192); ++ roundDeclast(RK0); ++ ++ __ BIND(Loop2); ++ __ evpxorq(B0, B0, IV, Assembler::AVX_512bit); ++ __ evpxorq(B1, B1, S0, Assembler::AVX_512bit); ++ __ evpxorq(B2, B2, S1, Assembler::AVX_512bit); ++ __ evpxorq(B3, B3, S2, Assembler::AVX_512bit); ++ __ evpxorq(B4, B4, S3, Assembler::AVX_512bit); ++ __ evpxorq(B5, B5, S4, Assembler::AVX_512bit); ++ __ evpxorq(B6, B6, S5, Assembler::AVX_512bit); ++ __ evpxorq(B7, B7, S6, Assembler::AVX_512bit); ++ __ evmovdquq(IV, S7, Assembler::AVX_512bit); ++ ++ __ evmovdquq(Address(to, 0 * 64), B0, Assembler::AVX_512bit); ++ __ evmovdquq(Address(to, 1 * 64), B1, Assembler::AVX_512bit); ++ __ evmovdquq(Address(to, 2 * 64), B2, Assembler::AVX_512bit); ++ __ evmovdquq(Address(to, 3 * 64), B3, Assembler::AVX_512bit); ++ __ evmovdquq(Address(to, 4 * 64), B4, Assembler::AVX_512bit); ++ __ evmovdquq(Address(to, 5 * 64), B5, Assembler::AVX_512bit); ++ __ evmovdquq(Address(to, 6 * 64), B6, Assembler::AVX_512bit); ++ __ evmovdquq(Address(to, 7 * 64), B7, Assembler::AVX_512bit); ++ __ leaq(to, Address(to, 8 * 64)); ++ __ jmp(Loop); ++ ++ __ BIND(Lcbc_dec_rem); ++ __ evshufi64x2(IV, IV, IV, 0x03, Assembler::AVX_512bit); ++ ++ __ BIND(Lcbc_dec_rem_loop); ++ __ subl(len_reg, 16); ++ __ jcc(Assembler::carrySet, Lcbc_dec_ret); ++ ++ __ movdqu(S0, Address(from, 0)); ++ __ evpxorq(B0, S0, RK1, Assembler::AVX_512bit); ++ __ vaesdec(B0, B0, RK2, Assembler::AVX_512bit); ++ __ vaesdec(B0, B0, RK3, Assembler::AVX_512bit); ++ __ vaesdec(B0, B0, RK4, Assembler::AVX_512bit); ++ __ vaesdec(B0, B0, RK5, Assembler::AVX_512bit); ++ __ vaesdec(B0, B0, RK6, Assembler::AVX_512bit); ++ __ vaesdec(B0, B0, 
RK7, Assembler::AVX_512bit); ++ __ vaesdec(B0, B0, RK8, Assembler::AVX_512bit); ++ __ vaesdec(B0, B0, RK9, Assembler::AVX_512bit); ++ __ vaesdec(B0, B0, RK10, Assembler::AVX_512bit); ++ __ cmpl(rounds, 44); ++ __ jcc(Assembler::belowEqual, Lcbc_dec_rem_last); ++ ++ __ vaesdec(B0, B0, RK11, Assembler::AVX_512bit); ++ __ vaesdec(B0, B0, RK12, Assembler::AVX_512bit); ++ __ cmpl(rounds, 52); ++ __ jcc(Assembler::belowEqual, Lcbc_dec_rem_last); ++ ++ __ vaesdec(B0, B0, RK13, Assembler::AVX_512bit); ++ __ vaesdec(B0, B0, RK14, Assembler::AVX_512bit); ++ ++ __ BIND(Lcbc_dec_rem_last); ++ __ vaesdeclast(B0, B0, RK0, Assembler::AVX_512bit); ++ ++ __ evpxorq(B0, B0, IV, Assembler::AVX_512bit); ++ __ evmovdquq(IV, S0, Assembler::AVX_512bit); ++ __ movdqu(Address(to, 0), B0); ++ __ leaq(from, Address(from, 16)); ++ __ leaq(to, Address(to, 16)); ++ __ jmp(Lcbc_dec_rem_loop); ++ ++ __ BIND(Lcbc_dec_ret); ++ __ movdqu(Address(rvec, 0), IV); ++ ++ // Zero out the round keys ++ __ evpxorq(RK0, RK0, RK0, Assembler::AVX_512bit); ++ __ evpxorq(RK1, RK1, RK1, Assembler::AVX_512bit); ++ __ evpxorq(RK2, RK2, RK2, Assembler::AVX_512bit); ++ __ evpxorq(RK3, RK3, RK3, Assembler::AVX_512bit); ++ __ evpxorq(RK4, RK4, RK4, Assembler::AVX_512bit); ++ __ evpxorq(RK5, RK5, RK5, Assembler::AVX_512bit); ++ __ evpxorq(RK6, RK6, RK6, Assembler::AVX_512bit); ++ __ evpxorq(RK7, RK7, RK7, Assembler::AVX_512bit); ++ __ evpxorq(RK8, RK8, RK8, Assembler::AVX_512bit); ++ __ evpxorq(RK9, RK9, RK9, Assembler::AVX_512bit); ++ __ evpxorq(RK10, RK10, RK10, Assembler::AVX_512bit); ++ __ cmpl(rounds, 44); ++ __ jcc(Assembler::belowEqual, Lcbc_exit); ++ __ evpxorq(RK11, RK11, RK11, Assembler::AVX_512bit); ++ __ evpxorq(RK12, RK12, RK12, Assembler::AVX_512bit); ++ __ cmpl(rounds, 52); ++ __ jcc(Assembler::belowEqual, Lcbc_exit); ++ __ evpxorq(RK13, RK13, RK13, Assembler::AVX_512bit); ++ __ evpxorq(RK14, RK14, RK14, Assembler::AVX_512bit); ++ ++ __ BIND(Lcbc_exit); ++ __ pop(rbx); ++#ifdef _WIN64 ++ __ movl(rax, len_mem); ++#else ++ __ pop(rax); // return length ++#endif ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0);*/ ++ return start; ++} ++ ++ // byte swap x86 long ++ address generate_ghash_long_swap_mask() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "ghash_long_swap_mask"); ++ address start = __ pc();ShouldNotReachHere(); ++// __ emit_data64(0x0f0e0d0c0b0a0908, relocInfo::none ); ++// __ emit_data64(0x0706050403020100, relocInfo::none ); ++ return start; ++ } ++ ++ // byte swap x86 byte array ++ address generate_ghash_byte_swap_mask() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "ghash_byte_swap_mask"); ++ address start = __ pc();ShouldNotReachHere(); ++// __ emit_data64(0x08090a0b0c0d0e0f, relocInfo::none ); ++// __ emit_data64(0x0001020304050607, relocInfo::none ); ++ return start; ++ } ++ ++ /* Single and multi-block ghash operations */ ++ address generate_ghash_processBlocks() { ++ __ align(CodeEntryAlignment); ++ Label L_ghash_loop, L_exit; ++ StubCodeMark mark(this, "StubRoutines", "ghash_processBlocks");ShouldNotReachHere(); ++ address start = __ pc();/* ++ ++ const Register state = c_rarg0; ++ const Register subkeyH = c_rarg1; ++ const Register data = c_rarg2; ++ const Register blocks = c_rarg3; ++ ++ const XMMRegister xmm_temp0 = xmm0; ++ const XMMRegister xmm_temp1 = xmm1; ++ const XMMRegister xmm_temp2 = xmm2; ++ const XMMRegister xmm_temp3 = xmm3; ++ const XMMRegister xmm_temp4 = xmm4; ++ const XMMRegister xmm_temp5 = 
xmm5; ++ const XMMRegister xmm_temp6 = xmm6; ++ const XMMRegister xmm_temp7 = xmm7; ++ const XMMRegister xmm_temp8 = xmm8; ++ const XMMRegister xmm_temp9 = xmm9; ++ const XMMRegister xmm_temp10 = xmm10; ++ ++ __ enter(); ++ ++ __ movdqu(xmm_temp10, ExternalAddress(StubRoutines::x86::ghash_long_swap_mask_addr())); ++ ++ __ movdqu(xmm_temp0, Address(state, 0)); ++ __ pshufb(xmm_temp0, xmm_temp10); ++ ++ ++ __ BIND(L_ghash_loop); ++ __ movdqu(xmm_temp2, Address(data, 0)); ++ __ pshufb(xmm_temp2, ExternalAddress(StubRoutines::x86::ghash_byte_swap_mask_addr())); ++ ++ __ movdqu(xmm_temp1, Address(subkeyH, 0)); ++ __ pshufb(xmm_temp1, xmm_temp10); ++ ++ __ pxor(xmm_temp0, xmm_temp2); ++ ++ // ++ // Multiply with the hash key ++ // ++ __ movdqu(xmm_temp3, xmm_temp0); ++ __ pclmulqdq(xmm_temp3, xmm_temp1, 0); // xmm3 holds a0*b0 ++ __ movdqu(xmm_temp4, xmm_temp0); ++ __ pclmulqdq(xmm_temp4, xmm_temp1, 16); // xmm4 holds a0*b1 ++ ++ __ movdqu(xmm_temp5, xmm_temp0); ++ __ pclmulqdq(xmm_temp5, xmm_temp1, 1); // xmm5 holds a1*b0 ++ __ movdqu(xmm_temp6, xmm_temp0); ++ __ pclmulqdq(xmm_temp6, xmm_temp1, 17); // xmm6 holds a1*b1 ++ ++ __ pxor(xmm_temp4, xmm_temp5); // xmm4 holds a0*b1 + a1*b0 ++ ++ __ movdqu(xmm_temp5, xmm_temp4); // move the contents of xmm4 to xmm5 ++ __ psrldq(xmm_temp4, 8); // shift by xmm4 64 bits to the right ++ __ pslldq(xmm_temp5, 8); // shift by xmm5 64 bits to the left ++ __ pxor(xmm_temp3, xmm_temp5); ++ __ pxor(xmm_temp6, xmm_temp4); // Register pair holds the result ++ // of the carry-less multiplication of ++ // xmm0 by xmm1. ++ ++ // We shift the result of the multiplication by one bit position ++ // to the left to cope for the fact that the bits are reversed. ++ __ movdqu(xmm_temp7, xmm_temp3); ++ __ movdqu(xmm_temp8, xmm_temp6); ++ __ pslld(xmm_temp3, 1); ++ __ pslld(xmm_temp6, 1); ++ __ psrld(xmm_temp7, 31); ++ __ psrld(xmm_temp8, 31); ++ __ movdqu(xmm_temp9, xmm_temp7); ++ __ pslldq(xmm_temp8, 4); ++ __ pslldq(xmm_temp7, 4); ++ __ psrldq(xmm_temp9, 12); ++ __ por(xmm_temp3, xmm_temp7); ++ __ por(xmm_temp6, xmm_temp8); ++ __ por(xmm_temp6, xmm_temp9); ++ ++ // ++ // First phase of the reduction ++ // ++ // Move xmm3 into xmm7, xmm8, xmm9 in order to perform the shifts ++ // independently. ++ __ movdqu(xmm_temp7, xmm_temp3); ++ __ movdqu(xmm_temp8, xmm_temp3); ++ __ movdqu(xmm_temp9, xmm_temp3); ++ __ pslld(xmm_temp7, 31); // packed right shift shifting << 31 ++ __ pslld(xmm_temp8, 30); // packed right shift shifting << 30 ++ __ pslld(xmm_temp9, 25); // packed right shift shifting << 25 ++ __ pxor(xmm_temp7, xmm_temp8); // xor the shifted versions ++ __ pxor(xmm_temp7, xmm_temp9); ++ __ movdqu(xmm_temp8, xmm_temp7); ++ __ pslldq(xmm_temp7, 12); ++ __ psrldq(xmm_temp8, 4); ++ __ pxor(xmm_temp3, xmm_temp7); // first phase of the reduction complete ++ ++ // ++ // Second phase of the reduction ++ // ++ // Make 3 copies of xmm3 in xmm2, xmm4, xmm5 for doing these ++ // shift operations. 
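++    // The >>1, >>2 and >>7 shifts correspond to the x, x^2 and x^7 terms of the GHASH reduction
++    // polynomial x^128 + x^7 + x^2 + x + 1; XORing the shifted copies (plus xmm8 carried over from the
++    // first phase) folds the upper half of the carry-less product back into 128 bits.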
++ __ movdqu(xmm_temp2, xmm_temp3); ++ __ movdqu(xmm_temp4, xmm_temp3); ++ __ movdqu(xmm_temp5, xmm_temp3); ++ __ psrld(xmm_temp2, 1); // packed left shifting >> 1 ++ __ psrld(xmm_temp4, 2); // packed left shifting >> 2 ++ __ psrld(xmm_temp5, 7); // packed left shifting >> 7 ++ __ pxor(xmm_temp2, xmm_temp4); // xor the shifted versions ++ __ pxor(xmm_temp2, xmm_temp5); ++ __ pxor(xmm_temp2, xmm_temp8); ++ __ pxor(xmm_temp3, xmm_temp2); ++ __ pxor(xmm_temp6, xmm_temp3); // the result is in xmm6 ++ ++ __ decrement(blocks); ++ __ jcc(Assembler::zero, L_exit); ++ __ movdqu(xmm_temp0, xmm_temp6); ++ __ addptr(data, 16); ++ __ jmp(L_ghash_loop); ++ ++ __ BIND(L_exit); ++ __ pshufb(xmm_temp6, xmm_temp10); // Byte swap 16-byte result ++ __ movdqu(Address(state, 0), xmm_temp6); // store the result ++ __ leave(); ++ __ ret(0);*/ ++ return start; ++ } ++ ++ //base64 character set ++ address base64_charset_addr() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "base64_charset");ShouldNotReachHere(); ++ address start = __ pc();/* ++ __ emit_data64(0x0000004200000041, relocInfo::none); ++ __ emit_data64(0x0000004400000043, relocInfo::none); ++ __ emit_data64(0x0000004600000045, relocInfo::none); ++ __ emit_data64(0x0000004800000047, relocInfo::none); ++ __ emit_data64(0x0000004a00000049, relocInfo::none); ++ __ emit_data64(0x0000004c0000004b, relocInfo::none); ++ __ emit_data64(0x0000004e0000004d, relocInfo::none); ++ __ emit_data64(0x000000500000004f, relocInfo::none); ++ __ emit_data64(0x0000005200000051, relocInfo::none); ++ __ emit_data64(0x0000005400000053, relocInfo::none); ++ __ emit_data64(0x0000005600000055, relocInfo::none); ++ __ emit_data64(0x0000005800000057, relocInfo::none); ++ __ emit_data64(0x0000005a00000059, relocInfo::none); ++ __ emit_data64(0x0000006200000061, relocInfo::none); ++ __ emit_data64(0x0000006400000063, relocInfo::none); ++ __ emit_data64(0x0000006600000065, relocInfo::none); ++ __ emit_data64(0x0000006800000067, relocInfo::none); ++ __ emit_data64(0x0000006a00000069, relocInfo::none); ++ __ emit_data64(0x0000006c0000006b, relocInfo::none); ++ __ emit_data64(0x0000006e0000006d, relocInfo::none); ++ __ emit_data64(0x000000700000006f, relocInfo::none); ++ __ emit_data64(0x0000007200000071, relocInfo::none); ++ __ emit_data64(0x0000007400000073, relocInfo::none); ++ __ emit_data64(0x0000007600000075, relocInfo::none); ++ __ emit_data64(0x0000007800000077, relocInfo::none); ++ __ emit_data64(0x0000007a00000079, relocInfo::none); ++ __ emit_data64(0x0000003100000030, relocInfo::none); ++ __ emit_data64(0x0000003300000032, relocInfo::none); ++ __ emit_data64(0x0000003500000034, relocInfo::none); ++ __ emit_data64(0x0000003700000036, relocInfo::none); ++ __ emit_data64(0x0000003900000038, relocInfo::none); ++ __ emit_data64(0x0000002f0000002b, relocInfo::none);*/ ++ return start; ++ } ++ ++ //base64 url character set ++ address base64url_charset_addr() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "base64url_charset");ShouldNotReachHere(); ++ address start = __ pc();/* ++ __ emit_data64(0x0000004200000041, relocInfo::none); ++ __ emit_data64(0x0000004400000043, relocInfo::none); ++ __ emit_data64(0x0000004600000045, relocInfo::none); ++ __ emit_data64(0x0000004800000047, relocInfo::none); ++ __ emit_data64(0x0000004a00000049, relocInfo::none); ++ __ emit_data64(0x0000004c0000004b, relocInfo::none); ++ __ emit_data64(0x0000004e0000004d, relocInfo::none); ++ __ emit_data64(0x000000500000004f, relocInfo::none); ++ __ 
emit_data64(0x0000005200000051, relocInfo::none); ++ __ emit_data64(0x0000005400000053, relocInfo::none); ++ __ emit_data64(0x0000005600000055, relocInfo::none); ++ __ emit_data64(0x0000005800000057, relocInfo::none); ++ __ emit_data64(0x0000005a00000059, relocInfo::none); ++ __ emit_data64(0x0000006200000061, relocInfo::none); ++ __ emit_data64(0x0000006400000063, relocInfo::none); ++ __ emit_data64(0x0000006600000065, relocInfo::none); ++ __ emit_data64(0x0000006800000067, relocInfo::none); ++ __ emit_data64(0x0000006a00000069, relocInfo::none); ++ __ emit_data64(0x0000006c0000006b, relocInfo::none); ++ __ emit_data64(0x0000006e0000006d, relocInfo::none); ++ __ emit_data64(0x000000700000006f, relocInfo::none); ++ __ emit_data64(0x0000007200000071, relocInfo::none); ++ __ emit_data64(0x0000007400000073, relocInfo::none); ++ __ emit_data64(0x0000007600000075, relocInfo::none); ++ __ emit_data64(0x0000007800000077, relocInfo::none); ++ __ emit_data64(0x0000007a00000079, relocInfo::none); ++ __ emit_data64(0x0000003100000030, relocInfo::none); ++ __ emit_data64(0x0000003300000032, relocInfo::none); ++ __ emit_data64(0x0000003500000034, relocInfo::none); ++ __ emit_data64(0x0000003700000036, relocInfo::none); ++ __ emit_data64(0x0000003900000038, relocInfo::none); ++ __ emit_data64(0x0000005f0000002d, relocInfo::none); ++*/ ++ return start; ++ } ++ ++ address base64_bswap_mask_addr() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "bswap_mask_base64");ShouldNotReachHere(); ++ address start = __ pc();/* ++ __ emit_data64(0x0504038002010080, relocInfo::none); ++ __ emit_data64(0x0b0a098008070680, relocInfo::none); ++ __ emit_data64(0x0908078006050480, relocInfo::none); ++ __ emit_data64(0x0f0e0d800c0b0a80, relocInfo::none); ++ __ emit_data64(0x0605048003020180, relocInfo::none); ++ __ emit_data64(0x0c0b0a8009080780, relocInfo::none); ++ __ emit_data64(0x0504038002010080, relocInfo::none); ++ __ emit_data64(0x0b0a098008070680, relocInfo::none); ++*/ ++ return start; ++ } ++ ++ address base64_right_shift_mask_addr() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "right_shift_mask");ShouldNotReachHere(); ++ address start = __ pc();/* ++ __ emit_data64(0x0006000400020000, relocInfo::none); ++ __ emit_data64(0x0006000400020000, relocInfo::none); ++ __ emit_data64(0x0006000400020000, relocInfo::none); ++ __ emit_data64(0x0006000400020000, relocInfo::none); ++ __ emit_data64(0x0006000400020000, relocInfo::none); ++ __ emit_data64(0x0006000400020000, relocInfo::none); ++ __ emit_data64(0x0006000400020000, relocInfo::none); ++ __ emit_data64(0x0006000400020000, relocInfo::none); ++*/ ++ return start; ++ } ++ ++ address base64_left_shift_mask_addr() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "left_shift_mask");ShouldNotReachHere(); ++ address start = __ pc();/* ++ __ emit_data64(0x0000000200040000, relocInfo::none); ++ __ emit_data64(0x0000000200040000, relocInfo::none); ++ __ emit_data64(0x0000000200040000, relocInfo::none); ++ __ emit_data64(0x0000000200040000, relocInfo::none); ++ __ emit_data64(0x0000000200040000, relocInfo::none); ++ __ emit_data64(0x0000000200040000, relocInfo::none); ++ __ emit_data64(0x0000000200040000, relocInfo::none); ++ __ emit_data64(0x0000000200040000, relocInfo::none); ++*/ ++ return start; ++ } ++ ++ address base64_and_mask_addr() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "and_mask");ShouldNotReachHere(); ++ address start = __ pc();/* ++ __ 
emit_data64(0x3f003f003f000000, relocInfo::none); ++ __ emit_data64(0x3f003f003f000000, relocInfo::none); ++ __ emit_data64(0x3f003f003f000000, relocInfo::none); ++ __ emit_data64(0x3f003f003f000000, relocInfo::none); ++ __ emit_data64(0x3f003f003f000000, relocInfo::none); ++ __ emit_data64(0x3f003f003f000000, relocInfo::none); ++ __ emit_data64(0x3f003f003f000000, relocInfo::none); ++ __ emit_data64(0x3f003f003f000000, relocInfo::none);*/ ++ return start; ++ } ++ ++ address base64_gather_mask_addr() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "gather_mask");ShouldNotReachHere(); ++ address start = __ pc(); ++// __ emit_data64(0xffffffffffffffff, relocInfo::none); ++ return start; ++ } ++ ++// Code for generating Base64 encoding. ++// Intrinsic function prototype in Base64.java: ++// private void encodeBlock(byte[] src, int sp, int sl, byte[] dst, int dp, boolean isURL) { ++ address generate_base64_encodeBlock() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "implEncode");ShouldNotReachHere(); ++ address start = __ pc();/* ++ __ enter(); ++ ++ // Save callee-saved registers before using them ++ __ push(r12); ++ __ push(r13); ++ __ push(r14); ++ __ push(r15); ++ ++ // arguments ++ const Register source = c_rarg0; // Source Array ++ const Register start_offset = c_rarg1; // start offset ++ const Register end_offset = c_rarg2; // end offset ++ const Register dest = c_rarg3; // destination array ++ ++#ifndef _WIN64 ++ const Register dp = c_rarg4; // Position for writing to dest array ++ const Register isURL = c_rarg5;// Base64 or URL character set ++#else ++ const Address dp_mem(rbp, 6 * wordSize); // length is on stack on Win64 ++ const Address isURL_mem(rbp, 7 * wordSize); ++ const Register isURL = r10; // pick the volatile windows register ++ const Register dp = r12; ++ __ movl(dp, dp_mem); ++ __ movl(isURL, isURL_mem); ++#endif ++ ++ const Register length = r14; ++ Label L_process80, L_process32, L_process3, L_exit, L_processdata; ++ ++ // calculate length from offsets ++ __ movl(length, end_offset); ++ __ subl(length, start_offset); ++ __ cmpl(length, 0); ++ __ jcc(Assembler::lessEqual, L_exit); ++ ++ __ lea(r11, ExternalAddress(StubRoutines::x86::base64_charset_addr())); ++ // check if base64 charset(isURL=0) or base64 url charset(isURL=1) needs to be loaded ++ __ cmpl(isURL, 0); ++ __ jcc(Assembler::equal, L_processdata); ++ __ lea(r11, ExternalAddress(StubRoutines::x86::base64url_charset_addr())); ++ ++ // load masks required for encoding data ++ __ BIND(L_processdata); ++ __ movdqu(xmm16, ExternalAddress(StubRoutines::x86::base64_gather_mask_addr())); ++ // Set 64 bits of K register. 
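++    // (xmm16 holds the all-ones gather mask, so comparing it with itself sets every bit of k3; k3 is copied into k2 before each vpgatherdd below because the gather instruction consumes its write-mask.)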
++ __ evpcmpeqb(k3, xmm16, xmm16, Assembler::AVX_512bit); ++ __ evmovdquq(xmm12, ExternalAddress(StubRoutines::x86::base64_bswap_mask_addr()), Assembler::AVX_256bit, r13); ++ __ evmovdquq(xmm13, ExternalAddress(StubRoutines::x86::base64_right_shift_mask_addr()), Assembler::AVX_512bit, r13); ++ __ evmovdquq(xmm14, ExternalAddress(StubRoutines::x86::base64_left_shift_mask_addr()), Assembler::AVX_512bit, r13); ++ __ evmovdquq(xmm15, ExternalAddress(StubRoutines::x86::base64_and_mask_addr()), Assembler::AVX_512bit, r13); ++ ++ // Vector Base64 implementation, producing 96 bytes of encoded data ++ __ BIND(L_process80); ++ __ cmpl(length, 80); ++ __ jcc(Assembler::below, L_process32); ++ __ evmovdquq(xmm0, Address(source, start_offset, Address::times_1, 0), Assembler::AVX_256bit); ++ __ evmovdquq(xmm1, Address(source, start_offset, Address::times_1, 24), Assembler::AVX_256bit); ++ __ evmovdquq(xmm2, Address(source, start_offset, Address::times_1, 48), Assembler::AVX_256bit); ++ ++ //permute the input data in such a manner that we have continuity of the source ++ __ vpermq(xmm3, xmm0, 148, Assembler::AVX_256bit); ++ __ vpermq(xmm4, xmm1, 148, Assembler::AVX_256bit); ++ __ vpermq(xmm5, xmm2, 148, Assembler::AVX_256bit); ++ ++ //shuffle input and group 3 bytes of data and to it add 0 as the 4th byte. ++ //we can deal with 12 bytes at a time in a 128 bit register ++ __ vpshufb(xmm3, xmm3, xmm12, Assembler::AVX_256bit); ++ __ vpshufb(xmm4, xmm4, xmm12, Assembler::AVX_256bit); ++ __ vpshufb(xmm5, xmm5, xmm12, Assembler::AVX_256bit); ++ ++ //convert byte to word. Each 128 bit register will have 6 bytes for processing ++ __ vpmovzxbw(xmm3, xmm3, Assembler::AVX_512bit); ++ __ vpmovzxbw(xmm4, xmm4, Assembler::AVX_512bit); ++ __ vpmovzxbw(xmm5, xmm5, Assembler::AVX_512bit); ++ ++ // Extract bits in the following pattern 6, 4+2, 2+4, 6 to convert 3, 8 bit numbers to 4, 6 bit numbers ++ __ evpsrlvw(xmm0, xmm3, xmm13, Assembler::AVX_512bit); ++ __ evpsrlvw(xmm1, xmm4, xmm13, Assembler::AVX_512bit); ++ __ evpsrlvw(xmm2, xmm5, xmm13, Assembler::AVX_512bit); ++ ++ __ evpsllvw(xmm3, xmm3, xmm14, Assembler::AVX_512bit); ++ __ evpsllvw(xmm4, xmm4, xmm14, Assembler::AVX_512bit); ++ __ evpsllvw(xmm5, xmm5, xmm14, Assembler::AVX_512bit); ++ ++ __ vpsrlq(xmm0, xmm0, 8, Assembler::AVX_512bit); ++ __ vpsrlq(xmm1, xmm1, 8, Assembler::AVX_512bit); ++ __ vpsrlq(xmm2, xmm2, 8, Assembler::AVX_512bit); ++ ++ __ vpsllq(xmm3, xmm3, 8, Assembler::AVX_512bit); ++ __ vpsllq(xmm4, xmm4, 8, Assembler::AVX_512bit); ++ __ vpsllq(xmm5, xmm5, 8, Assembler::AVX_512bit); ++ ++ __ vpandq(xmm3, xmm3, xmm15, Assembler::AVX_512bit); ++ __ vpandq(xmm4, xmm4, xmm15, Assembler::AVX_512bit); ++ __ vpandq(xmm5, xmm5, xmm15, Assembler::AVX_512bit); ++ ++ // Get the final 4*6 bits base64 encoding ++ __ vporq(xmm3, xmm3, xmm0, Assembler::AVX_512bit); ++ __ vporq(xmm4, xmm4, xmm1, Assembler::AVX_512bit); ++ __ vporq(xmm5, xmm5, xmm2, Assembler::AVX_512bit); ++ ++ // Shift ++ __ vpsrlq(xmm3, xmm3, 8, Assembler::AVX_512bit); ++ __ vpsrlq(xmm4, xmm4, 8, Assembler::AVX_512bit); ++ __ vpsrlq(xmm5, xmm5, 8, Assembler::AVX_512bit); ++ ++ // look up 6 bits in the base64 character set to fetch the encoding ++ // we are converting word to dword as gather instructions need dword indices for looking up encoding ++ __ vextracti64x4(xmm6, xmm3, 0); ++ __ vpmovzxwd(xmm0, xmm6, Assembler::AVX_512bit); ++ __ vextracti64x4(xmm6, xmm3, 1); ++ __ vpmovzxwd(xmm1, xmm6, Assembler::AVX_512bit); ++ ++ __ vextracti64x4(xmm6, xmm4, 0); ++ __ vpmovzxwd(xmm2, xmm6, 
Assembler::AVX_512bit); ++ __ vextracti64x4(xmm6, xmm4, 1); ++ __ vpmovzxwd(xmm3, xmm6, Assembler::AVX_512bit); ++ ++ __ vextracti64x4(xmm4, xmm5, 0); ++ __ vpmovzxwd(xmm6, xmm4, Assembler::AVX_512bit); ++ ++ __ vextracti64x4(xmm4, xmm5, 1); ++ __ vpmovzxwd(xmm7, xmm4, Assembler::AVX_512bit); ++ ++ __ kmovql(k2, k3); ++ __ evpgatherdd(xmm4, k2, Address(r11, xmm0, Address::times_4, 0), Assembler::AVX_512bit); ++ __ kmovql(k2, k3); ++ __ evpgatherdd(xmm5, k2, Address(r11, xmm1, Address::times_4, 0), Assembler::AVX_512bit); ++ __ kmovql(k2, k3); ++ __ evpgatherdd(xmm8, k2, Address(r11, xmm2, Address::times_4, 0), Assembler::AVX_512bit); ++ __ kmovql(k2, k3); ++ __ evpgatherdd(xmm9, k2, Address(r11, xmm3, Address::times_4, 0), Assembler::AVX_512bit); ++ __ kmovql(k2, k3); ++ __ evpgatherdd(xmm10, k2, Address(r11, xmm6, Address::times_4, 0), Assembler::AVX_512bit); ++ __ kmovql(k2, k3); ++ __ evpgatherdd(xmm11, k2, Address(r11, xmm7, Address::times_4, 0), Assembler::AVX_512bit); ++ ++ //Down convert dword to byte. Final output is 16*6 = 96 bytes long ++ __ evpmovdb(Address(dest, dp, Address::times_1, 0), xmm4, Assembler::AVX_512bit); ++ __ evpmovdb(Address(dest, dp, Address::times_1, 16), xmm5, Assembler::AVX_512bit); ++ __ evpmovdb(Address(dest, dp, Address::times_1, 32), xmm8, Assembler::AVX_512bit); ++ __ evpmovdb(Address(dest, dp, Address::times_1, 48), xmm9, Assembler::AVX_512bit); ++ __ evpmovdb(Address(dest, dp, Address::times_1, 64), xmm10, Assembler::AVX_512bit); ++ __ evpmovdb(Address(dest, dp, Address::times_1, 80), xmm11, Assembler::AVX_512bit); ++ ++ __ addq(dest, 96); ++ __ addq(source, 72); ++ __ subq(length, 72); ++ __ jmp(L_process80); ++ ++ // Vector Base64 implementation generating 32 bytes of encoded data ++ __ BIND(L_process32); ++ __ cmpl(length, 32); ++ __ jcc(Assembler::below, L_process3); ++ __ evmovdquq(xmm0, Address(source, start_offset), Assembler::AVX_256bit); ++ __ vpermq(xmm0, xmm0, 148, Assembler::AVX_256bit); ++ __ vpshufb(xmm6, xmm0, xmm12, Assembler::AVX_256bit); ++ __ vpmovzxbw(xmm6, xmm6, Assembler::AVX_512bit); ++ __ evpsrlvw(xmm2, xmm6, xmm13, Assembler::AVX_512bit); ++ __ evpsllvw(xmm3, xmm6, xmm14, Assembler::AVX_512bit); ++ ++ __ vpsrlq(xmm2, xmm2, 8, Assembler::AVX_512bit); ++ __ vpsllq(xmm3, xmm3, 8, Assembler::AVX_512bit); ++ __ vpandq(xmm3, xmm3, xmm15, Assembler::AVX_512bit); ++ __ vporq(xmm1, xmm2, xmm3, Assembler::AVX_512bit); ++ __ vpsrlq(xmm1, xmm1, 8, Assembler::AVX_512bit); ++ __ vextracti64x4(xmm9, xmm1, 0); ++ __ vpmovzxwd(xmm6, xmm9, Assembler::AVX_512bit); ++ __ vextracti64x4(xmm9, xmm1, 1); ++ __ vpmovzxwd(xmm5, xmm9, Assembler::AVX_512bit); ++ __ kmovql(k2, k3); ++ __ evpgatherdd(xmm8, k2, Address(r11, xmm6, Address::times_4, 0), Assembler::AVX_512bit); ++ __ kmovql(k2, k3); ++ __ evpgatherdd(xmm10, k2, Address(r11, xmm5, Address::times_4, 0), Assembler::AVX_512bit); ++ __ evpmovdb(Address(dest, dp, Address::times_1, 0), xmm8, Assembler::AVX_512bit); ++ __ evpmovdb(Address(dest, dp, Address::times_1, 16), xmm10, Assembler::AVX_512bit); ++ __ subq(length, 24); ++ __ addq(dest, 32); ++ __ addq(source, 24); ++ __ jmp(L_process32); ++*/ ++ // Scalar data processing takes 3 bytes at a time and produces 4 bytes of encoded data ++ /* This code corresponds to the scalar version of the following snippet in Base64.java ++ ** int bits = (src[sp0++] & 0xff) << 16 |(src[sp0++] & 0xff) << 8 |(src[sp0++] & 0xff); ++ ** dst[dp0++] = (byte)base64[(bits >> > 18) & 0x3f]; ++ ** dst[dp0++] = (byte)base64[(bits >> > 12) & 0x3f]; ++ ** dst[dp0++] = 
(byte)base64[(bits >> > 6) & 0x3f]; ++ ** dst[dp0++] = (byte)base64[bits & 0x3f];*//* ++ __ BIND(L_process3); ++ __ cmpl(length, 3); ++ __ jcc(Assembler::below, L_exit); ++ // Read 1 byte at a time ++ __ movzbl(rax, Address(source, start_offset)); ++ __ shll(rax, 0x10); ++ __ movl(r15, rax); ++ __ movzbl(rax, Address(source, start_offset, Address::times_1, 1)); ++ __ shll(rax, 0x8); ++ __ movzwl(rax, rax); ++ __ orl(r15, rax); ++ __ movzbl(rax, Address(source, start_offset, Address::times_1, 2)); ++ __ orl(rax, r15); ++ // Save 3 bytes read in r15 ++ __ movl(r15, rax); ++ __ shrl(rax, 0x12); ++ __ andl(rax, 0x3f); ++ // rax contains the index, r11 contains base64 lookup table ++ __ movb(rax, Address(r11, rax, Address::times_4)); ++ // Write the encoded byte to destination ++ __ movb(Address(dest, dp, Address::times_1, 0), rax); ++ __ movl(rax, r15); ++ __ shrl(rax, 0xc); ++ __ andl(rax, 0x3f); ++ __ movb(rax, Address(r11, rax, Address::times_4)); ++ __ movb(Address(dest, dp, Address::times_1, 1), rax); ++ __ movl(rax, r15); ++ __ shrl(rax, 0x6); ++ __ andl(rax, 0x3f); ++ __ movb(rax, Address(r11, rax, Address::times_4)); ++ __ movb(Address(dest, dp, Address::times_1, 2), rax); ++ __ movl(rax, r15); ++ __ andl(rax, 0x3f); ++ __ movb(rax, Address(r11, rax, Address::times_4)); ++ __ movb(Address(dest, dp, Address::times_1, 3), rax); ++ __ subl(length, 3); ++ __ addq(dest, 4); ++ __ addq(source, 3); ++ __ jmp(L_process3); ++ __ BIND(L_exit); ++ __ pop(r15); ++ __ pop(r14); ++ __ pop(r13); ++ __ pop(r12); ++ __ leave(); ++ __ ret(0);*/ ++ return start; ++ } ++ ++ /** ++ * Arguments: ++ * ++ * Inputs: ++ * c_rarg0 - int crc ++ * c_rarg1 - byte* buf ++ * c_rarg2 - int length ++ * ++ * Ouput: ++ * rax - int crc result ++ */ ++ address generate_updateBytesCRC32() { ++ assert(UseCRC32Intrinsics, "need AVX and CLMUL instructions"); ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32");ShouldNotReachHere(); ++ ++ address start = __ pc(); ++ __ stop("generate_updateBytesCRC32"); ++ /* ++ // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) ++ // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) 
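++  // NOTE: the x86 reference body below stays commented out; this stub traps via stop()/ShouldNotReachHere(), and generate_initial() wires StubRoutines::_updateBytesCRC32 to SharedRuntime::updateBytesCRC32 instead.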
++ // rscratch1: r10 ++ const Register crc = c_rarg0; // crc ++ const Register buf = c_rarg1; // source java byte array address ++ const Register len = c_rarg2; // length ++ const Register table = c_rarg3; // crc_table address (reuse register) ++ const Register tmp = r11; ++ assert_different_registers(crc, buf, len, table, tmp, rax); ++ ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ kernel_crc32(crc, buf, len, table, tmp); ++ ++ __ movl(rax, crc); ++ __ vzeroupper(); ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0); ++*/ ++ return start; ++ } ++ ++ /** ++ * Arguments: ++ * ++ * Inputs: ++ * c_rarg0 - int crc ++ * c_rarg1 - byte* buf ++ * c_rarg2 - long length ++ * c_rarg3 - table_start - optional (present only when doing a library_call, ++ * not used by x86 algorithm) ++ * ++ * Ouput: ++ * rax - int crc result ++ */ ++ address generate_updateBytesCRC32C(bool is_pclmulqdq_supported) { ++ assert(UseCRC32CIntrinsics, "need SSE4_2"); ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "updateBytesCRC32C");ShouldNotReachHere(); ++ address start = __ pc(); ++ __ stop("generate_updateBytesCRC32C"); ++ /* ++ //reg.arg int#0 int#1 int#2 int#3 int#4 int#5 float regs ++ //Windows RCX RDX R8 R9 none none XMM0..XMM3 ++ //Lin / Sol RDI RSI RDX RCX R8 R9 XMM0..XMM7 ++ const Register crc = c_rarg0; // crc ++ const Register buf = c_rarg1; // source java byte array address ++ const Register len = c_rarg2; // length ++ const Register a = rax; ++ const Register j = r9; ++ const Register k = r10; ++ const Register l = r11; ++#ifdef _WIN64 ++ const Register y = rdi; ++ const Register z = rsi; ++#else ++ const Register y = rcx; ++ const Register z = r8; ++#endif ++ assert_different_registers(crc, buf, len, a, j, k, l, y, z); ++ ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++#ifdef _WIN64 ++ __ push(y); ++ __ push(z); ++#endif ++ __ crc32c_ipl_alg2_alt2(crc, buf, len, ++ a, j, k, ++ l, y, z, ++ c_farg0, c_farg1, c_farg2, ++ is_pclmulqdq_supported); ++ __ movl(rax, crc); ++#ifdef _WIN64 ++ __ pop(z); ++ __ pop(y); ++#endif ++ __ vzeroupper(); ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0); ++*/ ++ return start; ++ } ++ ++ /** ++ * Arguments: ++ * ++ * Input: ++ * c_rarg0 - x address ++ * c_rarg1 - x length ++ * c_rarg2 - y address ++ * c_rarg3 - y length ++ * not Win64 ++ * c_rarg4 - z address ++ * c_rarg5 - z length ++ * Win64 ++ * rsp+40 - z address ++ * rsp+48 - z length ++ */ ++ address generate_multiplyToLen() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "multiplyToLen");ShouldNotReachHere(); ++ ++ address start = __ pc(); ++ __ stop("generate_multiplyToLen"); ++ /* ++ // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) ++ // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) ++ const Register x = rdi; ++ const Register xlen = rax; ++ const Register y = rsi; ++ const Register ylen = rcx; ++ const Register z = r8; ++ const Register zlen = r11; ++ ++ // Next registers will be saved on stack in multiply_to_len(). 
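++    // (r12-r15 and rbx are callee-saved in both the SysV and Win64 ABIs, which is why multiply_to_len() has to spill and restore them.)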
++ const Register tmp1 = r12; ++ const Register tmp2 = r13; ++ const Register tmp3 = r14; ++ const Register tmp4 = r15; ++ const Register tmp5 = rbx; ++ ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++#ifndef _WIN64 ++ __ movptr(zlen, r9); // Save r9 in r11 - zlen ++#endif ++ setup_arg_regs(4); // x => rdi, xlen => rsi, y => rdx ++ // ylen => rcx, z => r8, zlen => r11 ++ // r9 and r10 may be used to save non-volatile registers ++#ifdef _WIN64 ++ // last 2 arguments (#4, #5) are on stack on Win64 ++ __ movptr(z, Address(rsp, 6 * wordSize)); ++ __ movptr(zlen, Address(rsp, 7 * wordSize)); ++#endif ++ ++ __ movptr(xlen, rsi); ++ __ movptr(y, rdx); ++ __ multiply_to_len(x, xlen, y, ylen, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5); ++ ++ restore_arg_regs(); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0); ++*/ ++ return start; ++ } ++ ++ /** ++ * Arguments: ++ * ++ * Input: ++ * c_rarg0 - obja address ++ * c_rarg1 - objb address ++ * c_rarg3 - length length ++ * c_rarg4 - scale log2_array_indxscale ++ * ++ * Output: ++ * rax - int >= mismatched index, < 0 bitwise complement of tail ++ */ ++ address generate_vectorizedMismatch() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "vectorizedMismatch");ShouldNotReachHere(); ++ address start = __ pc();/* ++ ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); ++ ++#ifdef _WIN64 // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) ++ const Register scale = c_rarg0; //rcx, will exchange with r9 ++ const Register objb = c_rarg1; //rdx ++ const Register length = c_rarg2; //r8 ++ const Register obja = c_rarg3; //r9 ++ __ xchgq(obja, scale); //now obja and scale contains the correct contents ++ ++ const Register tmp1 = r10; ++ const Register tmp2 = r11; ++#endif ++#ifndef _WIN64 // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) ++ const Register obja = c_rarg0; //U:rdi ++ const Register objb = c_rarg1; //U:rsi ++ const Register length = c_rarg2; //U:rdx ++ const Register scale = c_rarg3; //U:rcx ++ const Register tmp1 = r8; ++ const Register tmp2 = r9; ++#endif ++ const Register result = rax; //return value ++ const XMMRegister vec0 = xmm0; ++ const XMMRegister vec1 = xmm1; ++ const XMMRegister vec2 = xmm2; ++ ++ __ vectorized_mismatch(obja, objb, length, scale, result, tmp1, tmp2, vec0, vec1, vec2); ++ ++ __ vzeroupper(); ++ __ leave(); ++ __ ret(0); ++*/ ++ return start; ++ } ++ ++/** ++ * Arguments: ++ * ++ // Input: ++ // c_rarg0 - x address ++ // c_rarg1 - x length ++ // c_rarg2 - z address ++ // c_rarg3 - z lenth ++ * ++ */ ++ address generate_squareToLen() { ++ ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "squareToLen");ShouldNotReachHere(); ++ ++ address start = __ pc();/* ++ // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) ++ // Unix: rdi, rsi, rdx, rcx (c_rarg0, c_rarg1, ...) 
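++    // (This corresponds to BigInteger.squareToLen(int[] x, int len, int[] z, int zlen); the four arguments arrive in the first integer argument registers.)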
++ const Register x = rdi; ++ const Register len = rsi; ++ const Register z = r8; ++ const Register zlen = rcx; ++ ++ const Register tmp1 = r12; ++ const Register tmp2 = r13; ++ const Register tmp3 = r14; ++ const Register tmp4 = r15; ++ const Register tmp5 = rbx; ++ ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ setup_arg_regs(4); // x => rdi, len => rsi, z => rdx ++ // zlen => rcx ++ // r9 and r10 may be used to save non-volatile registers ++ __ movptr(r8, rdx); ++ __ square_to_len(x, len, z, zlen, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); ++ ++ restore_arg_regs(); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0); ++*/ ++ return start; ++ } ++ ++ /** ++ * Arguments: ++ * ++ * Input: ++ * c_rarg0 - out address ++ * c_rarg1 - in address ++ * c_rarg2 - offset ++ * c_rarg3 - len ++ * not Win64 ++ * c_rarg4 - k ++ * Win64 ++ * rsp+40 - k ++ */ ++ address generate_mulAdd() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "mulAdd");ShouldNotReachHere(); ++ ++ address start = __ pc();/* ++ // Win64: rcx, rdx, r8, r9 (c_rarg0, c_rarg1, ...) ++ // Unix: rdi, rsi, rdx, rcx, r8, r9 (c_rarg0, c_rarg1, ...) ++ const Register out = rdi; ++ const Register in = rsi; ++ const Register offset = r11; ++ const Register len = rcx; ++ const Register k = r8; ++ ++ // Next registers will be saved on stack in mul_add(). ++ const Register tmp1 = r12; ++ const Register tmp2 = r13; ++ const Register tmp3 = r14; ++ const Register tmp4 = r15; ++ const Register tmp5 = rbx; ++ ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ setup_arg_regs(4); // out => rdi, in => rsi, offset => rdx ++ // len => rcx, k => r8 ++ // r9 and r10 may be used to save non-volatile registers ++#ifdef _WIN64 ++ // last argument is on stack on Win64 ++ __ movl(k, Address(rsp, 6 * wordSize)); ++#endif ++ __ movptr(r11, rdx); // move offset in rdx to offset(r11) ++ __ mul_add(out, in, offset, len, k, tmp1, tmp2, tmp3, tmp4, tmp5, rdx, rax); ++ ++ restore_arg_regs(); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0); ++ ++ return start; ++ } ++ ++ address generate_libmExp() { ++ StubCodeMark mark(this, "StubRoutines", "libmExp"); ++ ++ address start = __ pc(); ++ ++ const XMMRegister x0 = xmm0; ++ const XMMRegister x1 = xmm1; ++ const XMMRegister x2 = xmm2; ++ const XMMRegister x3 = xmm3; ++ ++ const XMMRegister x4 = xmm4; ++ const XMMRegister x5 = xmm5; ++ const XMMRegister x6 = xmm6; ++ const XMMRegister x7 = xmm7; ++ ++ const Register tmp = r11; ++ ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ fast_exp(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0); ++*/ ++ return start; ++ ++ } ++ ++ address generate_libmLog() { ++ StubCodeMark mark(this, "StubRoutines", "libmLog");ShouldNotReachHere(); ++ ++ address start = __ pc();/* ++ ++ const XMMRegister x0 = xmm0; ++ const XMMRegister x1 = xmm1; ++ const XMMRegister x2 = xmm2; ++ const XMMRegister x3 = xmm3; ++ ++ const XMMRegister x4 = xmm4; ++ const XMMRegister x5 = xmm5; ++ const XMMRegister x6 = xmm6; ++ const XMMRegister x7 = xmm7; ++ ++ const Register tmp1 = r11; ++ const Register tmp2 = r8; ++ ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ fast_log(x0, x1, x2, x3, x4, 
x5, x6, x7, rax, rcx, rdx, tmp1, tmp2); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0); ++*/ ++ return start; ++ ++ } ++ ++ address generate_libmLog10() { ++ StubCodeMark mark(this, "StubRoutines", "libmLog10");ShouldNotReachHere(); ++ ++ address start = __ pc();/* ++ ++ const XMMRegister x0 = xmm0; ++ const XMMRegister x1 = xmm1; ++ const XMMRegister x2 = xmm2; ++ const XMMRegister x3 = xmm3; ++ ++ const XMMRegister x4 = xmm4; ++ const XMMRegister x5 = xmm5; ++ const XMMRegister x6 = xmm6; ++ const XMMRegister x7 = xmm7; ++ ++ const Register tmp = r11; ++ ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ fast_log10(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0); ++*/ ++ return start; ++ ++ } ++ ++ address generate_libmPow() { ++ StubCodeMark mark(this, "StubRoutines", "libmPow");ShouldNotReachHere(); ++ ++ address start = __ pc();/* ++ ++ const XMMRegister x0 = xmm0; ++ const XMMRegister x1 = xmm1; ++ const XMMRegister x2 = xmm2; ++ const XMMRegister x3 = xmm3; ++ ++ const XMMRegister x4 = xmm4; ++ const XMMRegister x5 = xmm5; ++ const XMMRegister x6 = xmm6; ++ const XMMRegister x7 = xmm7; ++ ++ const Register tmp1 = r8; ++ const Register tmp2 = r9; ++ const Register tmp3 = r10; ++ const Register tmp4 = r11; ++ ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++ __ fast_pow(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4); ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0); ++*/ ++ return start; ++ ++ } ++ ++ address generate_libmSin() { ++ StubCodeMark mark(this, "StubRoutines", "libmSin");ShouldNotReachHere(); ++ ++ address start = __ pc();/* ++ ++ const XMMRegister x0 = xmm0; ++ const XMMRegister x1 = xmm1; ++ const XMMRegister x2 = xmm2; ++ const XMMRegister x3 = xmm3; ++ ++ const XMMRegister x4 = xmm4; ++ const XMMRegister x5 = xmm5; ++ const XMMRegister x6 = xmm6; ++ const XMMRegister x7 = xmm7; ++ ++ const Register tmp1 = r8; ++ const Register tmp2 = r9; ++ const Register tmp3 = r10; ++ const Register tmp4 = r11; ++ ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++#ifdef _WIN64 ++ __ push(rsi); ++ __ push(rdi); ++#endif ++ __ fast_sin(x0, x1, x2, x3, x4, x5, x6, x7, rax, rbx, rcx, rdx, tmp1, tmp2, tmp3, tmp4); ++ ++#ifdef _WIN64 ++ __ pop(rdi); ++ __ pop(rsi); ++#endif ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0); ++*/ ++ return start; ++ ++ } ++ ++ address generate_libmCos() { ++ StubCodeMark mark(this, "StubRoutines", "libmCos");ShouldNotReachHere(); ++ ++ address start = __ pc();/* ++ ++ const XMMRegister x0 = xmm0; ++ const XMMRegister x1 = xmm1; ++ const XMMRegister x2 = xmm2; ++ const XMMRegister x3 = xmm3; ++ ++ const XMMRegister x4 = xmm4; ++ const XMMRegister x5 = xmm5; ++ const XMMRegister x6 = xmm6; ++ const XMMRegister x7 = xmm7; ++ ++ const Register tmp1 = r8; ++ const Register tmp2 = r9; ++ const Register tmp3 = r10; ++ const Register tmp4 = r11; ++ ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++#ifdef _WIN64 ++ __ push(rsi); ++ __ push(rdi); ++#endif ++ __ fast_cos(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4); ++ ++#ifdef _WIN64 ++ __ pop(rdi); ++ __ pop(rsi); ++#endif ++ ++ __ leave(); 
// required for proper stackwalking of RuntimeStub frame ++ __ ret(0); ++*/ ++ return start; ++ ++ } ++ ++ address generate_libmTan() { ++ StubCodeMark mark(this, "StubRoutines", "libmTan");ShouldNotReachHere(); ++ ++ address start = __ pc();/* ++ ++ const XMMRegister x0 = xmm0; ++ const XMMRegister x1 = xmm1; ++ const XMMRegister x2 = xmm2; ++ const XMMRegister x3 = xmm3; ++ ++ const XMMRegister x4 = xmm4; ++ const XMMRegister x5 = xmm5; ++ const XMMRegister x6 = xmm6; ++ const XMMRegister x7 = xmm7; ++ ++ const Register tmp1 = r8; ++ const Register tmp2 = r9; ++ const Register tmp3 = r10; ++ const Register tmp4 = r11; ++ ++ BLOCK_COMMENT("Entry:"); ++ __ enter(); // required for proper stackwalking of RuntimeStub frame ++ ++#ifdef _WIN64 ++ __ push(rsi); ++ __ push(rdi); ++#endif ++ __ fast_tan(x0, x1, x2, x3, x4, x5, x6, x7, rax, rcx, rdx, tmp1, tmp2, tmp3, tmp4); ++ ++#ifdef _WIN64 ++ __ pop(rdi); ++ __ pop(rsi); ++#endif ++ ++ __ leave(); // required for proper stackwalking of RuntimeStub frame ++ __ ret(0); ++*/ ++ return start; ++ ++ } ++ ++ ++ ++ void copy_core_forward(int limit, Register src, Register dst, Register count, Register tmp1, Register tmp2){ ++ Label l_misalign, l_misalign_simd, l_align_simd, l_before_tail, l_exit; ++ ++ __ and_ins(src, 31, tmp1); ++ __ beq_l(tmp1, l_align_simd); ++ ++ __ BIND(l_misalign); ++ __ and_ins(src, 31, tmp1); //from low-5-bit = src mod 32 ++ __ slll(tmp1, 3, tmp1); ++ __ ifmovs(tmp1, f15); ++ __ ldi(tmp2, 256, R0); ++ __ subl(tmp2, tmp1, tmp1); ++ __ ifmovs(tmp1, F17); ++ __ andnot(src, 31, tmp1); ++ __ vldd(f10, 0, tmp1); //load 32 bytes from src ++ ++ __ BIND(l_misalign_simd); ++ __ srlow(f10, f15, f12);//get high feild bytes of 32 bytes ++ __ vldd(f10, 32, tmp1); //load next 32 bytes from src+32 ++ __ sllow(f10, F17, f13);//get low field bytes of 32 bytes ++ __ vlog(0xfc, f12, f13, f31, f12); //merge f12, f13, into f12 ++ __ vstd(f12, 0, dst); ++ ++ __ addl(tmp1, 32, tmp1); ++ __ addl(dst, 32, dst); ++ __ subl(count, limit, count); ++ ++ __ cmple(count, limit-1, tmp2); //At least one more trip? 
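++    // (tmp2 becomes non-zero once fewer than limit bytes remain, so the beq below - taken while tmp2 is zero - repeats the unaligned SIMD copy as long as a full chunk is still available.)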
++ __ beq_l(tmp2, l_misalign_simd); ++ __ beq_l(R0, l_before_tail); ++ ++ __ BIND(l_align_simd); ++ __ vldd(f10, 0, src); ++ __ vstd(f10, 0, dst); ++ __ subl(count, limit, count); ++ __ addl(src, 32, src); ++ __ addl(dst, 32, dst); ++ __ cmple(count, limit-1, tmp1); //while count >=32, do simd ++ __ beq_l(tmp1, l_align_simd); ++ __ beq_l(R0, l_exit); ++ ++ __ BIND(l_before_tail); ++ __ and_ins(src, 31, src); ++ __ addl(tmp1, src, src); ++ ++ __ BIND(l_exit); ++ } ++ ++ void copy_core_backward(int limit, Register end_src, Register end_dst, Register count, Register tmp1, Register tmp2){ ++ Label l_misalign, l_misalign_simd, l_align_simd, l_before_tail, l_exit; ++ ++ __ and_ins(end_src, 31, tmp1); ++ __ beq_l(tmp1, l_align_simd); ++ ++ __ BIND(l_misalign); ++ __ and_ins(end_src, 31, tmp1); //from low-5-bit = src mod 32 ++ __ slll(tmp1, 3, tmp1); ++ __ ifmovs(tmp1, f15); ++ __ ldi(tmp2, 256, R0); ++ __ subl(tmp2, tmp1, tmp1); ++ __ ifmovs(tmp1, F17); ++ __ andnot(end_src, 31, tmp1); ++ __ vldd(f10, 0, tmp1); //load 32 bytes from src ++ ++ __ BIND(l_misalign_simd); ++ __ sllow(f10, F17, f13);//get low field bytes of 32 bytes ++ __ vldd(f10, -32, tmp1); //load next 32 bytes from src+32 ++ __ srlow(f10, f15, f12);//get high feild bytes of 32 bytes ++ __ vlog(0xfc, f12, f13, f31, f12); //merge f12, f13, into f12 ++ __ vstd(f12, -32, end_dst); ++ ++ __ subl(tmp1, 32, tmp1); ++ __ subl(end_dst, 32, end_dst); ++ __ subl(count, limit, count); ++ ++ __ cmple(count, limit-1, tmp2); //At least one more trip? ++ __ beq_l(tmp2, l_misalign_simd); ++ __ beq_l(R0, l_before_tail); ++ ++ __ BIND(l_align_simd); ++ __ vldd(f10, -32, end_src); ++ __ vstd(f10, -32, end_dst); ++ __ subl(count, limit, count); ++ __ subl(end_src, 32, end_src); ++ __ subl(end_dst, 32, end_dst); ++ __ cmple(count, limit-1, tmp1); //while count >=32, do simd ++ __ beq_l(tmp1, l_align_simd); ++ __ beq_l(R0, l_exit); ++ ++ __ BIND(l_before_tail); ++ __ and_ins(end_src, 31, end_src); ++ __ addl(tmp1, end_src, end_src); ++ ++ __ BIND(l_exit); ++ } ++ ++ // Continuation point for throwing of implicit exceptions that are ++ // not handled in the current activation. Fabricates an exception ++ // oop and initiates normal exception dispatching in this ++ // frame. Since we need to preserve callee-saved values (currently ++ // only for C2, but done for C1 as well) we need a callee-saved oop ++ // map and therefore have to make these stubs into RuntimeStubs ++ // rather than BufferBlobs. If the compiler needs all registers to ++ // be preserved between the fault point and the exception handler ++ // then it must assume responsibility for that in ++ // AbstractCompiler::continuation_for_implicit_null_exception or ++ // continuation_for_implicit_division_by_zero_exception. All other ++ // implicit exceptions (e.g., NullPointerException or ++ // AbstractMethodError on entry) are either at call sites or ++ // otherwise assume that stack unwinding will be initiated, so ++ // caller saved registers were assumed volatile in the compiler. ++ ++ ++ // Arguments: ++ // aligned - true => Input and output aligned on a HeapWord == 8-byte boundary ++ // ignored ++ // name - stub name string ++ // ++ // Inputs: ++ // c_rarg0 - source array address ++ // c_rarg1 - destination array address ++ // c_rarg2 - element count, treated as ssize_t, can be zero ++ // ++ // If 'from' and/or 'to' are aligned on 4-, 2-, or 1-byte boundaries, ++ // we let the hardware handle it. 
The one to eight bytes within words, ++ // dwords or qwords that span cache line boundaries will still be loaded ++ // and stored atomically. ++ // ++ // Side Effects: ++ // disjoint_byte_copy_entry is set to the no-overlap entry point ++ // used by generate_conjoint_byte_copy(). ++ // ++ void generate_disjoint_copy(int widthInByte, Register src, Register dst, Register count) { ++ // Label lblMissAlignInByte, lblMissAlignInShort, lblMissAlignInWord, lblMissAlignInLong; ++ Label lblMissAlign[4]; ++ // Label lblSkipByte, lblSkipInShort, lblSkipInWord, lblSkipInLong; ++ Label lblSkip[4]; ++ // Label lblCopyByte, lblCopyShort, lblCopyWord, lblCopyLong; ++ Label lblCopy[4]; ++ char buf[50]; ++ if (widthInByte == 0) {__ subl(count, 9, AT); __ ble_l(AT, lblMissAlign[1]);} ++ if (widthInByte == 1) {__ subl(count, 9, AT); __ ble_l(AT, lblMissAlign[2]);} ++ ++ for (int i = widthInByte; i < 3; i++) { ++ __ xorptr(src, dst, AT); ++ __ andptr(AT, 1 << i, AT); // if the backward ith bit of src and and dst is the same ++ __ jcc(Assembler::notEqual, lblMissAlign[i+1], AT); // if arrays don't have the same alignment, ... ++ ++ __ andptr(src, 1 << i, AT); ++ __ jcc(Assembler::equal, lblSkip[i], AT); // have same alignment but extra byte/short/int ++ ++ __ load(i, AT, 0, src); ++ __ store(i, AT, 0, dst); ++ __ addl(src, 1 << i, src); ++ __ addl(dst, 1 << i, dst); ++ __ subl(count, 1 << i, count); ++ ++ __ bind(lblSkip[i]); ++ sprintf(buf, "lblSkip[%d]", i); ++ __ block_comment(buf); ++ } ++ ++ for (int i = 3; i >= widthInByte; i--) { // FasterArrayCopy ++ if (i == widthInByte) { ++ __ jcc(Assembler::equal, lblMissAlign[i], count); ++ } else { ++ __ cmplt(count, 1 << i, AT); ++ __ jcc(Assembler::notEqual, lblMissAlign[i], AT); ++ } ++ __ bind(lblCopy[i]); ++ sprintf(buf, "lblCopy[%d]", i); ++ __ block_comment(buf); ++ ++ __ load(i, AT, 0, src); ++ __ store(i, AT, 0, dst); ++ __ addl(src, 1 << i, src); ++ __ addl(dst, 1 << i, dst); ++ __ subl(count, 1 << i, count); ++ if(i == widthInByte){ ++ __ jcc(Assembler::notEqual, lblCopy[i], count); ++ }else{ ++ __ subl(count, 1 << i, AT); ++ __ jcc(Assembler::greaterEqual, lblCopy[i], AT); ++ } ++ __ bind(lblMissAlign[i]); ++ sprintf(buf, "lblMissAlign[%d]", i); ++ __ block_comment(buf); ++ } ++ } ++ ++ void generate_conjoint_copy(int widthInByte,Register src, Register dst, Register count) {SCOPEMARK_NAME(generate_conjoint_copy, _masm) ++ // Label lblMissAlignInByte, lblMissAlignInShort, lblMissAlignInWord, lblMissAlignInLong; ++ Label lblMissAlign[4]; ++ // Label lblSkipByte, lblSkipInShort, lblSkipInWord, lblSkipInLong; ++ Label lblSkip[4]; ++ // Label lblCopyByte, lblCopyShort, lblCopyWord, lblCopyLong; ++ Label lblCopy[4]; ++ char buf[50]; ++ ++ assert_different_registers(src, dst, AT); ++ //__ stop("TODO:generate_conjoint_copy jzy"); ++ if (widthInByte == 0) {__ subl(count, 9, AT); __ ble_l(AT, lblMissAlign[1]);} ++ if (widthInByte == 1) {__ subl(count, 9, AT); __ ble_l(AT, lblMissAlign[2]);} ++ ++ ++ for (int i = widthInByte; i < 3; i++) { ++ __ xorptr(src, dst, AT); ++ __ andptr(AT, 1 << i, AT); // if the backward ith bit of src and and dst is the same ++ __ jcc(Assembler::notEqual, lblMissAlign[i+1], AT); // if arrays don't have the same alignment, ... ++ ++ __ andptr(src, 1 << i, AT); ++ __ jcc(Assembler::equal, lblSkip[i], AT); // have same alignment but extra byte/short/int ++ ++ __ subl(src, 1 << i, src); ++ __ subl(dst, 1 << i, dst); ++ __ load(i, AT, 0, src); //TODO:refactor? jzy ++ __ store(i, AT, 0, dst);//TODO:refactor? 
jzy ++ __ subl(count, 1 << i, count); ++ ++ __ bind(lblSkip[i]); ++ sprintf(buf, "lblSkip[%d]", i); ++ __ block_comment(buf); ++ } ++ ++ for (int i = 3; i >= widthInByte; i--) { // FasterArrayCopy ++ if(i == widthInByte){ ++ __ jcc(Assembler::equal, lblMissAlign[i], count); ++ }else{ ++ __ cmpl(count, 1 << i); ++ __ jcc(Assembler::less, lblMissAlign[i]); ++ } ++ ++ __ bind(lblCopy[i]); ++ sprintf(buf, "lblCopy[%d]", i); ++ __ block_comment(buf); ++ ++ __ subl(src, 1 << i, src); ++ __ subl(dst, 1 << i, dst); ++ __ load(i, AT, 0, src); ++ __ store(i, AT, 0, dst); ++ __ subl(count, 1 << i, count); ++ if (i == widthInByte) { ++ __ jcc(Assembler::notEqual, lblCopy[i], count); ++ } else { ++ __ cmpl(count, 1 << i); ++ __ jcc(Assembler::greaterEqual, lblCopy[i]); ++ } ++ __ bind(lblMissAlign[i]); ++ sprintf(buf, "lblMissAlign[%d]", i); ++ __ block_comment(buf); ++ } ++ } ++ ++#undef __ ++#define __ masm-> ++ ++ address generate_throw_exception(const char* name, ++ address runtime_entry, ++ Register arg1 = noreg, ++ Register arg2 = noreg) { ++ // Information about frame layout at time of blocking runtime call. ++ // Note that we only have to preserve callee-saved registers since ++ // the compilers are responsible for supplying a continuation point ++ // if they expect all registers to be preserved. ++ // n.b. sw64 asserts that frame::arg_reg_save_area_bytes == 0 ++ enum layout { ++ rfp_off = frame::arg_reg_save_area_bytes/BytesPerInt, ++ rfp_off2, ++ return_off, ++ return_off2, ++ framesize // inclusive of return address ++ }; ++ ++ int insts_size = 2048; ++ int locs_size = 32; ++ ++ CodeBuffer code(name, insts_size, locs_size); ++ OopMapSet* oop_maps = new OopMapSet(); ++ MacroAssembler* masm = new MacroAssembler(&code); ++ ++ address start = __ pc(); ++ ++ // This is an inlined and slightly modified version of call_VM ++ // which has the ability to fetch the return PC out of ++ // thread-local storage and also sets up last_Java_sp slightly ++ // differently than the real call_VM ++ Register java_thread = rthread; ++ ++ //Label frame_return; ++ //__ stop("no check:jzy"); ++ __ enter(); // Save FP and LR before call ++ ++ __ mov_immediate64(rscratch3, (framesize-4) << LogBytesPerWord); ++ __ subptr(esp, rscratch3, esp); // prolog ++ ++ int frame_complete = __ pc() - start; ++ ++ // Set up last_Java_sp and last_Java_fp ++ address the_pc = __ pc(); ++ __ set_last_Java_frame(esp, rfp, the_pc, rscratch3); ++ ++ // Call runtime ++ if (arg1 != noreg) { ++ assert(arg2 != c_rarg1, "clobbered"); ++ __ movl(c_rarg1, arg1); ++ } ++ if (arg2 != noreg) { ++ __ movl(c_rarg2, arg2); ++ } ++ __ movl(c_rarg0, rthread); ++ ++ // Call runtime ++ __ call(RuntimeAddress(runtime_entry)); ++ ++ // Generate oop map ++ OopMap* map = new OopMap(framesize, 0); ++ oop_maps->add_gc_map(the_pc - start, map); ++ ++ __ reset_last_Java_frame(true); ++ ++ // discard arguments ++ __ leave(); ++ // check for pending exceptions ++#ifdef ASSERT ++ Label L; ++ __ cmpptr(Address(java_thread, Thread::pending_exception_offset()), ++ (int32_t) NULL_WORD); ++ __ jcc(Assembler::notEqual, L); ++ __ should_not_reach_here("Thread::pending_exception_offset"); ++ __ bind(L); ++#endif //ASSERT ++// __ push(RA); ++ __ jump(RuntimeAddress(StubRoutines::forward_exception_entry())); ++ ++ ++ // codeBlob framesize is in words (not VMRegImpl::slot_size) ++ RuntimeStub* stub = ++ RuntimeStub::new_runtime_stub(name, ++ &code, ++ frame_complete, ++ (framesize >> (LogBytesPerWord - LogBytesPerInt)), ++ oop_maps, false); ++ return stub->entry_point(); ++ } 
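++  // Note: stubs built by generate_throw_exception() never return to their caller; after the runtime call they pop the fabricated frame and jump straight to StubRoutines::forward_exception_entry().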
++ ++ // Initialization ++ void generate_initial() { ++ if (SafePatch) { ++ NativeCall::instruction_size = 6 * BytesPerInstWord; ++ NativeCall::return_address_offset = 6 * BytesPerInstWord; ++ NativeJump::instruction_size = 6 * BytesPerInstWord; ++ NativeJump::instruction_size = 6 * BytesPerInstWord; ++// NativeMovConstReg::instruction_size = 5 * BytesPerInstWord; ++// NativeMovConstReg::next_instruction_offset = 5 * BytesPerInstWord; ++ } ++ // Generate initial stubs and initializes the entry points ++ ++ // entry points that exist in all platforms Note: This is code ++ // that could be shared among different platforms - however the ++ // benefit seems to be smaller than the disadvantage of having a ++ // much more complicated generator structure. See also comment in ++ // stubRoutines.hpp. ++ ++ StubRoutines::_forward_exception_entry = generate_forward_exception(); ++ ++ StubRoutines::_call_stub_entry = generate_call_stub(StubRoutines::_call_stub_return_address); ++ ++ // is referenced by megamorphic call ++ StubRoutines::_catch_exception_entry = generate_catch_exception(); ++ ++ // atomic calls ++ StubRoutines::_atomic_xchg_entry = generate_atomic_xchg(); ++ StubRoutines::_atomic_xchg_long_entry = generate_atomic_xchg_long(); ++ StubRoutines::_atomic_cmpxchg_entry = generate_atomic_cmpxchg(); ++ StubRoutines::_atomic_cmpxchg_byte_entry = generate_atomic_cmpxchg_byte(); ++ StubRoutines::_atomic_cmpxchg_long_entry = generate_atomic_cmpxchg_long(); ++ StubRoutines::_atomic_add_entry = generate_atomic_add(); ++ StubRoutines::_atomic_add_long_entry = generate_atomic_add_long(); ++ StubRoutines::_fence_entry = generate_orderaccess_fence(); ++ ++//// StubRoutines::_handler_for_unsafe_access_entry = generate_handler_for_unsafe_access(); ++ ++ StubRoutines::_throw_StackOverflowError_entry = ++ generate_throw_exception("StackOverflowError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime:: ++ throw_StackOverflowError)); ++ // platform dependent ++ StubRoutines::sw64::_get_previous_fp_entry = generate_get_previous_fp(); ++ StubRoutines::sw64::_get_previous_sp_entry = generate_get_previous_sp(); ++ if (UseCRC32Intrinsics) { ++ // set table address before stub generation which use it ++ StubRoutines::_crc_table_adr = (address)StubRoutines::sw64::_crc_table; ++ //ShouldNotReachHere(); ++ StubRoutines::_updateBytesCRC32 = CAST_FROM_FN_PTR(address, SharedRuntime::updateBytesCRC32); ++ } ++ } ++ ++ void generate_all() { ++ // Generates all stubs and initializes the entry points ++ ++ // These entry points require SharedInfo::stack0 to be set up in ++ // non-core builds and need to be relocatable, so they each ++ // fabricate a RuntimeStub internally. 
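++    // (Each of the three entries below reuses generate_throw_exception() with the matching SharedRuntime thrower.)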
++ StubRoutines::_throw_AbstractMethodError_entry = ++ generate_throw_exception("AbstractMethodError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime:: ++ throw_AbstractMethodError)); ++ ++ StubRoutines::_throw_IncompatibleClassChangeError_entry = ++ generate_throw_exception("IncompatibleClassChangeError throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime:: ++ throw_IncompatibleClassChangeError)); ++ ++ StubRoutines::_throw_NullPointerException_at_call_entry = ++ generate_throw_exception("NullPointerException at call throw_exception", ++ CAST_FROM_FN_PTR(address, ++ SharedRuntime:: ++ throw_NullPointerException_at_call)); ++ ++ // entry points that are platform specific ++ ++ // support for verify_oop (must happen after universe_init) ++ StubRoutines::_verify_oop_subroutine_entry = generate_verify_oop(); ++ // arraycopy stubs used by compilers ++ generate_arraycopy_stubs(); ++ ++ // Safefetch stubs. ++ generate_safefetch("SafeFetch32", sizeof(int), &StubRoutines::_safefetch32_entry, ++ &StubRoutines::_safefetch32_fault_pc, ++ &StubRoutines::_safefetch32_continuation_pc); ++ generate_safefetch("SafeFetchN", sizeof(intptr_t), &StubRoutines::_safefetchN_entry, ++ &StubRoutines::_safefetchN_fault_pc, ++ &StubRoutines::_safefetchN_continuation_pc); ++#ifdef COMPILER2 ++ //TODO:jzy ++ if (UseMontgomeryMultiplyIntrinsic) { ++ StubRoutines::_montgomeryMultiply ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_multiply); ++ } ++ if (UseMontgomerySquareIntrinsic) { ++ StubRoutines::_montgomerySquare ++ = CAST_FROM_FN_PTR(address, SharedRuntime::montgomery_square); ++ } ++#endif ++ ++ } ++ ++ public: ++ StubGenerator(CodeBuffer* code, bool all) : StubCodeGenerator(code) { ++ if (all) { ++ generate_all(); ++ } else { ++ generate_initial(); ++ } ++ } ++}; // end class declaration ++ ++void StubGenerator_generate(CodeBuffer* code, bool all) { ++ StubGenerator g(code, all); ++} +diff --git a/src/hotspot/cpu/sw64/stubRoutines_sw64.cpp b/src/hotspot/cpu/sw64/stubRoutines_sw64.cpp +new file mode 100644 +index 0000000000..c6214148d3 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/stubRoutines_sw64.cpp +@@ -0,0 +1,95 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "utilities/globalDefinitions.hpp" ++ ++// Implementation of the platform-specific part of StubRoutines - for ++// a description of how to extend it, see the stubRoutines.hpp file. ++ ++//find the last fp value ++address StubRoutines::sw64::_get_previous_fp_entry = NULL; ++address StubRoutines::sw64::_get_previous_sp_entry = NULL; ++address StubRoutines::sw64::_call_stub_compiled_return = NULL; ++ ++juint StubRoutines::sw64::_crc_table[] = ++{ ++ 0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL, ++ 0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL, ++ 0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL, ++ 0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL, ++ 0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL, ++ 0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL, ++ 0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL, ++ 0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL, ++ 0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL, ++ 0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL, ++ 0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL, ++ 0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL, ++ 0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL, ++ 0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL, ++ 0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL, ++ 0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL, ++ 0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL, ++ 0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL, ++ 0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL, ++ 0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL, ++ 0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL, ++ 0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL, ++ 0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL, ++ 0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL, ++ 0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL, ++ 0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL, ++ 0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL, ++ 0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL, ++ 0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL, ++ 0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL, ++ 0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL, ++ 0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL, ++ 0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL, ++ 0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL, ++ 0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL, ++ 0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL, ++ 0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL, ++ 0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL, ++ 0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL, ++ 0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 
0x72076785UL, 0x05005713UL, ++ 0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL, ++ 0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL, ++ 0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL, ++ 0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL, ++ 0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL, ++ 0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL, ++ 0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL, ++ 0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL, ++ 0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL, ++ 0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL, ++ 0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL, ++ 0x2d02ef8dUL ++}; +diff --git a/src/hotspot/cpu/sw64/stubRoutines_sw64.hpp b/src/hotspot/cpu/sw64/stubRoutines_sw64.hpp +new file mode 100644 +index 0000000000..f928309364 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/stubRoutines_sw64.hpp +@@ -0,0 +1,68 @@ ++/* ++ * Copyright (c) 2003, 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_STUBROUTINES_SW64_HPP ++#define CPU_SW64_VM_STUBROUTINES_SW64_HPP ++ ++// This file holds the platform specific parts of the StubRoutines ++// definition. See stubRoutines.hpp for a description on how to ++// extend it. ++ ++static bool returns_to_call_stub(address return_pc) { ++ return return_pc == _call_stub_return_address || return_pc == sw64::get_call_stub_compiled_return(); ++} ++ ++enum platform_dependent_constants { ++ code_size1 = 20000 LP64_ONLY(+12000), // simply increase if too small (assembler will crash if too small) ++ code_size2 = 42000 LP64_ONLY(+12000) // simply increase if too small (assembler will crash if too small) ++}; ++ ++class sw64 { ++ friend class StubGenerator; ++ friend class VMStructs; ++ private: ++ // If we call compiled code directly from the call stub we will ++ // need to adjust the return back to the call stub to a specialized ++ // piece of code that can handle compiled results and cleaning the fpu ++ // stack. The variable holds that location. 
++ static address _call_stub_compiled_return; ++ static address _get_previous_fp_entry; ++ static address _get_previous_sp_entry; ++ static address _verify_mxcsr_entry; ++ // shuffle mask for fixing up 128-bit words consisting of big-endian 32-bit integers ++ static address _key_shuffle_mask_addr; ++ // masks and table for CRC32 ++ static uint64_t _crc_by128_masks[]; ++ static juint _crc_table[]; ++public: ++ // Call back points for traps in compiled code ++ static address get_previous_fp_entry() { return _get_previous_fp_entry; } ++ static address get_previous_sp_entry() { return _get_previous_sp_entry; } ++ static address get_call_stub_compiled_return() { return _call_stub_compiled_return; } ++ static void set_call_stub_compiled_return(address ret) { _call_stub_compiled_return = ret; } ++ ++}; ++ ++#endif // CPU_SW64_VM_STUBROUTINES_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/sw64.ad b/src/hotspot/cpu/sw64/sw64.ad +new file mode 100644 +index 0000000000..af56faf453 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/sw64.ad +@@ -0,0 +1,16196 @@ ++// ++// Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2014, 2019, Red Hat, Inc. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// Sw64 Architecture Description File ++ ++//----------REGISTER DEFINITION BLOCK------------------------------------------ ++// This information is used by the matcher and the register allocator to ++// describe individual registers and classes of registers within the target ++// archtecture. ++ ++// format: ++// reg_def name (call convention, c-call convention, ideal type, encoding); ++// call convention : ++// NS = No-Save ++// SOC = Save-On-Call ++// SOE = Save-On-Entry ++// AS = Always-Save ++// ideal type : ++// see opto/opcodes.hpp for more info ++// reg_class name (reg, ...); ++// alloc_class name (reg, ...); ++ ++register %{ ++// We must define the 64 bit int registers in two 32 bit halves, the ++// real lower register and a virtual upper half register. upper halves ++// are used by the register allocator but are not actually supplied as ++// operands to memory ops. ++// ++// follow the C1 compiler in making registers ++// ++// r0-r7,r10-r26 volatile (caller save) ++// r27-r32 system (no save, no allocate) ++// r8-r9 invisible to the allocator (so we can use them as scratch regs) ++// ++// as regards Java usage. 
we don't use any callee save registers ++// because this makes it difficult to de-optimise a frame (see comment ++// in x86 implementation of Deoptimization::unwind_callee_save_values) ++// ++ ++// General Registers ++// Integer Registers ++reg_def V0 (SOC, SOC, Op_RegI, 0, i0->as_VMReg()); ++reg_def V0_H (SOC, SOC, Op_RegI, 0, i0->as_VMReg()->next()); ++ ++reg_def T0 (SOC, SOC, Op_RegI, 1, i1->as_VMReg()); ++reg_def T0_H (SOC, SOC, Op_RegI, 1, i1->as_VMReg()->next()); ++reg_def T1 (SOC, SOC, Op_RegI, 2, i2->as_VMReg()); ++reg_def T1_H (SOC, SOC, Op_RegI, 2, i2->as_VMReg()->next()); ++reg_def T2 (SOC, SOC, Op_RegI, 3, i3->as_VMReg()); ++reg_def T2_H (SOC, SOC, Op_RegI, 3, i3->as_VMReg()->next()); ++reg_def T3 (SOC, SOC, Op_RegI, 4, i4->as_VMReg()); ++reg_def T3_H (SOC, SOC, Op_RegI, 4, i4->as_VMReg()->next()); ++reg_def T4 (SOC, SOC, Op_RegI, 5, i5->as_VMReg()); ++reg_def T4_H (SOC, SOC, Op_RegI, 5, i5->as_VMReg()->next()); ++reg_def T5 (SOC, SOC, Op_RegI, 6, i6->as_VMReg()); ++reg_def T5_H (SOC, SOC, Op_RegI, 6, i6->as_VMReg()->next()); ++reg_def T6 (SOC, SOC, Op_RegI, 7, i7->as_VMReg()); ++reg_def T6_H (SOC, SOC, Op_RegI, 7, i7->as_VMReg()->next()); ++reg_def T7 (SOC, SOC, Op_RegI, 8, i8->as_VMReg()); ++reg_def T7_H (SOC, SOC, Op_RegI, 8, i8->as_VMReg()->next()); ++ ++reg_def S0 (SOC, SOE, Op_RegI, 9, i9->as_VMReg()); ++reg_def S0_H (SOC, SOE, Op_RegI, 9, i9->as_VMReg()->next()); ++reg_def S1 (SOC, SOE, Op_RegI, 10, i10->as_VMReg()); ++reg_def S1_H (SOC, SOE, Op_RegI, 10, i10->as_VMReg()->next()); ++reg_def S2 (SOC, SOE, Op_RegI, 11, i11->as_VMReg()); ++reg_def S2_H (SOC, SOE, Op_RegI, 11, i11->as_VMReg()->next()); ++reg_def S3 (SOC, SOE, Op_RegI, 12, i12->as_VMReg()); ++reg_def S3_H (SOC, SOE, Op_RegI, 12, i12->as_VMReg()->next()); ++reg_def S4 (SOC, SOE, Op_RegI, 13, i13->as_VMReg()); ++reg_def S4_H (SOC, SOE, Op_RegI, 13, i13->as_VMReg()->next()); ++reg_def S5 (SOC, SOE, Op_RegI, 14, i14->as_VMReg()); ++reg_def S5_H (SOC, SOE, Op_RegI, 14, i14->as_VMReg()->next()); ++reg_def FP ( NS, SOE, Op_RegI, 15, i15->as_VMReg()); ++reg_def FP_H ( NS, SOE, Op_RegI, 15, i15->as_VMReg()->next()); ++ ++reg_def A0 (SOC, SOC, Op_RegI, 16, i16->as_VMReg()); ++reg_def A0_H (SOC, SOC, Op_RegI, 16, i16->as_VMReg()->next()); ++reg_def A1 (SOC, SOC, Op_RegI, 17, i17->as_VMReg()); ++reg_def A1_H (SOC, SOC, Op_RegI, 17, i17->as_VMReg()->next()); ++reg_def A2 (SOC, SOC, Op_RegI, 18, i18->as_VMReg()); ++reg_def A2_H (SOC, SOC, Op_RegI, 18, i18->as_VMReg()->next()); ++reg_def A3 (SOC, SOC, Op_RegI, 19, i19->as_VMReg()); ++reg_def A3_H (SOC, SOC, Op_RegI, 19, i19->as_VMReg()->next()); ++reg_def A4 (SOC, SOC, Op_RegI, 20, i20->as_VMReg()); ++reg_def A4_H (SOC, SOC, Op_RegI, 20, i20->as_VMReg()->next()); ++reg_def A5 (SOC, SOC, Op_RegI, 21, i21->as_VMReg()); ++reg_def A5_H (SOC, SOC, Op_RegI, 21, i21->as_VMReg()->next()); ++ ++reg_def T8 (SOC, SOC, Op_RegI, 22, i22->as_VMReg()); ++reg_def T8_H (SOC, SOC, Op_RegI, 22, i22->as_VMReg()->next()); ++reg_def T9 (SOC, SOC, Op_RegI, 23, i23->as_VMReg()); ++reg_def T9_H (SOC, SOC, Op_RegI, 23, i23->as_VMReg()->next()); ++reg_def T10 (SOC, SOC, Op_RegI, 24, i24->as_VMReg()); ++reg_def T10_H (SOC, SOC, Op_RegI, 24, i24->as_VMReg()->next()); ++reg_def T11 (SOC, SOC, Op_RegI, 25, i25->as_VMReg()); ++reg_def T11_H (SOC, SOC, Op_RegI, 25, i25->as_VMReg()->next()); ++reg_def RA ( NS, NS, Op_RegI, 26, i26->as_VMReg()); ++reg_def RA_H ( NS, NS, Op_RegI, 26, i26->as_VMReg()->next()); ++reg_def T12 (SOC, SOC, Op_RegI, 27, i27->as_VMReg()); ++reg_def T12_H (SOC, SOC, Op_RegI, 27, 
i27->as_VMReg()->next()); ++reg_def AT ( NS, NS, Op_RegI, 28, i28->as_VMReg()); ++reg_def AT_H ( NS, NS, Op_RegI, 28, i28->as_VMReg()->next()); ++reg_def GP ( NS, NS, Op_RegI, 29, i29->as_VMReg()); ++reg_def GP_H ( NS, NS, Op_RegI, 29, i29->as_VMReg()->next()); ++reg_def SP ( NS, NS, Op_RegI, 30, i30->as_VMReg()); ++reg_def SP_H ( NS, NS, Op_RegI, 30, i30->as_VMReg()->next()); ++reg_def R0 ( NS, NS, Op_RegI, 31, VMRegImpl::Bad()); ++ ++// Floating registers. ++reg_def F0 ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()); ++reg_def F0_H ( SOC, SOC, Op_RegF, 0, f0->as_VMReg()->next()); ++reg_def F1 ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()); ++reg_def F1_H ( SOC, SOC, Op_RegF, 1, f1->as_VMReg()->next()); ++reg_def F2 ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()); ++reg_def F2_H ( SOC, SOC, Op_RegF, 2, f2->as_VMReg()->next()); ++reg_def F3 ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()); ++reg_def F3_H ( SOC, SOC, Op_RegF, 3, f3->as_VMReg()->next()); ++reg_def F4 ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()); ++reg_def F4_H ( SOC, SOC, Op_RegF, 4, f4->as_VMReg()->next()); ++reg_def F5 ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()); ++reg_def F5_H ( SOC, SOC, Op_RegF, 5, f5->as_VMReg()->next()); ++reg_def F6 ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()); ++reg_def F6_H ( SOC, SOC, Op_RegF, 6, f6->as_VMReg()->next()); ++reg_def F7 ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()); ++reg_def F7_H ( SOC, SOC, Op_RegF, 7, f7->as_VMReg()->next()); ++reg_def F8 ( SOC, SOC, Op_RegF, 8, f8->as_VMReg()); ++reg_def F8_H ( SOC, SOC, Op_RegF, 8, f8->as_VMReg()->next()); ++reg_def F9 ( SOC, SOC, Op_RegF, 9, f9->as_VMReg()); ++reg_def F9_H ( SOC, SOC, Op_RegF, 9, f9->as_VMReg()->next()); ++reg_def F10 ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()); ++reg_def F10_H ( SOC, SOC, Op_RegF, 10, f10->as_VMReg()->next()); ++reg_def F11 ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()); ++reg_def F11_H ( SOC, SOC, Op_RegF, 11, f11->as_VMReg()->next()); ++reg_def F12 ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()); ++reg_def F12_H ( SOC, SOC, Op_RegF, 12, f12->as_VMReg()->next()); ++reg_def F13 ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()); ++reg_def F13_H ( SOC, SOC, Op_RegF, 13, f13->as_VMReg()->next()); ++reg_def F14 ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()); ++reg_def F14_H ( SOC, SOC, Op_RegF, 14, f14->as_VMReg()->next()); ++reg_def F15 ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()); ++reg_def F15_H ( SOC, SOC, Op_RegF, 15, f15->as_VMReg()->next()); ++reg_def F16 ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()); ++reg_def F16_H ( SOC, SOC, Op_RegF, 16, f16->as_VMReg()->next()); ++reg_def F17 ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()); ++reg_def F17_H ( SOC, SOC, Op_RegF, 17, f17->as_VMReg()->next()); ++reg_def F18 ( SOC, SOC, Op_RegF, 18, f18->as_VMReg()); ++reg_def F18_H ( SOC, SOC, Op_RegF, 18, f18->as_VMReg()->next()); ++reg_def F19 ( SOC, SOC, Op_RegF, 19, f19->as_VMReg()); ++reg_def F19_H ( SOC, SOC, Op_RegF, 19, f19->as_VMReg()->next()); ++reg_def F20 ( SOC, SOC, Op_RegF, 20, f20->as_VMReg()); ++reg_def F20_H ( SOC, SOC, Op_RegF, 20, f20->as_VMReg()->next()); ++reg_def F21 ( SOC, SOC, Op_RegF, 21, f21->as_VMReg()); ++reg_def F21_H ( SOC, SOC, Op_RegF, 21, f21->as_VMReg()->next()); ++reg_def F22 ( SOC, SOC, Op_RegF, 22, f22->as_VMReg()); ++reg_def F22_H ( SOC, SOC, Op_RegF, 22, f22->as_VMReg()->next()); ++reg_def F23 ( SOC, SOC, Op_RegF, 23, f23->as_VMReg()); ++reg_def F23_H ( SOC, SOC, Op_RegF, 23, f23->as_VMReg()->next()); ++reg_def F24 ( SOC, SOC, Op_RegF, 24, f24->as_VMReg()); ++reg_def F24_H ( SOC, SOC, Op_RegF, 24, f24->as_VMReg()->next()); ++reg_def F25 ( SOC, SOC, Op_RegF, 25, 
f25->as_VMReg()); ++reg_def F25_H ( SOC, SOC, Op_RegF, 25, f25->as_VMReg()->next()); ++reg_def F26 ( SOC, SOC, Op_RegF, 26, f26->as_VMReg()); ++reg_def F26_H ( SOC, SOC, Op_RegF, 26, f26->as_VMReg()->next()); ++reg_def F27 ( SOC, SOC, Op_RegF, 27, f27->as_VMReg()); ++reg_def F27_H ( SOC, SOC, Op_RegF, 27, f27->as_VMReg()->next()); ++reg_def F28 ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()); ++reg_def F28_H ( SOC, SOC, Op_RegF, 28, f28->as_VMReg()->next()); ++reg_def F29 ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()); ++reg_def F29_H ( SOC, SOC, Op_RegF, 29, f29->as_VMReg()->next()); ++reg_def F30 ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()); ++reg_def F30_H ( SOC, SOC, Op_RegF, 30, f30->as_VMReg()->next()); ++reg_def F31 ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()); ++reg_def F31_H ( SOC, SOC, Op_RegF, 31, f31->as_VMReg()->next()); ++ ++ ++// ---------------------------- ++// Special Registers ++// Condition Codes Flag Registers ++// swjdk11 flag reg is GP ++reg_def SW64_FLAG (SOC, SOC, Op_RegFlags, 29, as_Register(29)->as_VMReg()); ++ ++//S2 is used for get_thread(S2) ++//S5 is uesd for heapbase of compressed oop ++alloc_class chunk0( ++ S0, S0_H, ++ S1, S1_H, ++ S3, S3_H, ++ S4, S4_H, ++ S5, S5_H, ++ S2, S2_H, ++ T2, T2_H, ++ T3, T3_H, ++ //T11, T11_H, jzy use rscratch3 ++ T12, T12_H, ++ T1, T1_H, // inline_cache_reg ++ A5, A5_H, ++ A4, A4_H, ++ V0, V0_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ T4, T4_H, ++ T5, T5_H, ++ T6, T6_H, ++ T7, T7_H, ++ T8, T8_H, ++ T9, T9_H, ++ T10, T10_H, ++ GP, GP_H, ++ RA, RA_H, ++ AT, AT_H, ++ SP, SP_H, // stack_pointer ++ FP, FP_H // frame_pointer ++ ); ++ ++alloc_class chunk1( F0, F0_H, ++ F1, F1_H, ++ F2, F2_H, ++ F3, F3_H, ++ F4, F4_H, ++ F5, F5_H, ++ F6, F6_H, ++ F7, F7_H, ++ F8, F8_H, ++ F9, F9_H, ++ F10, F10_H, ++ F11, F11_H, ++ F20, F20_H, ++ F21, F21_H, ++ F22, F22_H, ++ F23, F23_H, ++ F24, F24_H, ++ F25, F25_H, ++ F26, F26_H, ++ F27, F27_H, ++ F28, F28_H, ++ F19, F19_H, ++ F18, F18_H, ++ F17, F17_H, ++ F16, F16_H, ++ F15, F15_H, ++ F14, F14_H, ++ F13, F13_H, ++ F12, F12_H, ++ F29, F29_H, ++ F30, F30_H, ++ F31, F31_H); ++ ++alloc_class chunk2(SW64_FLAG); ++ ++reg_class s_reg( S0, S1, S2, S3, S4, S5 ); ++reg_class s0_reg( S0 ); ++reg_class s1_reg( S1 ); ++reg_class s2_reg( S2 ); ++reg_class s3_reg( S3 ); ++reg_class s4_reg( S4 ); ++reg_class s5_reg( S5 ); ++ ++//reg_class t_reg( T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T11, T12 ); //jzy ++reg_class t_reg( T0, T1, T2, T3, T4, T5, T6, T7, T8, T9, T10, T12 ); ++reg_class t0_reg( T0 ); ++reg_class t1_reg( T1 ); ++reg_class t2_reg( T2 ); ++reg_class t3_reg( T3 ); ++reg_class t4_reg( T4 ); ++reg_class t5_reg( T5 ); ++reg_class t6_reg( T6 ); ++reg_class t7_reg( T7 ); ++reg_class t8_reg( T8 ); ++reg_class t9_reg( T9 ); ++reg_class t10_reg( T10 ); ++//reg_class t11_reg( T11 ); ++reg_class t12_reg( T12 ); ++ ++reg_class a_reg( A0, A1, A2, A3, A4, A5 ); ++reg_class a0_reg( A0 ); ++reg_class a1_reg( A1 ); ++reg_class a2_reg( A2 ); ++reg_class a3_reg( A3 ); ++reg_class a4_reg( A4 ); ++reg_class a5_reg( A5 ); ++ ++reg_class v0_reg( V0 ); ++ ++reg_class sp_reg( SP, SP_H ); ++reg_class fp_reg( FP, FP_H ); ++ ++reg_class sw64_flags(SW64_FLAG); ++ ++reg_class v0_long_reg( V0, V0_H ); ++ ++reg_class t0_long_reg( T0, T0_H ); ++reg_class t1_long_reg( T1, T1_H ); ++reg_class t2_long_reg( T2, T2_H ); ++reg_class t3_long_reg( T3, T3_H ); ++reg_class t4_long_reg( T4, T4_H ); ++reg_class t5_long_reg( T5, T5_H ); ++reg_class t6_long_reg( T6, T6_H ); ++reg_class t7_long_reg( T7, T7_H ); ++reg_class 
t8_long_reg( T8, T8_H ); ++reg_class t9_long_reg( T9, T9_H ); ++reg_class t10_long_reg( T10, T10_H ); ++//reg_class t11_long_reg( T11, T11_H ); jzy ++reg_class t12_long_reg( T12, T12_H ); ++ ++reg_class a0_long_reg( A0, A0_H ); ++reg_class a1_long_reg( A1, A1_H ); ++reg_class a2_long_reg( A2, A2_H ); ++reg_class a3_long_reg( A3, A3_H ); ++reg_class a4_long_reg( A4, A4_H ); ++reg_class a5_long_reg( A5, A5_H ); ++ ++reg_class s0_long_reg( S0, S0_H ); ++reg_class s1_long_reg( S1, S1_H ); ++reg_class s2_long_reg( S2, S2_H ); ++reg_class s3_long_reg( S3, S3_H ); ++reg_class s4_long_reg( S4, S4_H ); ++reg_class s5_long_reg( S5, S5_H ); ++ ++//TODO:order is OK? jzy ++//TODO:no S2 & S5 jzy ++//NO T12? ++//reg_class int_reg( S1, S0, S4, S3, T11, T2, T3, T1, A5, A4, V0, A3, A2, A1, A0, T0, T4, T5, T6, T7, T8, T9, T10 ); jzy ++reg_class int_reg( S1, S0, S4, S3, T2, T3, T1, A5, A4, V0, A3, A2, A1, A0, T0, T4, T5, T6, T7, T8, T9, T10 ); ++//TODO:no S2 & S5 jzy ++//NO T12? ++//reg_class no_Ax_int_reg( S1, S0, S4, S3, T11, T2, T3, T1, V0, T0, T4, T5, T6, T7, T8, T9, T10 ); jzy ++reg_class no_Ax_int_reg( S1, S0, S4, S3, T2, T3, T1, V0, T0, T4, T5, T6, T7, T8, T9, T10 ); ++//TODO: no S2 & S5 ++reg_class any_reg( // without FP ++ S1, S1_H, ++ S0, S0_H, ++ S4, S4_H, ++ S3, S3_H, ++ T11, T11_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ T4, T4_H, ++ T5, T5_H, ++ T6, T6_H, ++ T7, T7_H, ++ T8, T8_H, ++ T9, T9_H, ++ T10, T10_H, ++ S2, S2_H, // TLS thread ++ S5, S5_H, // heapbase ++ V0, V0_H, ++ RA, RA_H, ++ T12, T12_H, ++ AT, AT_H, ++ GP, GP_H, ++ SP, SP_H, ++); ++reg_class ptr_reg( ++ S1, S1_H, ++ S0, S0_H, ++ S4, S4_H, ++ S3, S3_H, ++ //T11, T11_H, jzy ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ T4, T4_H, ++ T5, T5_H, ++ T6, T6_H, ++ T7, T7_H, ++ T8, T8_H, ++ T9, T9_H, ++ T10, T10_H, ++ V0, V0_H ++ ); ++//TODO:who no T11? what perpose of T11? jzy ++reg_class no_T11_p_reg( ++ S1, S1_H, ++ S0, S0_H, ++ S4, S4_H, ++ S3, S3_H, ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ T4, T4_H, ++ T5, T5_H, ++ T6, T6_H, ++ T7, T7_H, ++ T8, T8_H, ++ T9, T9_H, ++ T10, T10_H, ++ V0, V0_H ++ ); ++ ++reg_class long_reg( ++ S1, S1_H, ++ S0, S0_H, ++ S4, S4_H, ++ S3, S3_H, ++ //T11, T11_H, jzy ++ T2, T2_H, ++ T3, T3_H, ++ T1, T1_H, ++ A5, A5_H, ++ A4, A4_H, ++ A3, A3_H, ++ A2, A2_H, ++ A1, A1_H, ++ A0, A0_H, ++ T0, T0_H, ++ T4, T4_H, ++ T5, T5_H, ++ T6, T6_H, ++ T7, T7_H, ++ T8, T8_H, ++ T9, T9_H, ++ T10, T10_H, ++ V0, V0_H ++ ); ++ ++ ++// Floating point registers. 
++//2017/9/6 zyh: F28&F29 are used as temporary registers in float cmp instructs
++reg_class flt_reg( F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13, F14, F15, F16, F17, F18, F19, F20, F21, F22, F23, F24, F25, F26, F27);
++reg_class dbl_reg( F0, F0_H,
++                   F1, F1_H,
++                   F2, F2_H,
++                   F3, F3_H,
++                   F4, F4_H,
++                   F5, F5_H,
++                   F6, F6_H,
++                   F7, F7_H,
++                   F8, F8_H,
++                   F9, F9_H,
++                   F10, F10_H,
++                   F11, F11_H,
++                   F12, F12_H,
++                   F13, F13_H,
++                   F14, F14_H,
++                   F15, F15_H,
++                   F16, F16_H,
++                   F17, F17_H,
++                   F18, F18_H,
++                   F19, F19_H,
++                   F20, F20_H,
++                   F21, F21_H,
++                   F22, F22_H,
++                   F23, F23_H,
++                   F24, F24_H,
++                   F25, F25_H,
++                   F26, F26_H,
++                   F27, F27_H,
++//                 F28, F28_H,
++//                 F29, F29_H
++                   );
++
++reg_class flt_arg0( F16 );
++reg_class dbl_arg0( F16, F16_H );
++reg_class dbl_arg1( F17, F17_H );
++reg_class dbl_tmp_f27( F27, F27_H );
++reg_class dbl_tmp_f28( F28, F28_H );
++reg_class dbl_tmp_f29( F29, F29_H );
++reg_class dbl_tmp_f30( F30, F30_H );
++
++%}
++
++//----------DEFINITION BLOCK---------------------------------------------------
++// Define name --> value mappings to inform the ADLC of an integer valued name
++// Current support includes integer values in the range [0, 0x7FFFFFFF]
++// Format:
++//   int_def <name> ( <int_value>, <expression> );
++// Generated Code in ad_<arch>.hpp
++//   #define <name> (<expression>)
++//   // value == <int_value>
++// Generated code in ad_<arch>.cpp adlc_verification()
++//   assert( <name> == <int_value>, "Expect (<expression>) to equal <int_value>");
++//
++definitions %{
++  //int_def DEFAULT_COST ( 100, 100);
++  int_def HUGE_COST (1000000, 1000000);
++  int_def INSN_COST ( 100, 100);
++
++  // Memory refs are twice as expensive as run-of-the-mill.
++  int_def MEMORY_REF_COST ( 200, INSN_COST * 2);
++  // Branches are even more expensive.
++  int_def BRANCH_COST ( 300, INSN_COST * 3);
++  // we use jr instruction to construct call, so more expensive
++  int_def CALL_COST ( 500, INSN_COST * 5);
++  int_def VOLATILE_REF_COST ( 1000, INSN_COST * 10); //not in 8?? CHECK djx
++%}
++
++
++//----------SOURCE BLOCK-------------------------------------------------------
++// This is a block of C++ code which provides values, functions, and
++// definitions necessary in the rest of the architecture description
++
++source_hpp %{
++// Header information of the source block.
++// Method declarations/definitions which are used outside
++// the ad-scope can conveniently be defined here.
++//
++// To keep related declarations/definitions/uses close together,
++// we switch between source %{ }% and source_hpp %{ }% freely as needed.
++
++#if INCLUDE_ZGC
++#include "gc/z/zBarrierSetAssembler.hpp"
++#endif
++#include "opto/machnode.hpp"
++#include "asm/macroAssembler.hpp"
++#include "gc/shared/cardTable.hpp"
++#include "gc/shared/cardTableBarrierSet.hpp"
++#include "gc/shared/collectedHeap.hpp"
++#include "opto/addnode.hpp"
++
++class NativeJump;
++
++class CallStubImpl {
++
++  //--------------------------------------------------------------
++  //---<  Used for optimization in Compile::shorten_branches  >---
++  //--------------------------------------------------------------
++
++ public:
++  // Size of call trampoline stub.
++ static uint size_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++ ++ // number of relocations needed by a call trampoline stub ++ static uint reloc_call_trampoline() { ++ return 0; // no call trampolines on this platform ++ } ++}; ++ ++class HandlerImpl { ++ ++ public: ++ ++ static int emit_exception_handler(CodeBuffer &cbuf); ++ static int emit_deopt_handler(CodeBuffer& cbuf); ++ ++ static uint size_exception_handler() { ++ // NativeCall instruction size is the same as NativeJump. ++ // exception handler starts out as jump and can be patched to ++ // a call be deoptimization. (4932387) ++ // Note that this value is also credited (in output.cpp) to ++ // the size of the code section. ++ int size = NativeJump::instruction_size; ++// int size = NativeCall::instruction_size; ++ return align_up(size, 16);//need align_up? jzy ++ } ++ ++ static uint size_deopt_handler() { ++ int size = NativeCall::instruction_size; // BytesPerInstWord; //li48(4) + call(1) ++ return align_up(size, 16);//need align_up? jzy ++ } ++ ++}; ++ ++ bool is_CAS(int opcode); ++ bool unnecessary_release(const Node *barrier); ++ // predicate controlling translation of StoreCM ++ bool unnecessary_storestore(const Node *storecm); ++ ++%} // end source_hpp ++ ++source %{ ++ ++ ++#define __ _masm. ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} ++#endif ++ ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++ // is_CAS(int opcode) ++ // ++ // return true if opcode is one of the possible CompareAndSwapX ++ // values otherwise false. ++ ++bool is_CAS(int opcode) ++{ ++ switch(opcode) { ++ // We handle these ++ case Op_CompareAndSwapI: ++ case Op_CompareAndSwapL: ++ case Op_CompareAndSwapP: ++ case Op_CompareAndSwapN: ++ case Op_GetAndSetI: ++ case Op_GetAndSetL: ++ case Op_GetAndSetP: ++ case Op_GetAndSetN: ++ case Op_GetAndAddI: ++ case Op_GetAndAddL: ++ return true; ++ default: ++ return false; ++ } ++} ++ ++bool unnecessary_release(const Node *n) ++{ ++ assert((n->is_MemBar() && ++ n->Opcode() == Op_MemBarRelease), ++ "expecting a release membar"); ++ ++ MemBarNode *barrier = n->as_MemBar(); ++ ++ if (!barrier->leading()) { ++ return false; ++ } else { ++ Node* trailing = barrier->trailing_membar(); ++ MemBarNode* trailing_mb = trailing->as_MemBar(); ++ assert(trailing_mb->trailing(), "Not a trailing membar?"); ++ assert(trailing_mb->leading_membar() == n, "inconsistent leading/trailing membars"); ++ ++ Node* mem = trailing_mb->in(MemBarNode::Precedent); ++ if (!mem->is_Store()) { ++ assert(mem->is_LoadStore(), ""); ++ assert(trailing_mb->Opcode() == Op_MemBarAcquire, ""); ++ return is_CAS(mem->Opcode()); ++ } ++ } ++ ++ return false; ++} ++ ++bool unnecessary_storestore(const Node *storecm) ++{ ++ assert(storecm->Opcode() == Op_StoreCM, "expecting a StoreCM"); ++ ++ // we need to generate a dmb ishst between an object put and the ++ // associated card mark when we are using CMS without conditional ++ // card marking ++ ++ if (UseConcMarkSweepGC && !UseCondCardMark) { ++ return false; ++ } ++ ++ // a storestore is unnecesary in all other cases ++ ++ return true; ++} ++ ++// Emit exception handler code. ++// Stuff framesize into a register and call a VM stub routine. ++ int HandlerImpl::emit_exception_handler(CodeBuffer & cbuf) { ++ // Note that the code buffer's insts_mark is always relative to insts. 
++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_exception_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ ++ int offset = __ offset(); ++ ++ __ block_comment("; emit_exception_handler"); ++ ++ //cbuf.set_insts_mark(); ++ //__ relocate(relocInfo::runtime_call_type); ++ //__ patchable_jump((address)(OptoRuntime::exception_blob()->entry_point())); ++ __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point())); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_exception_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++ // return 0; ++ } ++ ++// Emit deopt handler code. ++int HandlerImpl::emit_deopt_handler(CodeBuffer& cbuf) { ++ ++ // Note that the code buffer's insts_mark is always relative to insts. ++ // That's why we must use the macroassembler to generate a handler. ++ MacroAssembler _masm(&cbuf); ++ address base = __ start_a_stub(size_deopt_handler()); ++ if (base == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return 0; // CodeBuffer::expand failed ++ } ++ //__ stop("TODO:not check jzy(emit_deopt_handler)"); ++ int offset = __ offset(); ++ address the_pc = (address) __ pc(); ++ __ block_comment("; emit_deopt_handler"); ++ if(UseAddpi){ ++ __ addpi(-1, RA); ++ }else{ ++ __ br(RA, 0); ++ __ subptr(RA, BytesPerInstWord, RA);//point to the begin of deopt ++ } ++ __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack())); ++ __ align(16); ++ assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow"); ++ __ end_a_stub(); ++ return offset; ++} ++ ++ ++//============================================================================= ++ ++/* ++ // Float masks come from different places depending on platform. ++#ifdef _LP64 ++ static address float_signmask() { return StubRoutines::x86::float_sign_mask(); } ++ static address float_signflip() { return StubRoutines::x86::float_sign_flip(); } ++ static address double_signmask() { return StubRoutines::x86::double_sign_mask(); } ++ static address double_signflip() { return StubRoutines::x86::double_sign_flip(); } ++#else ++ static address float_signmask() { return (address)float_signmask_pool; } ++ static address float_signflip() { return (address)float_signflip_pool; } ++ static address double_signmask() { return (address)double_signmask_pool; } ++ static address double_signflip() { return (address)double_signflip_pool; } ++#endif ++*/ ++ ++ ++const bool Matcher::match_rule_supported(int opcode) { ++ if (!has_match_rule(opcode)) ++ return false; ++ ++ switch (opcode) { ++ //Op_CountLeadingZerosI Op_CountLeadingZerosL can be deleted, all MIPS CPUs support clz & dclz. ++ case Op_CountLeadingZerosI: ++ case Op_CountLeadingZerosL: ++ if (!UseCountLeadingZerosInstruction) ++ return false; ++ break; ++ case Op_CountTrailingZerosI: ++ case Op_CountTrailingZerosL: ++ if (!UseCountTrailingZerosInstruction) ++ return false; ++ break; ++ } ++ ++ return true; // Per default match rules are supported. ++} ++ ++ const bool Matcher::match_rule_supported_vector(int opcode, int vlen) { ++ // identify extra cases that we might want to provide match rules for ++ // e.g. 
Op_ vector nodes and other intrinsics while guarding with vlen ++ bool ret_value = match_rule_supported(opcode); ++ /*if (ret_value) { ++ switch (opcode) { ++ case Op_AddVB: ++ case Op_SubVB: ++ if ((vlen == 64) && (VM_Version::supports_avx512bw() == false)) ++ ret_value = false; ++ break; ++ case Op_URShiftVS: ++ case Op_RShiftVS: ++ case Op_LShiftVS: ++ case Op_MulVS: ++ case Op_AddVS: ++ case Op_SubVS: ++ if ((vlen == 32) && (VM_Version::supports_avx512bw() == false)) ++ ret_value = false; ++ break; ++ case Op_CMoveVF: ++ if (vlen != 8) ++ ret_value = false; ++ break; ++ case Op_CMoveVD: ++ if (vlen != 4) ++ ret_value = false; ++ break; ++ } ++ }*/ ++ ++ return ret_value; // Per default match rules are supported. ++} ++ ++const bool Matcher::has_predicated_vectors(void) { ++ bool ret_value = false; ++ /*if (UseAVX > 2) { ++ ret_value = VM_Version::supports_avx512vl(); ++ }*/ ++ ++ return ret_value; ++} ++ ++const int Matcher::float_pressure(int default_pressure_threshold) { ++ int float_pressure_threshold = default_pressure_threshold; ++ /* ++#ifdef _LP64 ++ if (UseAVX > 2) { ++ // Increase pressure threshold on machines with AVX3 which have ++ // 2x more XMM registers. ++ float_pressure_threshold = default_pressure_threshold * 2; ++ } ++#endif ++ */ ++ return float_pressure_threshold; ++} ++ ++// Max vector size in bytes. 0 if not supported. ++const int Matcher::vector_width_in_bytes(BasicType bt) { ++ /*assert(is_java_primitive(bt), "only primitive type vectors"); ++ if (UseSSE < 2) return 0; ++ // SSE2 supports 128bit vectors for all types. ++ // AVX2 supports 256bit vectors for all types. ++ // AVX2/EVEX supports 512bit vectors for all types. ++ int size = (UseAVX > 1) ? (1 << UseAVX) * 8 : 16; ++ // AVX1 supports 256bit vectors only for FLOAT and DOUBLE. ++ if (UseAVX > 0 && (bt == T_FLOAT || bt == T_DOUBLE)) ++ size = (UseAVX > 2) ? 64 : 32; ++ if (UseAVX > 2 && (bt == T_BYTE || bt == T_SHORT || bt == T_CHAR)) ++ size = (VM_Version::supports_avx512bw()) ? 64 : 32; ++ // Use flag to limit vector size. ++ size = MIN2(size,(int)MaxVectorSize); ++ // Minimum 2 values in vector (or 4 for bytes). ++ switch (bt) { ++ case T_DOUBLE: ++ case T_LONG: ++ if (size < 16) return 0; ++ break; ++ case T_FLOAT: ++ case T_INT: ++ if (size < 8) return 0; ++ break; ++ case T_BOOLEAN: ++ if (size < 4) return 0; ++ break; ++ case T_CHAR: ++ if (size < 4) return 0; ++ break; ++ case T_BYTE: ++ if (size < 4) return 0; ++ break; ++ case T_SHORT: ++ if (size < 4) return 0; ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ return size;*/ ++ return 0; ++} ++ ++// Limits on vector size (number of elements) loaded into vector. ++const int Matcher::max_vector_size(const BasicType bt) { ++ assert(is_java_primitive(bt), "only primitive type vectors"); ++ return vector_width_in_bytes(bt)/type2aelembytes(bt); ++} ++ ++const int Matcher::min_vector_size(const BasicType bt) { ++ return max_vector_size(bt); // Same as max. ++} ++ ++// Vector ideal reg ++const uint Matcher::vector_ideal_reg(int size) { ++ assert(MaxVectorSize == 8, ""); ++ switch(size) { ++ case 8: return Op_VecD; ++ } ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++// Only lowest bits of xmm reg are used for vector shift count. ++const uint Matcher::vector_shift_count_ideal_reg(int size) { ++ fatal("vector shift is not supported"); ++ return Node::NotAMachineReg; ++} ++ ++// SW64 supports misaligned vectors store/load? 
FIXME ++const bool Matcher::misaligned_vectors_ok() { ++ return false; ++ //return !AlignVector; // can be changed by flag ++} ++ ++// SW64 doesn't support AES intrinsics ++const bool Matcher::pass_original_key_for_aes() { ++ return false; ++} ++ ++ ++const bool Matcher::convi2l_type_required = true; ++ ++// Should the Matcher clone shifts on addressing modes, expecting them ++// to be subsumed into complex addressing expressions or compute them ++// into registers? ++bool Matcher::clone_address_expressions(AddPNode* m, Matcher::MStack& mstack, VectorSet& address_visited) { ++ return clone_base_plus_offset_address(m, mstack, address_visited); ++} ++ ++void Compile::reshape_address(AddPNode* addp) { ++} ++/* ++// Helper methods for MachSpillCopyNode::implementation(). ++static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, ++ int src_hi, int dst_hi, uint ireg, outputStream* st) { ++ // In 64-bit VM size calculation is very complex. Emitting instructions ++ // into scratch buffer is used to get size in 64-bit VM. ++ /* ++ LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) ++ assert(ireg == Op_VecS || // 32bit vector ++ (src_lo & 1) == 0 && (src_lo + 1) == src_hi && ++ (dst_lo & 1) == 0 && (dst_lo + 1) == dst_hi, ++ "no non-adjacent vector moves" ); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ switch (ireg) { ++ case Op_VecS: // copy whole register ++ case Op_VecD: ++ case Op_VecX: ++#ifndef _LP64 ++ __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); ++#else ++ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { ++ __ movdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); ++ } else { ++ __ vextractf32x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); ++ } ++#endif ++ break; ++ case Op_VecY: ++#ifndef _LP64 ++ __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); ++#else ++ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { ++ __ vmovdqu(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo])); ++ } else { ++ __ vextractf64x4(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 0x0); ++ } ++#endif ++ break; ++ case Op_VecZ: ++ __ evmovdquq(as_XMMRegister(Matcher::_regEncode[dst_lo]), as_XMMRegister(Matcher::_regEncode[src_lo]), 2); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ int size = __ offset() - offset; ++#ifdef ASSERT ++ // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. ++ assert(!do_size || size == 4, "incorrect size calculattion"); ++#endif ++ return size; ++#ifndef PRODUCT ++ } else if (!do_size) { ++ switch (ireg) { ++ case Op_VecS: ++ case Op_VecD: ++ case Op_VecX: ++ st->print("movdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); ++ break; ++ case Op_VecY: ++ case Op_VecZ: ++ st->print("vmovdqu %s,%s\t# spill",Matcher::regName[dst_lo],Matcher::regName[src_lo]); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ } ++ // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix. ++ return (UseAVX > 2) ? 6 : 4; ++ ++ return 0; ++} ++ ++static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, ++ int stack_offset, int reg, uint ireg, outputStream* st) { ++ // In 64-bit VM size calculation is very complex. 
Emitting instructions ++ // into scratch buffer is used to get size in 64-bit VM. ++ /* ++ LP64_ONLY( assert(!do_size, "this method calculates size only for 32-bit VM"); ) ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ int offset = __ offset(); ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecS: ++ __ movdl(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); ++ break; ++ case Op_VecD: ++ __ movq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); ++ break; ++ case Op_VecX: ++#ifndef _LP64 ++ __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); ++#else ++ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { ++ __ movdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); ++ } else { ++ __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); ++ __ vinsertf32x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); ++ } ++#endif ++ break; ++ case Op_VecY: ++#ifndef _LP64 ++ __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); ++#else ++ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { ++ __ vmovdqu(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset)); ++ } else { ++ __ vpxor(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), 2); ++ __ vinsertf64x4(as_XMMRegister(Matcher::_regEncode[reg]), as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset),0x0); ++ } ++#endif ++ break; ++ case Op_VecZ: ++ __ evmovdquq(as_XMMRegister(Matcher::_regEncode[reg]), Address(rsp, stack_offset), 2); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecS: ++ __ movdl(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); ++ break; ++ case Op_VecD: ++ __ movq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); ++ break; ++ case Op_VecX: ++#ifndef _LP64 ++ __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); ++#else ++ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { ++ __ movdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); ++ } ++ else { ++ __ vextractf32x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); ++ } ++#endif ++ break; ++ case Op_VecY: ++#ifndef _LP64 ++ __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); ++#else ++ if ((UseAVX < 3) || VM_Version::supports_avx512vl()) { ++ __ vmovdqu(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg])); ++ } ++ else { ++ __ vextractf64x4(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 0x0); ++ } ++#endif ++ break; ++ case Op_VecZ: ++ __ evmovdquq(Address(rsp, stack_offset), as_XMMRegister(Matcher::_regEncode[reg]), 2); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++ int size = __ offset() - offset; ++#ifdef ASSERT ++ int offset_size = (stack_offset == 0) ? 0 : ((stack_offset < 0x80) ? 1 : (UseAVX > 2) ? 6 : 4); ++ // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. 
++ assert(!do_size || size == (5+offset_size), "incorrect size calculattion"); ++#endif ++ return size; ++#ifndef PRODUCT ++ } else if (!do_size) { ++ if (is_load) { ++ switch (ireg) { ++ case Op_VecS: ++ st->print("movd %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecD: ++ st->print("movq %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecX: ++ st->print("movdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ case Op_VecY: ++ case Op_VecZ: ++ st->print("vmovdqu %s,[rsp + %d]\t# spill", Matcher::regName[reg], stack_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } else { // store ++ switch (ireg) { ++ case Op_VecS: ++ st->print("movd [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); ++ break; ++ case Op_VecD: ++ st->print("movq [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); ++ break; ++ case Op_VecX: ++ st->print("movdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); ++ break; ++ case Op_VecY: ++ case Op_VecZ: ++ st->print("vmovdqu [rsp + %d],%s\t# spill", stack_offset, Matcher::regName[reg]); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ } ++#endif ++ } ++ bool is_single_byte = false; ++ int vec_len = 0; ++ if ((UseAVX > 2) && (stack_offset != 0)) { ++ int tuple_type = Assembler::EVEX_FVM; ++ int input_size = Assembler::EVEX_32bit; ++ switch (ireg) { ++ case Op_VecS: ++ tuple_type = Assembler::EVEX_T1S; ++ break; ++ case Op_VecD: ++ tuple_type = Assembler::EVEX_T1S; ++ input_size = Assembler::EVEX_64bit; ++ break; ++ case Op_VecX: ++ break; ++ case Op_VecY: ++ vec_len = 1; ++ break; ++ case Op_VecZ: ++ vec_len = 2; ++ break; ++ } ++ is_single_byte = Assembler::query_compressed_disp_byte(stack_offset, true, vec_len, tuple_type, input_size, 0); ++ } ++ int offset_size = 0; ++ int size = 5; ++ if (UseAVX > 2 ) { ++ if (VM_Version::supports_avx512novl() && (vec_len == 2)) { ++ offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); ++ size += 2; // Need an additional two bytes for EVEX encoding ++ } else if (VM_Version::supports_avx512novl() && (vec_len < 2)) { ++ offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); ++ } else { ++ offset_size = (stack_offset == 0) ? 0 : ((is_single_byte) ? 1 : 4); ++ size += 2; // Need an additional two bytes for EVEX encodding ++ } ++ } else { ++ offset_size = (stack_offset == 0) ? 0 : ((stack_offset <= 127) ? 1 : 4); ++ } ++ // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix. ++ return size+offset_size; ++ return 0; ++} ++*/ ++static inline jint replicate4_imm(int con, int width) { ++ // Load a constant of "width" (in bytes) and replicate it to fill 32bit. ++ assert(width == 1 || width == 2, "only byte or short types here"); ++ int bit_width = width * 8; ++ jint val = con; ++ val &= (1 << bit_width) - 1; // mask off sign bits ++ while(bit_width < 32) { ++ val |= (val << bit_width); ++ bit_width <<= 1; ++ } ++ return val; ++} ++ ++static inline jlong replicate8_imm(int con, int width) { ++ // Load a constant of "width" (in bytes) and replicate it to fill 64bit. 
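++  // Worked example (illustrative): replicate8_imm(0xAB, 1) expands the byte to
++  // 0xABABABABABABABAB, and replicate8_imm(0x1234, 2) yields 0x1234123412341234.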
++ assert(width == 1 || width == 2 || width == 4, "only byte, short or int types here"); ++ int bit_width = width * 8; ++ jlong val = con; ++ val &= (((jlong) 1) << bit_width) - 1; // mask off sign bits ++ while(bit_width < 64) { ++ val |= (val << bit_width); ++ bit_width <<= 1; ++ } ++ return val; ++} ++//============================================================================= ++#ifndef PRODUCT ++void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const { ++ st->print("NOP \t# %d bytes pad for loops and calls", 4 * _count); ++} ++#endif ++ ++void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const { ++ MacroAssembler _masm(&cbuf); ++ int i = 0; ++ for(i = 0; i < _count; i++) ++ __ nop(); ++} ++ ++uint MachNopNode::size(PhaseRegAlloc *) const { ++ return 4 * _count; ++} ++ ++const Pipeline* MachNopNode::pipeline() const { ++ return MachNode::pipeline_class(); ++} ++ ++#ifndef PRODUCT ++ void MachBreakpointNode::format(PhaseRegAlloc*, outputStream* st) const { ++ st->print("# breakpoint"); ++ } ++#endif ++ ++ void MachBreakpointNode::emit(CodeBuffer &cbuf, PhaseRegAlloc* ra_) const { ++ MacroAssembler _masm(&cbuf); ++ //__ stop("breakpoint! ");// stop is ok ?? lsp ++ __ block_comment("execute breakpoint"); ++ __ int3(); ++ } ++ ++ uint MachBreakpointNode::size(PhaseRegAlloc* ra_) const { ++ return MachNode::size(ra_); ++ } ++ ++#define RELOC_IMM64 Assembler::imm_operand ++#define RELOC_DISP32 Assembler::disp32_operand ++/* ++#define __ _masm. ++ ++static bool generate_vzeroupper(Compile* C) { ++ return (VM_Version::supports_vzeroupper() && (C->max_vector_size() > 16 || C->clear_upper_avx() == true)) ? true: false; // Generate vzeroupper ++} ++ ++static int clear_avx_size() { ++ return generate_vzeroupper(Compile::current()) ? 3: 0; // vzeroupper ++}*/ ++ ++// !!!!! Special hack to get all types of calls to specify the byte offset ++// from the start of the call to the point where the return address ++// will point. ++int MachCallStaticJavaNode::ret_addr_offset() ++{ ++// warning("TODO:MachCallStaticJavaNode::ret_addr_offset(), check lsp"); ++ if (SafePatch) { ++ assert(NativeCall::instruction_size == 24, "in MachCallStaticJavaNode::ret_addr_offset"); ++ } else { ++ assert(NativeCall::instruction_size == 20, "in MachCallStaticJavaNode::ret_addr_offset"); // don't consider setfpec1 ++ } ++ return NativeCall::instruction_size; ++} ++ ++int MachCallDynamicJavaNode::ret_addr_offset() ++{ ++ //TODO:warning("TODO:MachCallDynamicJavaNode::ret_addr_offset(), check lsp"); ++ //ldi IC_Klass, ++ //sll IC_Klass, ++ //ldih IC_Klass ++ //ldi IC_Klass // refer to MacroAssembler::ic_call(address entry) ++ ++ //ldi T12 ++ //sll T12 ++ //ldih T12 ++ //ldi T12 ++ //call T12 ++ //nop ++ if (SafePatch) { ++ assert(NativeCall::instruction_size == 24, "in MachCallStaticJavaNode::ret_addr_offset"); ++ } else { ++ assert(NativeCall::instruction_size == 20, "in MachCallStaticJavaNode::ret_addr_offset"); // don't consider setfpec1 ++ } ++ return 4 * BytesPerInstWord + NativeCall::instruction_size; ++} ++ ++int MachCallRuntimeNode::ret_addr_offset() { ++ if (SafePatch) { ++ assert(NativeCall::instruction_size == 24, "in MachCallRuntimeNode::ret_addr_offset()"); ++ } else { ++// warning("TODO:MachCallRuntimeNode::ret_addr_offset(), check lsp");// need adjust for enc_class Java_To_Runtime ? 
lsp ++ assert(NativeCall::instruction_size == 20, "in MachCallRuntimeNode::ret_addr_offset()"); ++ } ++ return 4 * BytesPerInstWord + NativeCall::instruction_size; // don't consider setfpec1 ++} ++ ++// Indicate if the safepoint node needs the polling page as an input. ++// Since SW64 doesn't have absolute addressing, it needs. ++bool SafePointNode::needs_polling_address_input() ++{ ++ //TODO:warning("TODO:SafePointNode::needs_polling_address_input(), check lsp"); ++ return true; ++// return SafepointMechanism::uses_thread_local_poll() || Assembler::is_polling_page_far(); ++} ++ ++// ++// Compute padding required for nodes which need alignment ++// ++ ++// no use in sw8!! CHECK djx ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallStaticJavaDirectNode::compute_padding(int current_offset) const ++{ ++// warning("TODO:CallStaticJavaDirectNode::compute_padding, check lsp"); ++ return align_up(current_offset, alignment_required()) - current_offset; ++// return 0; ++} ++ ++// The address of the call instruction needs to be 4-byte aligned to ++// ensure that it does not span a cache line so that it can be patched. ++int CallDynamicJavaDirectNode::compute_padding(int current_offset) const ++{ ++// warning("TODO:CallDynamicJavaDirectNode::compute_padding, check lsp"); ++ current_offset += 4 * BytesPerInstWord; //skip li48 ++ return align_up(current_offset, alignment_required()) - current_offset; ++// return 0; ++} ++ ++//swjdk8 has it use for CallLeafNoFPDirect ins_alignment(16) lsp ++//int CallLeafNoFPDirectNode::compute_padding(int current_offset) const { ++// return round_to(current_offset, alignment_required()) - current_offset; ++//} ++// ++//use for CallRuntimeDirect ins_alignment(16) ++//int CallLeafDirectNode::compute_padding(int current_offset) const { ++// return round_to(current_offset, alignment_required()) - current_offset; ++//} ++// ++// use for CallRuntimeDirect ins_alignment(16) ++//int CallRuntimeDirectNode::compute_padding(int current_offset) const { ++// return round_to(current_offset, alignment_required()) - current_offset; ++//} ++ ++//============================================================================= ++const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty; ++ ++int Compile::ConstantTable::calculate_table_base_offset() const { ++ return 0; // absolute addressing, no offset ++} ++ ++bool MachConstantBaseNode::requires_postalloc_expand() const { return false; } ++void MachConstantBaseNode::postalloc_expand(GrowableArray *nodes, PhaseRegAlloc *ra_) { ++ ShouldNotReachHere(); ++} ++ ++void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const { ++ // Empty encoding ++ } ++ ++uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const { ++ return 0; ++} ++ ++#ifndef PRODUCT ++void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ st->print("# MachConstantBaseNode (empty encoding)"); ++} ++#endif ++ ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const { ++ Compile* C = ra_->C; ++ ++ int framesize = C->frame_size_in_bytes(); ++ int bangsize = C->bang_size_in_bytes(); ++ assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned"); ++ ++ // Calls to C2R adapters often do not accept exceptional returns. ++ // We require that their callers must bang for them. 
But be careful, because
++  // some VM calls (such as call site linkage) can use several kilobytes of
++  // stack. But the stack safety zone should account for that.
++  // See bugs 4446381, 4468289, 4497237.
++  if (C->need_stack_bang(bangsize)) {
++    st->print_cr("# stack bang %d", bangsize); st->print("\t");
++  }
++
++  st->print("\tsubptr esp, %d, esp",framesize);
++  st->print("\tstl ra, %d(esp) @ MachPrologNode\n\t", framesize - wordSize);
++  st->print("\tstl rfp, %d(esp) \n\t", framesize - wordSize*2);
++  if (PreserveFramePointer) {
++    st->print("\taddptr esp, %d, rfp \n\t", framesize - wordSize*2);
++  }
++
++}
++#endif
++
++void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
++  Compile* C = ra_->C;
++  MacroAssembler _masm(&cbuf);
++
++  int framesize = C->frame_size_in_bytes();
++  int bangsize = C->bang_size_in_bytes();
++
++  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
++
++
++  if (C->need_stack_bang(bangsize)) {
++    __ generate_stack_overflow_check(bangsize);
++
++  }
++  __ subptr(esp, framesize, esp);
++  __ stl(RA, framesize - wordSize, esp);
++  __ stl(rfp, framesize - wordSize*2, esp);
++
++  if (PreserveFramePointer)
++    __ addptr(esp, framesize - wordSize*2, rfp);
++  __ nop(); //Make enough room for patch_verified_entry()
++  __ nop();
++
++  C->set_frame_complete(cbuf.insts_size());
++  if (C->has_mach_constant_base_node()) {
++    // NOTE: We set the table base offset here because users might be
++    // emitted before MachConstantBaseNode.
++    Compile::ConstantTable& constant_table = C->constant_table();
++    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
++  }
++}
++
++uint MachPrologNode::size(PhaseRegAlloc* ra_) const
++{
++  return MachNode::size(ra_); // too many variables; just compute it
++                              // the hard way
++}
++
++int MachPrologNode::reloc() const
++{
++  return 0; // a large enough number
++}
++
++//=============================================================================
++#ifndef PRODUCT
++void MachEpilogNode::format(PhaseRegAlloc* ra_, outputStream* st) const
++{
++  Compile* C = ra_->C;
++//  if (generate_vzeroupper(C)) {
++//    st->print("vzeroupper");
++//    st->cr(); st->print("\t");
++//  }
++  int framesize = C->frame_size_in_bytes();
++  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
++//  // Remove word for return adr already pushed
++//  // and RBP
++//  framesize -= 2*wordSize;
++
++  st->print("\tldl RA, %d, esp # Restore RA ", framesize - wordSize);
++  st->cr(); st->print("\t");
++  st->print("\tldl rfp, %d, esp # Restore rfp ", framesize - wordSize*2);
++  st->cr(); st->print("\t");
++  st->print("addptr esp, %d, esp # Release stack @ MachEpilogNode",framesize);
++  st->cr(); st->print("\t");
++
++  if (do_polling() && C->is_method_compilation()) {
++    st->print("\t");
++    if (SafepointMechanism::uses_thread_local_poll()) {
++      st->print_cr("ldl rscratch3, poll_offset[rthread] #polling_page_address\n\t"
++                   "ldw rscratch3, [rscratch2_AT]\n\t"
++                   "# Safepoint: poll for GC");
++    } else {
++      st->print_cr("mov_immediate64 rscratch3, #offset_to_poll_page\n\t"
++                   "ldw rscratch3, [rscratch3]\n\t"
++                   "# Safepoint: poll for GC");
++    }
++  }
++}
++#endif
++
++void MachEpilogNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const
++{
++  Compile* C = ra_->C;
++  MacroAssembler _masm(&cbuf);
++
++  int framesize = C->frame_size_in_bytes();
++  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
++
++  // Note that VerifyStackAtCalls'
Majik cookie does not change the frame size popped here ++ ++ __ ldl(RA, framesize - wordSize, esp); ++ __ ldl(rfp, framesize - wordSize * 2, esp); ++ __ addptr(esp, framesize, esp); ++ ++ if (StackReservedPages > 0 && C->has_reserved_stack_access()) { ++ __ reserved_stack_check(); ++ } ++ ++ if (do_polling() && C->is_method_compilation()) { ++ MacroAssembler _masm(&cbuf); ++// __ stop("TODO:check why rscratch2_AT? jzy"); ++ if (SafepointMechanism::uses_thread_local_poll()) { ++ __ ldl(rscratch3, Address(rthread, Thread::polling_page_offset())); ++ __ relocate(relocInfo::poll_return_type); ++ __ ldw(rscratch3, Address(rscratch3, 0)); ++ } else { ++ AddressLiteral polling_page(os::get_polling_page(), relocInfo::poll_return_type); ++ __ mov_immediate64(rscratch3, (long)os::get_polling_page()); ++ __ relocate(relocInfo::poll_return_type); ++ __ ldw(rscratch3, Address(rscratch3, 0)); ++ } ++ } ++} ++ ++uint MachEpilogNode::size(PhaseRegAlloc* ra_) const ++{ ++ return MachNode::size(ra_); // too many variables; just compute it ++ // the hard way ++} ++ ++int MachEpilogNode::reloc() const ++{ ++// tty->print_cr(">>>>MachEpilog"); while(1); ++ return 2; // a large enough number ++} ++ ++const Pipeline* MachEpilogNode::pipeline() const ++{ ++ return MachNode::pipeline_class(); ++} ++ ++int MachEpilogNode::safepoint_offset() const ++{ ++ return 0; ++} ++ ++//============================================================================= ++ ++enum RC { ++ rc_bad, ++ rc_int, ++ rc_float, ++ rc_stack ++}; ++ ++static enum RC rc_class(OptoReg::Name reg) ++{ ++ if( !OptoReg::is_valid(reg) ) return rc_bad; ++ ++ if (OptoReg::is_stack(reg)) return rc_stack; ++ ++ VMReg r = OptoReg::as_VMReg(reg); ++ ++ if (r->is_Register()) return rc_int; ++ ++ assert(r->is_FloatRegister(), "must be"); ++ return rc_float; ++} ++ ++// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad. 
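++// On SW64 these helpers are left commented out below: with
++// Matcher::vector_width_in_bytes() returning 0 (see above), the matcher should
++// not create vector spills that would need them.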
++/*static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo, ++ int src_hi, int dst_hi, uint ireg, outputStream* st); ++ ++static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load, ++ int stack_offset, int reg, uint ireg, outputStream* st);*/ ++ ++static void vec_stack_to_stack_helper(CodeBuffer *cbuf, int src_offset, ++ int dst_offset, uint ireg, outputStream* st) { ++ /*if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ switch (ireg) { ++ case Op_VecS: ++ __ movq(Address(rsp, -8), rax); ++ __ movl(rax, Address(rsp, src_offset)); ++ __ movl(Address(rsp, dst_offset), rax); ++ __ movq(rax, Address(rsp, -8)); ++ break; ++ case Op_VecD: ++ __ pushq(Address(rsp, src_offset)); ++ __ popq (Address(rsp, dst_offset)); ++ break; ++ case Op_VecX: ++ __ pushq(Address(rsp, src_offset)); ++ __ popq (Address(rsp, dst_offset)); ++ __ pushq(Address(rsp, src_offset+8)); ++ __ popq (Address(rsp, dst_offset+8)); ++ break; ++ case Op_VecY: ++ __ vmovdqu(Address(rsp, -32), xmm0); ++ __ vmovdqu(xmm0, Address(rsp, src_offset)); ++ __ vmovdqu(Address(rsp, dst_offset), xmm0); ++ __ vmovdqu(xmm0, Address(rsp, -32)); ++ break; ++ case Op_VecZ: ++ __ evmovdquq(Address(rsp, -64), xmm0, 2); ++ __ evmovdquq(xmm0, Address(rsp, src_offset), 2); ++ __ evmovdquq(Address(rsp, dst_offset), xmm0, 2); ++ __ evmovdquq(xmm0, Address(rsp, -64), 2); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#ifndef PRODUCT ++ } else { ++ switch (ireg) { ++ case Op_VecS: ++ st->print("movq [rsp - #8], rax\t# 32-bit mem-mem spill\n\t" ++ "movl rax, [rsp + #%d]\n\t" ++ "movl [rsp + #%d], rax\n\t" ++ "movq rax, [rsp - #8]", ++ src_offset, dst_offset); ++ break; ++ case Op_VecD: ++ st->print("pushq [rsp + #%d]\t# 64-bit mem-mem spill\n\t" ++ "popq [rsp + #%d]", ++ src_offset, dst_offset); ++ break; ++ case Op_VecX: ++ st->print("pushq [rsp + #%d]\t# 128-bit mem-mem spill\n\t" ++ "popq [rsp + #%d]\n\t" ++ "pushq [rsp + #%d]\n\t" ++ "popq [rsp + #%d]", ++ src_offset, dst_offset, src_offset+8, dst_offset+8); ++ break; ++ case Op_VecY: ++ st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t" ++ "vmovdqu xmm0, [rsp + #%d]\n\t" ++ "vmovdqu [rsp + #%d], xmm0\n\t" ++ "vmovdqu xmm0, [rsp - #32]", ++ src_offset, dst_offset); ++ break; ++ case Op_VecZ: ++ st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t" ++ "vmovdqu xmm0, [rsp + #%d]\n\t" ++ "vmovdqu [rsp + #%d], xmm0\n\t" ++ "vmovdqu xmm0, [rsp - #64]", ++ src_offset, dst_offset); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++#endif ++ }*/ ++} ++ ++uint MachSpillCopyNode::implementation(CodeBuffer* cbuf, ++ PhaseRegAlloc* ra_, ++ bool do_size, ++ outputStream* st) const { ++ assert(cbuf != NULL || st != NULL, "sanity"); ++ // Get registers to move ++ OptoReg::Name src_second = ra_->get_reg_second(in(1)); ++ OptoReg::Name src_first = ra_->get_reg_first(in(1)); ++ OptoReg::Name dst_second = ra_->get_reg_second(this); ++ OptoReg::Name dst_first = ra_->get_reg_first(this); ++ ++ enum RC src_second_rc = rc_class(src_second); ++ enum RC src_first_rc = rc_class(src_first); ++ enum RC dst_second_rc = rc_class(dst_second); ++ enum RC dst_first_rc = rc_class(dst_first); ++ ++ assert(OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), ++ "must move at least 1 register" ); ++ // Generate spill code! 
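++  // The cases below are keyed on (source class -> destination class):
++  // stack->stack, stack->gpr, stack->fpr, gpr->stack, gpr->gpr, gpr->fpr,
++  // fpr->stack, fpr->gpr and fpr->fpr, with rscratch3 used as the scratch
++  // register for the mem->mem copies.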
++ int size = 0; ++ if (src_first == dst_first && src_second == dst_second) { ++ // Self copy, no move ++ return 0; ++ } ++ if (bottom_type()->isa_vect() != NULL) { ++ uint ireg = ideal_reg(); ++ assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity"); ++ assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity"); ++ if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) { ++ // mem -> mem ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ vec_stack_to_stack_helper(cbuf, src_offset, dst_offset, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_float ) { ++ //vec_mov_helper(cbuf, false, src_first, dst_first, src_second, dst_second, ireg, st); ++ } else if (src_first_rc == rc_float && dst_first_rc == rc_stack ) { ++ int stack_offset = ra_->reg2offset(dst_first); ++ //vec_spill_helper(cbuf, false, false, stack_offset, src_first, ireg, st); ++ } else if (src_first_rc == rc_stack && dst_first_rc == rc_float ) { ++ int stack_offset = ra_->reg2offset(src_first); ++ //vec_spill_helper(cbuf, false, true, stack_offset, dst_first, ireg, st); ++ } else { ++ ShouldNotReachHere(); ++ } ++ return 0; ++ } ++ if (src_first_rc == rc_stack) { ++ // mem -> ++ if (dst_first_rc == rc_stack) { ++ // mem -> mem ++ assert(src_second != dst_first, "overlap"); ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ldl(rscratch3, Address(esp, src_offset)); ++ __ stl(rscratch3, Address(esp, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("ldl rscratch3, [esp + #%d]\t# 64-bit mem-mem spill 1\n\t" ++ "stl rscratch3, [esp + #%d]", ++ src_offset, dst_offset); ++ } ++#endif ++ } ++ size += 8; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ // No pushl/popl, so: ++ int src_offset = ra_->reg2offset(src_first); ++ int dst_offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ldw(rscratch3, Address(esp, src_offset)); ++ __ stw(rscratch3, Address(esp, dst_offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("ldw rscratch3, [esp + #%d] spill 2\n\t" ++ "stw rscratch3, [esp + #%d]\n\t", ++ src_offset, dst_offset); ++ } ++#endif ++ } ++ size += 8; ++ } ++ return size; ++ } else if (dst_first_rc == rc_int) { ++ // mem -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ldl(as_Register(Matcher::_regEncode[dst_first]), Address(esp, offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("ldl %s, [esp + #%d]\t# spill 3", ++ Matcher::regName[dst_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ 
MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ ldw(as_Register(Matcher::_regEncode[dst_first]), Address(esp, offset)); ++ else ++ __ ldwu(as_Register(Matcher::_regEncode[dst_first]), Address(esp, offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ if (this->ideal_reg() == Op_RegI) ++ st->print("ldw %s, [esp + #%d]\t# spill 4", ++ Matcher::regName[dst_first], ++ offset); ++ else ++ st->print("ldwu %s, [esp + #%d]\t# spill 5", ++ Matcher::regName[dst_first], ++ offset); ++ } ++#endif ++ } ++ if (this->ideal_reg() == Op_RegI) { ++ size += 4; ++ } else { ++ size += 8; ++ } ++ } ++ return size; ++ } else if (dst_first_rc == rc_float) { ++ // mem-> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fldd( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(esp, offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("fldd %s, [esp + #%d]\t# spill 6", ++ Matcher::regName[dst_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(src_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ flds( as_FloatRegister(Matcher::_regEncode[dst_first]), Address(esp, offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("flds %s, [esp + #%d]\t# spill 7", ++ Matcher::regName[dst_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } ++ } else if (src_first_rc == rc_int) { ++ // gpr -> ++ if (dst_first_rc == rc_stack) { ++ // gpr -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ stl(as_Register(Matcher::_regEncode[src_first]), Address(esp, offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("stl %s, [esp + #%d] # spill 8", ++ Matcher::regName[src_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ stw(as_Register(Matcher::_regEncode[src_first]), Address(esp, offset)); ++#ifndef PRODUCT ++ } else { ++ if (!do_size) { ++ if (size != 0) st->print("\n\t"); ++ st->print("stw %s, [esp + #%d]\t# spill 9", ++ Matcher::regName[src_first], offset); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } else if (dst_first_rc == rc_int) { ++ // gpr -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ movl(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("movl %s <-- %s\t# spill 10", ++ Matcher::regName[dst_first], ++ 
Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ return size; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ if (this->ideal_reg() == Op_RegI) ++ __ movws(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++ else ++ __ movl(as_Register(Matcher::_regEncode[dst_first]), as_Register(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ if (!do_size) { ++ if (size != 0) st->print("\n\t"); ++ st->print("move(32-bit) %s <-- %s\t# spill 11", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ return size; ++ } ++ } else if (dst_first_rc == rc_float) { ++ // gpr -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ifmovd(as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("ifmovd %s, %s\t# spill 12", ++ Matcher::regName[src_first], ++ Matcher::regName[dst_first]); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ ifmovs( as_Register(Matcher::_regEncode[src_first]), as_FloatRegister(Matcher::_regEncode[dst_first]) ); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("ifmovs %s, %s\t# spill 13", ++ Matcher::regName[src_first], ++ Matcher::regName[dst_first]); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } ++ } else if (src_first_rc == rc_float) { ++ // xmm -> ++ if (dst_first_rc == rc_stack) { ++ // xmm -> mem ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fstd( as_FloatRegister(Matcher::_regEncode[src_first]), Address(esp, offset) ); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("fstd %s, [esp + #%d]\t# spill 14", ++ Matcher::regName[src_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ int offset = ra_->reg2offset(dst_first); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fsts(as_FloatRegister(Matcher::_regEncode[src_first]), Address(esp, offset)); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("fsts %s, [esp + #%d]\t# spill 15", ++ Matcher::regName[src_first], ++ offset); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } else if (dst_first_rc == rc_int) { ++ // xmm -> gpr ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fimovd( as_FloatRegister(Matcher::_regEncode[src_first]), 
as_Register(Matcher::_regEncode[dst_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("fimovd %s, %s\t# spill 16", ++ Matcher::regName[src_first], ++ Matcher::regName[dst_first]); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ fimovs( as_FloatRegister(Matcher::_regEncode[src_first]), as_Register(Matcher::_regEncode[dst_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("fimovs %s, %s\t# spill 17", ++ Matcher::regName[src_first], ++ Matcher::regName[dst_first]); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } else if (dst_first_rc == rc_float) { ++ // xmm -> xmm ++ if ((src_first & 1) == 0 && src_first + 1 == src_second && ++ (dst_first & 1) == 0 && dst_first + 1 == dst_second) { ++ // 64-bit ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_d( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("mov_d %s <-- %s\t# spill 18", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ } else { ++ // 32-bit ++ assert(!((src_first & 1) == 0 && src_first + 1 == src_second), "no transform"); ++ assert(!((dst_first & 1) == 0 && dst_first + 1 == dst_second), "no transform"); ++ if (cbuf) { ++ MacroAssembler _masm(cbuf); ++ __ mov_s( as_FloatRegister(Matcher::_regEncode[dst_first]), as_FloatRegister(Matcher::_regEncode[src_first])); ++#ifndef PRODUCT ++ } else { ++ if(!do_size){ ++ if (size != 0) st->print("\n\t"); ++ st->print("mov_s %s <-- %s\t# spill 19", ++ Matcher::regName[dst_first], ++ Matcher::regName[src_first]); ++ } ++#endif ++ } ++ size += 4; ++ } ++ return size; ++ } ++ } ++ ++ assert(0," foo "); ++ Unimplemented(); ++ return size; ++} ++ ++#ifndef PRODUCT ++void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const { ++ implementation(NULL, ra_, false, st); ++} ++#endif ++ ++void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { ++ implementation(&cbuf, ra_, false, NULL); ++} ++ ++uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const { ++ return MachNode::size(ra_); ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void BoxLockNode::format(PhaseRegAlloc* ra_, outputStream* st) const ++{ ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_reg_first(this); ++ st->print("addl esp, %d, %s \t# box lock@BoxLockNode", offset, Matcher::regName[reg]); ++} ++#endif ++ ++void BoxLockNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const ++{ ++ MacroAssembler _masm(&cbuf); ++ int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ int reg = ra_->get_encode(this); ++ /*if (Assembler::operand_valid_for_simple_type_instruction_immediate(offset)) { ++ __ addl(esp, offset, as_Register(reg)); ++ } else { ++ __ addptr(esp, offset, as_Register(reg)); ++ }*/ ++ __ addptr(esp, offset, as_Register(reg)); ++} ++ ++uint BoxLockNode::size(PhaseRegAlloc *ra_) const ++{ ++ //int offset = ra_->reg2offset(in_RegMask(0).find_first_elem()); ++ //return Assembler::operand_valid_for_simple_type_instruction_immediate(offset) ? 
4 : 8; ++ return 8; ++} ++ ++//============================================================================= ++#ifndef PRODUCT ++void MachUEPNode::format(PhaseRegAlloc* ra_, outputStream* st) const ++{ ++ if (UseCompressedClassPointers) { ++ st->print_cr("movl rscratch3, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t# compressed klass"); ++ st->print_cr("\tdecode_klass_not_null rscratch4, rscratch4"); ++ st->print_cr("\tcmpeq iCache_v0, rscratch4\t # Inline cache check"); ++ } else { ++ st->print_cr("\tcmpeq iCache_v0, [j_rarg0 + oopDesc::klass_offset_in_bytes()]\t" ++ "# Inline cache check"); ++ } ++ st->print_cr("\tjne SharedRuntime::_ic_miss_stub"); ++ st->print_cr("\tnop\t# nops to align entry point"); ++} ++#endif ++ ++void MachUEPNode::emit(CodeBuffer& cbuf, PhaseRegAlloc * ra_) const { ++ MacroAssembler _masm(&cbuf); ++ int ic_reg = Matcher::inline_cache_reg_encode();//sw64 inline_cache_reg(V0); ++ Register receiver = j_rarg0; ++ Register iCache = as_Register(ic_reg); ++ ++ Label skip; ++ __ load_klass(rscratch4, receiver); ++ __ cmpptr(rscratch4, iCache); ++ __ jcc(Assembler::equal, skip); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_jump((address)SharedRuntime::get_ic_miss_stub()); ++ __ align(CodeEntryAlignment); ++ __ bind(skip); ++} ++ ++uint MachUEPNode::size(PhaseRegAlloc* ra_) const ++{ ++ return MachNode::size(ra_); // too many variables; just compute it ++ // the hard way ++} ++ ++ ++//============================================================================= ++ ++int Matcher::regnum_to_fpu_offset(int regnum) ++{ ++ return regnum - 32; // The FP registers are in the second chunk ++} ++ ++// This is UltraSparc specific, true just means we have fast l2f conversion ++const bool Matcher::convL2FSupported(void) { ++ return true; ++} ++ ++// Is this branch offset short enough that a short branch can be used? ++// ++// NOTE: If the platform does not provide any short branch variants, then ++// this method should return false for offset 0. ++bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) { ++ // The passed offset is relative to address of the branch. ++ // On 86 a branch displacement is calculated relative to address ++ // of a next instruction. ++// offset -= br_size; ++// ++// // the short version of jmpConUCF2 contains multiple branches, ++// // making the reach slightly less ++// if (rule == jmpConUCF2_rule) ++// return (-126 <= offset && offset <= 125); ++// return (-128 <= offset && offset <= 127); ++ Unimplemented(); ++ return false; ++} ++ ++const bool Matcher::isSimpleConstant64(jlong value) { ++ // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?. ++ //return value == (int) value; // Cf. storeImmL and immL32. ++ ++ // Probably always true, even if a temp register is required. ++ return true; ++} ++ ++// The ecx parameter to rep stosq for the ClearArray node is in words. ++const bool Matcher::init_array_count_is_in_bytes = false; ++ ++// No additional cost for CMOVL. ++const int Matcher::long_cmove_cost() { return 0; } ++ ++// No CMOVF/CMOVD with SSE2 ++const int Matcher::float_cmove_cost() { return ConditionalMoveLimit; } ++ ++// Does the CPU require late expand (see block.cpp for description of late expand)? ++const bool Matcher::require_postalloc_expand = false; ++ ++// Do we need to mask the count passed to shift instructions or does ++// the cpu only look at the lower 5/6 bits anyway? 
++const bool Matcher::need_masked_shift_count = false;
++
++bool Matcher::narrow_oop_use_complex_address() {
++  assert(UseCompressedOops, "only for compressed oops code");
++//  return (LogMinObjAlignmentInBytes <= 3);
++  //Unimplemented();
++  return false;
++}
++
++bool Matcher::narrow_klass_use_complex_address() {
++  assert(UseCompressedClassPointers, "only for compressed klass code");
++//  return (LogKlassAlignmentInBytes <= 3);
++  //Unimplemented();
++  return false;
++}
++
++bool Matcher::const_oop_prefer_decode() {
++  // Prefer ConN+DecodeN over ConP.
++  return true;
++}
++
++bool Matcher::const_klass_prefer_decode() {
++  // TODO: Either support matching DecodeNKlass (heap-based) in operand
++  //       or consider the following:
++  // Prefer ConNKlass+DecodeNKlass over ConP in simple compressed klass mode.
++  //return Universe::narrow_klass_base() == NULL;
++  return true;
++}
++
++// Is it better to copy float constants, or load them directly from
++// memory?  Intel can load a float constant from a direct address,
++// requiring no extra registers.  Most RISCs will have to materialize
++// an address into a register first, so they would do better to copy
++// the constant from stack.
++const bool Matcher::rematerialize_float_constants = false; // XXX in sw8 is false! CHECK djx
++
++// If CPU can load and store mis-aligned doubles directly then no
++// fixup is needed.  Else we split the double into 2 integer pieces
++// and move it piece-by-piece.  Only happens when passing doubles into
++// C code as the Java calling convention forces doubles to be aligned.
++const bool Matcher::misaligned_doubles_ok = false; //in sw8 is false! CHECK djx
++
++// No-op on amd64
++void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {}
++
++// Advertise here if the CPU requires explicit rounding operations to
++// implement the UseStrictFP mode.
++const bool Matcher::strict_fp_requires_explicit_rounding = true;
++
++// Are floats converted to double when stored to stack during deoptimization?
++// On x64 it is stored without conversion so we can use normal access.
++bool Matcher::float_in_double() { return true; } //swjdk8 is return true lsp??
++
++// Do ints take an entire long register or just half?
++const bool Matcher::int_in_long = true;
++
++// Return whether or not this register is ever used as an argument.
++// This function is used on startup to build the trampoline stubs in
++// generateOptoStub.  Registers not mentioned will be killed by the VM
++// call in the trampoline, and arguments in those registers will not be
++// available to the callee.
++bool Matcher::can_be_java_arg( int reg ) { ++ /* Refer to: [sharedRuntime_sw64.cpp] SharedRuntime::java_calling_convention() */ ++ if ( /* reg == T0_num || reg == T0_H_num ++ || */ reg == A0_num || reg == A0_H_num ++ || reg == A1_num || reg == A1_H_num ++ || reg == A2_num || reg == A2_H_num ++ || reg == A3_num || reg == A3_H_num ++ || reg == A4_num || reg == A4_H_num ++ || reg == A5_num || reg == A5_H_num ) ++ return true; ++ ++ if ( reg == F16_num || reg == F16_H_num ++ || reg == F17_num || reg == F17_H_num ++ || reg == F18_num || reg == F18_H_num ++ || reg == F19_num || reg == F19_H_num ++ || reg == F20_num || reg == F20_H_num ++ || reg == F21_num || reg == F21_H_num ) ++ return true; ++ ++ return false; ++ } ++ ++bool Matcher::is_spillable_arg(int reg) ++ { ++ return can_be_java_arg(reg); ++ } ++ ++bool Matcher::use_asm_for_ldiv_by_con(jlong divisor) { ++ // In 64 bit mode a code which use multiply when ++ // devisor is constant is faster than hardware ++ // DIV instruction (it uses MulHiL). ++ return false; ++ } ++ ++ // Register for DIVI projection of divmodI ++RegMask Matcher::divI_proj_mask() { ++ // return INT_RAX_REG_mask(); ++ Unimplemented(); ++ return 0; ++ } ++ ++ // Register for MODI projection of divmodI ++RegMask Matcher::modI_proj_mask() { ++ // return INT_RDX_REG_mask(); ++ Unimplemented(); ++ return 0; ++ } ++ ++ // Register for DIVL projection of divmodL ++RegMask Matcher::divL_proj_mask() { ++ // return LONG_RAX_REG_mask(); ++ Unimplemented(); ++ return 0; ++ } ++ ++ // Register for MODL projection of divmodL ++RegMask Matcher::modL_proj_mask() { ++ // return LONG_RDX_REG_mask(); ++ Unimplemented(); ++ return 0; ++ } ++ ++// Register for saving SP into on method handle invokes. Not used on x86_64. ++const RegMask Matcher::method_handle_invoke_SP_save_mask() { ++// return NO_REG_mask(); ++ //warning("TODO:Matcher::method_handle_invoke_SP_save_mask(), check lsp"); ++ return FP_REG_mask(); ++} ++ ++%} ++ ++//----------ENCODING BLOCK----------------------------------------------------- ++// This block specifies the encoding classes used by the compiler to output ++// byte streams. Encoding classes generate functions which are called by ++// Machine Instruction Nodes in order to generate the bit encoding of the ++// instruction. Operands specify their base encoding interface with the ++// interface keyword. There are currently supported four interfaces, ++// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER. REG_INTER causes an ++// operand to generate a function which returns its register number when ++// queried. CONST_INTER causes an operand to generate a function which ++// returns the value of the constant when queried. MEMORY_INTER causes an ++// operand to generate four functions which return the Base Register, the ++// Index Register, the Scale Value, and the Offset Value of the operand when ++// queried. COND_INTER causes an operand to generate six functions which ++// return the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional instruction. ++// Instructions specify two basic values for encoding. They use the ++// ins_encode keyword to specify their encoding class (which must be one of ++// the class names specified in the encoding block), and they use the ++// opcode keyword to specify, in order, their primary, secondary, and ++// tertiary opcode. Only the opcode sections which a particular instruction ++// needs for encoding need to be specified. 
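++// Illustration of the MEMORY_INTER contract described above: the four
++// queries (base, index, scale, disp) jointly name an effective address of
++// the form  base + (index << scale) + disp.  A minimal sketch using plain
++// integers instead of the real register/Address types; the helper name is
++// illustrative only and not part of this file:
++//
++//   static intptr_t effective_address(intptr_t base, intptr_t index,
++//                                     int scale, int disp) {
++//     // Pass index = 0 for operands that carry no index register
++//     // (spelled index(0x1e) in the operand definitions below).
++//     return base + (index << scale) + disp;
++//   }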
++encode %{ ++ ++ enc_class load_N_enc (rRegN dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ ldwu($dst$$Register, $mem$$Address); ++ %} ++ ++enc_class load_P_enc(rRegP dst, memory mem) %{ ++ MacroAssembler _masm(&cbuf); ++ int dst = $dst$$reg; ++ ++ relocInfo::relocType disp_reloc = $mem->disp_reloc(); ++ assert(disp_reloc == relocInfo::none, "cannot have disp"); ++ __ ldptr($dst$$Register, $mem$$Address); ++%} ++ ++ enc_class sw64_Java_To_Runtime (method meth) %{ // CALL Java_To_Runtime, Java_To_Runtime_Leaf ++ MacroAssembler _masm(&cbuf); ++ //__ stop("TODO:not check lsp(Java_To_Runtime)"); ++// // This is the instruction starting address for relocation info. ++ ++// // the return address is store@(-1)SP by convention on mips, ++// // but we don't have this convention. ++// // so we have to store the pc into last_java_frame by ourself before calling into runtime ++ address addr = (address)$meth$$method; ++ Label retaddr; ++ int offset = __ offset(); ++ //assert( rscratch3 != T12, "rscratch3 can not is T12!" ); ++ __ block_comment(";;execute sw64_Java_To_Runtime"); ++ __ set_last_Java_frame(esp, noreg, retaddr, rscratch3, rscratch2_AT); ++ //lsp: same to swjdk8, different form aarch64, need to store retaddr in stack?? ++ __ call(AddressLiteral(addr, relocInfo::runtime_call_type),&retaddr);// need to check lsp!! ++ //assert(__ offset() - offset <= (int) ret_addr_offset(), "overflow"); ++ ++ //__ mov_immediate64(pv, (intptr_t)addr); ++ //__ push_RA_call(pv); ++ %} ++// ++ ++enc_class Java_Static_Call(method meth) %{ ++ // JAVA STATIC CALL ++ // CALL to fixup routine. Fixup routine uses ScopeDesc info to ++ // determine who we intended to call. ++ MacroAssembler _masm(&cbuf); ++ ++ ++ //__ stop("TODO:not check lsp(Java_Static_Call)"); ++ cbuf.set_insts_mark();//TODO:relate to relocate? jzy ++ ++ address addr = (address)$meth$$method; ++ address call; ++ if (!_method) { ++ // A call to a runtime wrapper, e.g. new, new_typeArray_Java, uncommon_trap. ++ //__ call(AddressLiteral(addr, relocInfo::runtime_call_type)); ++ __ relocate(relocInfo::runtime_call_type); ++ __ patchable_call((address)($meth$$method)); ++ } else { ++ int method_index = resolved_method_index(cbuf); ++ RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index) ++ : static_call_Relocation::spec(method_index); ++ __ relocate(rspec); ++ __ patchable_call((address)($meth$$method)); ++ ++ // Emit stubs for static call. 
++ address mark = cbuf.insts_mark(); ++ address stub = CompiledStaticCall::emit_to_interp_stub(cbuf, mark); ++ if (stub == NULL) { ++ ciEnv::current()->record_failure("CodeCache is full"); ++ return; ++ } ++#if INCLUDE_AOT ++ CompiledStaticCall::emit_to_aot_stub(cbuf, mark); ++#endif ++ } ++ %} ++ ++ ++enc_class call_epilog() %{ ++ MacroAssembler _masm(&cbuf); ++ if (VerifyStackAtCalls) { ++ Unimplemented(); ++// Label L; ++// MacroAssembler _masm(&cbuf); ++// int framesize = ra_->C->frame_size_in_bytes(); ++// __ addl(esp, framesize, rscratch2_AT); ++// __ cmpptr(rscratch2_AT, rfp); ++// __ jcc(Assembler::equal, L); ++// __ stop("VerifyStackAtCalls failed"); ++// __ BIND(L); ++ } ++ %} ++ // ++ // [Ref: LIR_Assembler::ic_call() ] ++ // ++enc_class Java_Dynamic_Call (method meth) %{ ++ MacroAssembler _masm(&cbuf); ++ __ block_comment("Java_Dynamic_Call"); ++ __ ic_call((address)$meth$$method, resolved_method_index(cbuf)); ++%} ++ ++ ++enc_class Set_Flags_After_Fast_Lock_Unlock(FlagsReg cr) %{ ++// Register flags = $cr$$Register; ++// Label L; ++// ++// MacroAssembler _masm(&cbuf); ++// ++// __ addu(flags, R0, R0); ++// __ beq(AT, L); ++// __ move(flags, 0xFFFFFFFF); ++// __ BIND(L); ++%} ++ ++ enc_class enc_PartialSubtypeCheck(rRegP result, rRegP sub, rRegP super) %{ ++ Register result = $result$$Register; ++ Register sub = $sub$$Register; ++ Register super = $super$$Register; ++ Register length = rscratch3; ++ Label miss; ++// ++// // result may be the same as sub ++// // 47c B40: # B21 B41 <- B20 Freq: 0.155379 ++// // 47c partialSubtypeCheck result=S1, sub=S1, super=S3, length=S0 ++// // 4bc mov S2, NULL #@loadConP ++// // 4c0 beq S1, S2, B21 #@branchConP P=0.999999 C=-1.000000 ++// // ++ MacroAssembler _masm(&cbuf); ++ Label done; ++ __ check_klass_subtype_slow_path(sub, super, length, noreg, ++ NULL, &miss, ++ /*set_cond_codes:*/ true); ++ // Refer to X86_64's RDI ++ __ movl(result, R0); ++ __ beq_l(R0, done); ++ ++ __ BIND(miss); ++ __ movl(result, 1); ++ __ BIND(done); ++ %} ++ ++%} ++ ++ ++//---------SW64 FRAME-------------------------------------------------------------- ++// Definition of frame structure and management information. ++// ++// S T A C K L A Y O U T Allocators stack-slot number ++// | (to get allocators register number ++// G Owned by | | v add SharedInfo::stack0) ++// r CALLER | | ++// o | +--------+ pad to even-align allocators stack-slot ++// w V | pad0 | numbers; owned by CALLER ++// t -----------+--------+----> Matcher::_in_arg_limit, unaligned ++// h ^ | in | 5 ++// | | args | 4 Holes in incoming args owned by SELF ++// | | old | | 3 ++// | | SP-+--------+----> Matcher::_old_SP, even aligned ++// v | | ret | 3 return address ++// Owned by +--------+ ++// Self | pad2 | 2 pad to align old SP ++// | +--------+ 1 ++// | | locks | 0 ++// | +--------+----> SharedInfo::stack0, even aligned ++// | | pad1 | 11 pad to align new SP ++// | +--------+ ++// | | | 10 ++// | | spills | 9 spills ++// V | | 8 (pad0 slot for callee) ++// -----------+--------+----> Matcher::_out_arg_limit, unaligned ++// ^ | out | 7 ++// | | args | 6 Holes in outgoing args owned by CALLEE ++// Owned by new | | ++// Callee SP-+--------+----> Matcher::_new_SP, even aligned ++// | | ++// ++// Note 1: Only region 8-11 is determined by the allocator. Region 0-5 is ++// known from SELF's arguments and the Java calling convention. ++// Region 6-7 is determined per call site. ++// Note 2: If the calling convention leaves holes in the incoming argument ++// area, those holes are owned by SELF. 
Holes in the outgoing area ++// are owned by the CALLEE. Holes should not be nessecary in the ++// incoming area, as the Java calling convention is completely under ++// the control of the AD file. Doubles can be sorted and packed to ++// avoid holes. Holes in the outgoing arguments may be nessecary for ++// varargs C calling conventions. ++// Note 3: Region 0-3 is even aligned, with pad2 as needed. Region 3-5 is ++// even aligned with pad0 as needed. ++// Region 6 is even aligned. Region 6-7 is NOT even aligned; ++// region 6-11 is even aligned; it may be padded out more so that ++// the region from SP to FP meets the minimum stack alignment. ++// Note 4: For I2C adapters, the incoming FP may not meet the minimum stack ++// alignment. Region 11, pad1, may be dynamically extended so that ++// SP meets the minimum alignment. ++ ++ ++frame ++%{ ++// What direction does stack grow in (assumed to be same for C & Java) ++ stack_direction(TOWARDS_LOW); ++ ++ // These three registers define part of the calling convention ++ // between compiled code and the interpreter. ++ inline_cache_reg(V0); // Inline Cache Register x86 is rax, sw64 is v0 check lsp? ++ interpreter_method_oop_reg(S3); // Method Oop Register when ++ // calling interpreter ++ ++ // Optional: name the operand used by cisc-spilling to access ++ // [stack_pointer + offset] ++ cisc_spilling_operand_name(indOffset32); ++ ++ // Number of stack slots consumed by locking an object ++ sync_stack_slots(2); ++ ++ // Compiled code's Frame Pointer ++ frame_pointer(SP); ++ ++ // Interpreter stores its frame pointer in a register which is ++ // stored to the stack by I2CAdaptors. ++ // I2CAdaptors convert from interpreted java to compiled java. ++ interpreter_frame_pointer(FP); ++ ++ // Stack alignment requirement ++ stack_alignment(StackAlignmentInBytes); // Alignment size in bytes (128-bit -> 16 bytes) TODO:check jzy ++ ++ // Number of stack slots between incoming argument block and the start of ++ // a new frame. The PROLOG must add this many slots to the stack. The ++ // EPILOG must remove this many slots. sw64 needs two slots for ++ // return address and fp. ++ in_preserve_stack_slots(4);//to check lsp ++ ++ // Number of outgoing stack slots killed above the out_preserve_stack_slots ++ // for calls to C. Supports the var-args backing area for register parms. ++ varargs_C_out_slots_killed(frame::arg_reg_save_area_bytes/BytesPerInt); ++ ++ // The after-PROLOG location of the return address. Location of ++ // return address specifies a type (REG or STACK) and a number ++ // representing the register number (i.e. - use a register name) or ++ // stack slot. ++ // Ret Addr is on stack in slot 0 if no locks or verification or alignment. ++ // Otherwise, it is above the locks and verification slot and alignment word ++ //return_addr(STACK -1+ round_to(1+VerifyStackAtCalls+Compile::current()->memb()*Compile::current()->sync_stack_slots(),WordsPerLong)); ++ return_addr(REG RA); // to check lsp ++// return_addr(STACK - 2 + ++// align_up((Compile::current()->in_preserve_stack_slots() + ++// Compile::current()->fixed_slots()), ++// stack_alignment_in_slots())); ++ ++ // Body of function which returns an integer array locating ++ // arguments either in registers or in stack slots. Passed an array ++ // of ideal registers called "sig" and a "length" count. Stack-slot ++ // offsets are based on outgoing arguments, i.e. a CALLER setting up ++ // arguments for a CALLEE. 
Incoming stack arguments are ++ // automatically biased by the preserve_stack_slots field above. ++ ++ calling_convention ++ %{ ++ // No difference between ingoing/outgoing just pass false ++ SharedRuntime::java_calling_convention(sig_bt, regs, length, false); ++ %} ++ ++ c_calling_convention ++ %{ ++ // This is obviously always outgoing ++ (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length); ++ %} ++ ++ ++ // Location of C & interpreter return values ++ // register(s) contain(s) return value for Op_StartI2C and Op_StartOSR. ++ // SEE Matcher::match. ++ // Location of compiled Java return values. Same as C for now. ++ return_value %{ ++ assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" ); ++ ++ static const int lo[Op_RegL + 1] = { // enum name ++ 0, // Op_Node ++ 0, // Op_Set ++ V0_num, // Op_RegN ++ V0_num, // Op_RegI ++ V0_num, // Op_RegP ++ F0_num, // Op_RegF ++ F0_num, // Op_RegD ++ V0_num // Op_RegL ++ }; ++ ++ static const int hi[Op_RegL + 1] = { // enum name ++ 0, // Op_Node ++ 0, // Op_Set ++ OptoReg::Bad, // Op_RegN ++ OptoReg::Bad, // Op_RegI ++ V0_H_num, // Op_RegP ++ OptoReg::Bad, // Op_RegF ++ F0_H_num, // Op_RegD ++ V0_H_num // Op_RegL ++ }; ++ ++ return OptoRegPair(hi[ideal_reg], lo[ideal_reg]); ++ %} ++ ++%} ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++//----------Operand Attributes------------------------------------------------- ++op_attrib op_cost(0); // Required cost attribute ++ ++//----------Instruction Attributes--------------------------------------------- ++ins_attrib ins_cost(100); // Required cost attribute ++ins_attrib ins_size(32); // Required size attribute (in bits) ++ins_attrib ins_pc_relative(0); // Required PC Relative flag ++ins_attrib ins_short_branch(0); // Required flag: is this instruction a ++ // non-matching short branch variant of some ++ // long branch? ++ins_attrib ins_alignment(4); // Required alignment attribute (must be a power of 2) ++ // specifies the alignment that some part of the instruction (not ++ // necessarily the start) requires. If > 1, a compute_padding() ++ // function must be provided for the instruction ++ ++//----------OPERANDS----------------------------------------------------------- ++// Operand definitions must precede instruction definitions for correct parsing ++// in the ADLC because operands constitute user defined types which are used in ++// instruction definitions. 
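++// For example, the ranged immediates defined below (such as immI16) accept a
++// constant only when their predicate over n->get_int() holds; otherwise the
++// matcher falls back to rules that take the value in a register.  A minimal
++// sketch of the immI16 range test, with an illustrative helper name:
++//
++//   static bool fits_simm16(int con) {
++//     return -32768 <= con && con <= 32767;
++//   }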
++ ++// Vectors ++operand vecD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(VecD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand rFlagsReg() %{ ++ constraint(ALLOC_IN_RC(sw64_flags)); ++ match(RegFlags); ++ ++ format %{ "RFLAGS" %} ++ interface(REG_INTER); ++%} ++ ++// Flags register, used as output of compare instructions ++operand rFlagsRegU() %{ ++ constraint(ALLOC_IN_RC(sw64_flags)); ++ match(RegFlags); ++ ++ format %{ "RFLAGS_U" %} ++ interface(REG_INTER); ++%} ++ ++operand immI_MaxI() %{ ++ predicate(n->get_int() == 2147483647); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI16_sub() %{ ++ predicate((-32767 <= n->get_int()) && (n->get_int() <= 32768)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immU8() %{ ++ predicate( n->get_int() >= 0 && n->get_int() <= 255 ); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_le_4() ++%{ ++ predicate(n->get_int() <= 4); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++ ++// Pointer for polling page ++operand immP_poll() %{ ++ predicate(n->get_ptr() != 0 && n->get_ptr() == (intptr_t)os::get_polling_page()); ++ match(ConP); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL16() %{ ++ predicate((-32768 <= n->get_long()) && (n->get_long() <= 32767)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immL16_sub() %{ ++ predicate((-32767 <= n->get_long()) && (n->get_long() <= 32768)); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++//----------Simple Operands---------------------------------------------------- ++// Immediate Operands ++// Integer Immediate ++operand immI() ++%{ ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for test vs zero ++operand immI0() ++%{ ++ predicate(n->get_int() == 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for increment ++operand immI1() ++%{ ++ predicate(n->get_int() == 1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_2() ++%{ ++ predicate(n->get_int() == 2); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_3() ++%{ ++ predicate(n->get_int() == 3); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immI_M1() ++%{ ++ predicate(n->get_int() == -1); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Valid scale values for addressing modes ++operand immI2() ++%{ ++ predicate(0 <= n->get_int() && (n->get_int() <= 3)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI8() ++%{ ++ predicate((-0x80 <= n->get_int()) && (n->get_int() < 0x80)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI16() ++%{ ++ predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767)); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Int Immediate non-negative ++operand immU31() ++%{ ++ predicate(n->get_int() >= 0); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// 
Constant for long shifts ++operand immI_32() ++%{ ++ predicate( n->get_int() == 32 ); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for long shifts ++operand immI_64() ++%{ ++ predicate( n->get_int() == 64 ); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immP() ++%{ ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immP0() ++%{ ++ predicate(n->get_ptr() == 0); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Pointer Immediate ++operand immN() %{ ++ match(ConN); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immNKlass() %{ ++ match(ConNKlass); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// NULL Pointer Immediate ++operand immN0() %{ ++ predicate(n->get_narrowcon() == 0); ++ match(ConN); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immP31() ++%{ ++ predicate(n->as_Type()->type()->reloc() == relocInfo::none ++ && (n->get_ptr() >> 31) == 0); ++ match(ConP); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++ ++// Long Immediate ++operand immL() ++%{ ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immUL8() %{ ++ predicate( n->get_long() >= 0 && n->get_long() <= 255 ); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit unsigned ++operand immUL32() ++%{ ++ predicate(n->get_long() == (unsigned int) (n->get_long())); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate 32-bit signed ++operand immL32() ++%{ ++ predicate(n->get_long() == (int) (n->get_long())); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate zero ++operand immL0() ++%{ ++ predicate(n->get_long() == 0L); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for increment ++operand immL1() ++%{ ++ predicate(n->get_long() == 1); ++ match(ConL); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for decrement ++operand immL_M1() ++%{ ++ predicate(n->get_long() == -1); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate: the value 10 ++operand immL10() ++%{ ++ predicate(n->get_long() == 10); ++ match(ConL); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Long Immediate: low 32-bit mask ++operand immL_32bits() ++%{ ++ predicate(n->get_long() == 0xFFFFFFFFL); ++ match(ConL); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Float Immediate zero ++operand immF0() ++%{ ++ predicate(jint_cast(n->getf()) == 0); ++ match(ConF); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Float Immediate ++operand immF() ++%{ ++ match(ConF); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double Immediate zero ++operand immD0() ++%{ ++ predicate(jlong_cast(n->getd()) == 0); ++ match(ConD); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Double Immediate ++operand immD() ++%{ ++ match(ConD); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Immediates for special shifts (sign extend) ++ ++// Constants for increment ++operand immI_16() ++%{ ++ 
predicate(n->get_int() == 16); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++operand immI_24() ++%{ ++ predicate(n->get_int() == 24); ++ match(ConI); ++ op_cost(0); ++ ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immI_255() ++%{ ++ predicate(n->get_int() == 255); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for short-wide masking ++operand immI_65535() ++%{ ++ predicate(n->get_int() == 65535); ++ match(ConI); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for byte-wide masking ++operand immL_255() ++%{ ++ predicate(n->get_long() == 255); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Constant for short-wide masking ++operand immL_65535() ++%{ ++ predicate(n->get_long() == 65535); ++ match(ConL); ++ ++ op_cost(0); ++ format %{ %} ++ interface(CONST_INTER); ++%} ++ ++// Register Operands ++// Integer Register ++operand rRegI() ++%{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegI); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t10_RegI() %{ ++ constraint(ALLOC_IN_RC(t10_reg)); ++ match(RegI); ++ match(rRegI); ++ ++ format %{ "T10" %} ++ interface(REG_INTER); ++%} ++ ++//operand t11_RegI() %{ ++// constraint(ALLOC_IN_RC(t11_reg)); ++// match(RegI); ++// match(rRegI); ++// ++// format %{ "T11" %} ++// interface(REG_INTER); ++// %} ++ ++operand a0_RegI() %{ ++ constraint(ALLOC_IN_RC(a0_reg)); ++ match(RegI); ++ match(rRegI); ++ ++ format %{ "A0" %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegI() %{ ++ constraint(ALLOC_IN_RC(a1_reg)); ++ match(RegI); ++ match(rRegI); ++ ++ format %{ "A1" %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegI() %{ ++ constraint(ALLOC_IN_RC(a2_reg)); ++ match(RegI); ++ match(rRegI); ++ ++ format %{ "A2" %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegI() %{ ++ constraint(ALLOC_IN_RC(a3_reg)); ++ match(RegI); ++ match(rRegI); ++ ++ format %{ "A3" %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegI() %{ ++ constraint(ALLOC_IN_RC(a4_reg)); ++ match(RegI); ++ match(rRegI); ++ ++ format %{ "A4" %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegI() ++%{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegI); ++ match(rRegI); ++ ++ format %{ "V0" %} ++ interface(REG_INTER); ++%} ++ ++operand rRegN() %{ ++ constraint(ALLOC_IN_RC(int_reg)); ++ match(RegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegN() %{ ++ constraint(ALLOC_IN_RC(v0_reg)); ++ match(RegN); ++ match(rRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t10_RegN() %{ ++ constraint(ALLOC_IN_RC(t10_reg)); ++ match(RegN); ++ match(rRegN); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Pointer Register ++operand any_RegP() %{ ++ constraint(ALLOC_IN_RC(any_reg)); ++ match(RegP); ++ match(a0_RegP); ++ match(s2_RegP); ++ match(rRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand rRegP() %{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(RegP); ++ match(a0_RegP); ++ match(s2_RegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++//TODO:why no T11 jzy ++operand no_T11_rRegP() %{ ++ constraint(ALLOC_IN_RC(no_T11_p_reg)); ++ match(RegP); ++ match(rRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(s2_long_reg)); ++ match(RegP); ++ match(rRegP); ++ match(no_T11_rRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand s3_RegP() ++%{ 
++ constraint(ALLOC_IN_RC(s3_long_reg)); ++ match(RegP); ++ match(rRegP); ++ match(no_T11_rRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t8_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t8_long_reg)); ++ match(RegP); ++ match(rRegP); ++ match(no_T11_rRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t9_long_reg)); ++ match(RegP); ++ match(rRegP); ++ match(no_T11_rRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t10_RegP() ++%{ ++ constraint(ALLOC_IN_RC(t10_long_reg)); ++ match(RegP); ++ match(rRegP); ++ match(no_T11_rRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a0_long_reg)); ++ match(RegP); ++ match(rRegP); ++ match(no_T11_rRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a1_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a1_long_reg)); ++ match(RegP); ++ match(rRegP); ++ match(no_T11_rRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a2_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a2_long_reg)); ++ match(RegP); ++ match(rRegP); ++ match(no_T11_rRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a3_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a3_long_reg)); ++ match(RegP); ++ match(rRegP); ++ match(no_T11_rRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand a4_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a4_long_reg)); ++ match(RegP); ++ match(rRegP); ++ match(no_T11_rRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++operand a5_RegP() ++%{ ++ constraint(ALLOC_IN_RC(a5_long_reg)); ++ match(RegP); ++ match(rRegP); ++ match(no_T11_rRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegP() ++%{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegP); ++ match(rRegP); ++ match(no_T11_rRegP); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand rRegL() %{ ++ constraint(ALLOC_IN_RC(long_reg)); ++ match(RegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand v0_RegL() %{ ++ constraint(ALLOC_IN_RC(v0_long_reg)); ++ match(RegL); ++ match(rRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand t9_RegL() %{ ++ constraint(ALLOC_IN_RC(t9_long_reg)); ++ match(RegL); ++ match(rRegL); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Float register operands ++operand regF() %{ ++ constraint(ALLOC_IN_RC(flt_reg)); ++ match(RegF); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++// Double register operands ++operand regD() %{ ++ constraint(ALLOC_IN_RC(dbl_reg)); ++ match(RegD); ++ ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++operand f27_RegD() ++%{ ++ constraint(ALLOC_IN_RC(dbl_tmp_f27)); ++ match(RegD); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++operand f28_RegD() ++%{ ++ constraint(ALLOC_IN_RC(dbl_tmp_f28)); ++ match(RegD); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++operand f29_RegD() ++%{ ++ constraint(ALLOC_IN_RC(dbl_tmp_f29)); ++ match(RegD); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++operand f30_RegD() ++%{ ++ constraint(ALLOC_IN_RC(dbl_tmp_f30)); ++ match(RegD); ++ op_cost(0); ++ format %{ %} ++ interface(REG_INTER); ++%} ++ ++ ++//----------Memory Operands---------------------------------------------------- ++// Direct Memory Operand ++// operand direct(immP addr) ++// %{ ++// match(addr); ++ ++// format %{ "[$addr]" %} ++// interface(MEMORY_INTER) %{ ++// base(0xFFFFFFFF); ++// index(0x4); ++// scale(0x0); ++// disp($addr); ++// %} ++// %} ++ ++// 
Indirect Memory Operand ++operand indirect(any_RegP reg) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(reg); ++ ++ format %{ "[$reg] @ indirect" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x1e); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset16(any_RegP reg, immL16 off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg off); ++ ++ format %{ "[$reg + $off (16-bit)] @ indOffset16" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x1e); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Long Offset Operand ++//operand indOffset32(rRegP reg, immL32 off) ++//%{ ++// constraint(ALLOC_IN_RC(ptr_reg)); ++// match(AddP reg off); ++// ++// format %{ "[$reg + $off (32-bit)]" %} ++// interface(MEMORY_INTER) %{ ++// base($reg); ++// index(0x1e); ++// scale(0x0); ++// disp($off); ++// %} ++//%} ++ ++// Indirect Memory Plus Index Register Plus Offset Operand ++operand indIndexOffset(any_RegP reg, rRegL lreg, immL16 off) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (AddP reg lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register Plus Offset Operand ++operand indIndex(any_RegP reg, rRegL lreg) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg lreg); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Times Scale Plus Index Register ++operand indIndexScale(any_RegP reg, rRegL lreg, immI2 scale) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP reg (LShiftL lreg scale)); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg << $scale]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale($scale); ++ disp(0x0); ++ %} ++%} ++ ++operand indPosIndexScale(any_RegP reg, rRegI idx, immI2 scale) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ predicate(n->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); ++ match(AddP reg (LShiftL (ConvI2L idx) scale)); ++ ++ op_cost(10); ++ format %{"[$reg + pos $idx << $scale]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($idx); ++ scale($scale); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Times Scale Plus Index Register Plus Offset Operand ++operand indIndexScaleOffset(any_RegP reg, immL16 off, rRegL lreg, immI2 scale) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (AddP reg (LShiftL lreg scale)) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg << $scale]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale($scale); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Positive Index Register Plus Offset Operand ++operand indPosIndexOffset(any_RegP reg, immL16 off, rRegI idx) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ predicate(n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0); ++ match(AddP (AddP reg (ConvI2L idx)) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $idx]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($idx); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand ++operand indPosIndexScaleOffset(any_RegP reg, immL16 off, rRegI idx, immI2 scale) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ predicate(n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); ++ match(AddP 
(AddP reg (LShiftL (ConvI2L idx) scale)) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $idx << $scale]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($idx); ++ scale($scale); ++ disp($off); ++ %} ++%} ++ ++// Indirect Narrow Oop Plus Offset Operand ++// Note: x86 architecture doesn't support "scale * index + offset" without a base ++// we can't free r12 even with Universe::narrow_oop_base() == NULL. TODO:why r12? jzy ++//lsp todo check sw is s5?? ++operand indCompressedOopOffset(rRegN reg, immL16 off) %{ ++ predicate(UseCompressedOops && (Universe::narrow_oop_shift() == Address::times_8)); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) off); ++ ++ op_cost(10); ++ format %{"[S5 + $reg << 3 + $off] (compressed oop addressing)" %} ++ interface(MEMORY_INTER) %{ ++ base(0xe); // S5 fo SW64 ++ index($reg); ++ scale(0x3); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Operand ++operand indirectNarrow(rRegN reg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(DecodeN reg); ++ ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x1e); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Plus Short Offset Operand ++operand indOffset16Narrow(rRegN reg, immL16 off) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) off); ++ ++ format %{ "[$reg + $off (16-bit)]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index(0x1e); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Long Offset Operand ++//operand indOffset32Narrow(rRegN reg, immL32 off) ++//%{ ++// predicate(Universe::narrow_oop_shift() == 0); ++// constraint(ALLOC_IN_RC(ptr_reg)); ++// match(AddP (DecodeN reg) off); ++// ++// format %{ "[$reg + $off (32-bit)]" %} ++// interface(MEMORY_INTER) %{ ++// base($reg); ++// index(0x1e); ++// scale(0x0); ++// disp($off); ++// %} ++//%} ++ ++// Indirect Memory Plus Index Register Plus Offset Operand ++operand indIndexOffsetNarrow(rRegN reg, rRegL lreg, immL16 off) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (AddP (DecodeN reg) lreg) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $lreg]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Plus Index Register Plus Offset Operand ++operand indIndexNarrow(rRegN reg, rRegL lreg) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) lreg); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale(0x0); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Times Scale Plus Index Register ++operand indIndexScaleNarrow(rRegN reg, rRegL lreg, immI2 scale) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (DecodeN reg) (LShiftL lreg scale)); ++ ++ op_cost(10); ++ format %{"[$reg + $lreg << $scale]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale($scale); ++ disp(0x0); ++ %} ++%} ++ ++// Indirect Memory Times Scale Plus Index Register Plus Offset Operand ++operand indIndexScaleOffsetNarrow(rRegN reg, immL16 off, rRegL lreg, immI2 scale) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ match(AddP (AddP (DecodeN reg) (LShiftL lreg scale)) off); ++ ++ op_cost(10); ++ 
format %{"[$reg + $off + $lreg << $scale]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($lreg); ++ scale($scale); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Times Plus Positive Index Register Plus Offset Operand ++operand indPosIndexOffsetNarrow(rRegN reg, immL16 off, rRegI idx) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->as_Type()->type()->is_long()->_lo >= 0); ++ match(AddP (AddP (DecodeN reg) (ConvI2L idx)) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $idx]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($idx); ++ scale(0x0); ++ disp($off); ++ %} ++%} ++ ++// Indirect Memory Times Scale Plus Positive Index Register Plus Offset Operand ++operand indPosIndexScaleOffsetNarrow(rRegN reg, immL16 off, rRegI idx, immI2 scale) ++%{ ++ constraint(ALLOC_IN_RC(ptr_reg)); ++ predicate(Universe::narrow_oop_shift() == 0 && n->in(2)->in(3)->in(1)->as_Type()->type()->is_long()->_lo >= 0); ++ match(AddP (AddP (DecodeN reg) (LShiftL (ConvI2L idx) scale)) off); ++ ++ op_cost(10); ++ format %{"[$reg + $off + $idx << $scale]" %} ++ interface(MEMORY_INTER) %{ ++ base($reg); ++ index($idx); ++ scale($scale); ++ disp($off); ++ %} ++%} ++ ++//----------Special Memory Operands-------------------------------------------- ++// Stack Slot Operand - This operand is used for loading and storing temporary ++// values on the stack where a match requires a value to ++// flow through memory. ++operand stackSlotP(sRegP reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1e); // SP ++ index(0x1e); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotI(sRegI reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1e); // SP ++ index(0x1e); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotF(sRegF reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1e); // SP ++ index(0x1e); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotD(sRegD reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1e); // SP ++ index(0x1e); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++operand stackSlotL(sRegL reg) ++%{ ++ constraint(ALLOC_IN_RC(stack_slots)); ++ // No match rule because this operand is only generated in matching ++ ++ format %{ "[$reg]" %} ++ interface(MEMORY_INTER) %{ ++ base(0x1e); // SP ++ index(0x1e); // No Index ++ scale(0x0); // No Scale ++ disp($reg); // Stack Offset ++ %} ++%} ++ ++//----------Conditional Branch Operands---------------------------------------- ++// Comparison Op - This is the operation of the comparison, and is limited to ++// the following set of codes: ++// L (<), LE (<=), G (>), GE (>=), E (==), NE (!=) ++// ++// Other attributes of the comparison, such as unsignedness, are specified ++// by the comparison instruction that sets a condition code flags register. 
++// That result is represented by a flags operand whose subtype is appropriate ++// to the unsignedness (etc.) of the comparison. ++// ++// Later, the instruction which matches both the Comparison Op (a Bool) and ++// the flags (produced by the Cmp) specifies the coding of the comparison op ++// by matching a specific subtype of Bool operand below, such as cmpOpU. ++ ++// Comparision Code ++operand cmpOp() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x4, "e"); ++ not_equal(0x5, "ne"); ++ less(0xC, "l"); ++ greater_equal(0xD, "ge"); ++ less_equal(0xE, "le"); ++ greater(0xF, "g"); ++ overflow(0x0, "o"); ++ no_overflow(0x1, "no"); ++ %} ++%} ++ ++// Comparison Code, unsigned compare. Used by FP also, with ++// C2 (unordered) turned into GT or LT already. The other bits ++// C0 and C3 are turned into Carry & Zero flags. ++operand cmpOpU() %{ ++ match(Bool); ++ ++ format %{ "" %} ++ interface(COND_INTER) %{ ++ equal(0x4, "e"); ++ not_equal(0x5, "ne"); ++ less(0x2, "b"); ++ greater_equal(0x3, "nb"); ++ less_equal(0x6, "be"); ++ greater(0x7, "nbe"); ++ overflow(0x0, "o"); ++ no_overflow(0x1, "no"); ++ %} ++%} ++ ++ ++//----------OPERAND CLASSES---------------------------------------------------- ++// Operand Classes are groups of operands that are used as to simplify ++// instruction definitions by not requiring the AD writer to specify separate ++// instructions for every form of operand when the instruction accepts ++// multiple operand types with the same basic encoding and format. The classic ++// case of this is memory operands. ++ ++opclass memory(indirect, indOffset16, indIndexOffset, indIndex, indIndexScale, indIndexScaleOffset, ++ indPosIndexScale, indPosIndexOffset, indPosIndexScaleOffset, ++ indCompressedOopOffset, ++ indirectNarrow, indOffset16Narrow, indIndexOffsetNarrow, indIndexNarrow, indIndexScaleNarrow, indIndexScaleOffsetNarrow, ++ indPosIndexOffsetNarrow, indPosIndexScaleOffsetNarrow); ++ ++//----------PIPELINE----------------------------------------------------------- ++// Rules which define the behavior of the target architectures pipeline. ++pipeline %{ ++ ++//----------ATTRIBUTES--------------------------------------------------------- ++attributes %{ ++ fixed_size_instructions; // Fixed size instructions ++ branch_has_delay_slot; // branch have delay slot in gs2 ++ max_instructions_per_bundle = 1; // 1 instruction per bundle ++ max_bundles_per_cycle = 4; // Up to 4 bundles per cycle ++ bundle_unit_size=4; ++ instruction_unit_size = 4; // An instruction is 4 bytes long ++ instruction_fetch_unit_size = 16; // The processor fetches one line ++ instruction_fetch_units = 1; // of 16 bytes ++ ++ // List of nop instructions ++ nops( MachNop ); ++ %} ++ ++ //----------RESOURCES---------------------------------------------------------- ++ // Resources are the functional units available to the machine ++ ++ resources(D1, D2, D3, D4, DECODE = D1 | D2 | D3| D4, ALU1, ALU2, ALU = ALU1 | ALU2, FPU1, FPU2, FPU = FPU1 | FPU2, MEM, BR); ++ ++ //----------PIPELINE DESCRIPTION----------------------------------------------- ++ // Pipeline Description specifies the stages in the machine's pipeline ++ ++ // IF: fetch ++ // ID: decode ++ // RD: read ++ // CA: caculate ++ // WB: write back ++ // CM: commit ++ ++ pipe_desc(IF, ID, RD, CA, WB, CM); ++ ++ ++ //----------PIPELINE CLASSES--------------------------------------------------- ++ // Pipeline Classes describe the stages in which input and output are ++ // referenced by the hardware pipeline. 
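++// One way to read the latency annotations used in the classes below
++// (e.g. "dst : WB(write)+5" in the integer multiply class): the result is
++// treated as available only that many cycles after write-back, so a
++// dependent instruction issued back-to-back is scheduled roughly that much
++// later.  Purely illustrative arithmetic, not part of the ADLC model:
++//
++//   static int extra_wait_cycles(int result_extra_latency,
++//                                int cycles_already_between) {
++//     int wait = result_extra_latency - cycles_already_between;
++//     return wait > 0 ? wait : 0;
++//   }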
++ ++ //No.1 Integer ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regI_regI(rRegI dst, rRegI src1, rRegI src2) %{ ++ single_instruction; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+1; ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.19 Integer mult operation : dst <-- reg1 mult reg2 ++ pipe_class ialu_mult(rRegI dst, rRegI src1, rRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_reg(rRegL dst, rRegL src1, rRegL src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class ialu_mult_imm(rRegI dst, rRegI src1, immU8 src2) %{ ++ src1 : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ pipe_class mulL_reg_imm(rRegL dst, rRegL src1, immUL8 src2) %{ ++ src1 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer div operation : dst <-- reg1 div reg2 ++ pipe_class ialu_div(rRegI dst, rRegI src1, rRegI src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.19 Integer mod operation : dst <-- reg1 mod reg2 ++ pipe_class ialu_mod(rRegI dst, rRegI src1, rRegI src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write)+10; ++ DECODE : ID; ++ ALU2 : CA; ++ %} ++ ++ //No.15 Long ALU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class ialu_regL_regL(rRegL dst, rRegL src1, rRegL src2) %{ ++ instruction_count(2); ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm operation : dst <-- reg1 op immUL8 ++ pipe_class ialu_regL_imm(rRegL dst, rRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.18 Long ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regL_imm16(rRegL dst, rRegL src) %{ ++ instruction_count(2); ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //no.16 load Long from memory : ++ pipe_class ialu_loadL(rRegL dst, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ dst : WB(write)+5; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.17 Store Long to Memory : ++ pipe_class ialu_storeL(rRegL src, memory mem) %{ ++ instruction_count(2); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.2 Integer ALU reg-imm16 operation : dst <-- reg1 op imm16 ++ pipe_class ialu_regI_imm16(rRegI dst, rRegI src) %{ ++ single_instruction; ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.3 Integer move operation : dst <-- reg ++ pipe_class ialu_regI_mov(rRegI dst, rRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ ALU : CA; ++ %} ++ ++ //No.4 No instructions : do nothing ++ pipe_class empty( ) %{ ++ instruction_count(0); ++ %} ++ ++ //No.5 UnConditional branch ++ pipe_class pipe_jmp( label labl ) %{ ++ multiple_bundles; ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //No.6 ALU Conditional branch : ++ pipe_class pipe_alu_branch(rRegI src1, rRegI src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++ //no.7 load integer from memory : ++ pipe_class ialu_reg_mem(rRegI dst, memory mem) %{ ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.8 Store Integer to Memory : ++ pipe_class ialu_storeI(rRegI src, memory mem) 
%{ ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ ++ //No.10 Floating FPU reg-reg operation : dst <-- reg1 op reg2 ++ pipe_class fpu_regF_regF(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ //No.22 Floating div operation : dst <-- reg1 div reg2 ++ pipe_class fpu_div(regF dst, regF src1, regF src2) %{ ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ pipe_class fcvt_I2D(regD dst, rRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class fcvt_D2I(rRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU1 : CA; ++ %} ++ ++ pipe_class pipe_mfc1(rRegI dst, regD src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ pipe_class pipe_mtc1(regD dst, rRegI src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ MEM : RD(5); ++ %} ++ ++ //No.23 Floating sqrt operation : dst <-- reg1 sqrt reg2 ++ pipe_class fpu_sqrt(regF dst, regF src1, regF src2) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU2 : CA; ++ %} ++ ++ //No.11 Load Floating from Memory : ++ pipe_class fpu_loadF(regF dst, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ dst : WB(write)+3; ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.12 Store Floating to Memory : ++ pipe_class fpu_storeF(regF src, memory mem) %{ ++ instruction_count(1); ++ mem : RD(read); ++ src : RD(read); ++ DECODE : ID; ++ MEM : RD; ++ %} ++ ++ //No.13 FPU Conditional branch : ++ pipe_class pipe_fpu_branch(regF src1, regF src2, label labl ) %{ ++ multiple_bundles; ++ src1 : RD(read); ++ src2 : RD(read); ++ DECODE : ID; ++ BR : RD; ++ %} ++ ++//No.14 Floating FPU reg operation : dst <-- op reg ++ pipe_class fpu1_regF(regF dst, regF src) %{ ++ src : RD(read); ++ dst : WB(write); ++ DECODE : ID; ++ FPU : CA; ++ %} ++ ++ pipe_class long_memory_op() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(30); ++ %} ++ ++ pipe_class simple_call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ BR : RD; ++ %} ++ ++ pipe_class call() %{ ++ instruction_count(10); multiple_bundles; force_serialization; ++ fixed_latency(200); ++ %} ++ ++ //FIXME: ++ //No.9 Piple slow : for multi-instructions ++ pipe_class pipe_slow( ) %{ ++ instruction_count(20); ++ force_serialization; ++ multiple_bundles; ++ fixed_latency(50); ++ %} ++ ++%} ++ ++ ++//----------INSTRUCTIONS------------------------------------------------------- ++// ++// match -- States which machine-independent subtree may be replaced ++// by this instruction. ++// ins_cost -- The estimated cost of this instruction is used by instruction ++// selection to identify a minimum cost tree of machine ++// instructions that matches a tree of machine-independent ++// instructions. ++// format -- A string providing the disassembly for this instruction. ++// The value of an instruction's operand may be inserted ++// by referring to it with a '$' prefix. ++// opcode -- Three instruction opcodes may be provided. These are referred ++// to within an encode class as $primary, $secondary, and $tertiary ++// rrspectively. The primary opcode is commonly used to ++// indicate the type of machine instruction, while secondary ++// and tertiary are often used for prefix options or addressing ++// modes. 
++// ins_encode -- A list of encode classes with parameters. The encode class ++// name must have been defined in an 'enc_class' specification ++// in the encode section of the architecture description. ++ ++//-------- only swjdk8-------- ++instruct s4AddLp(rRegP dst, rRegI index, immI_2 dis, rRegP base) %{ ++ match(Set dst (AddP base (LShiftL (ConvI2L index) dis))); ++ ins_cost(10); ++ format %{ " s4addl $index,$base,$dst @ s4AddLp " %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register op1 = $index$$Register; ++ Register op2 = $base$$Register; ++ __ s4addl(op1, op2, dst); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct s8AddLp(rRegP dst, rRegI index, immI_3 scale, rRegP base) %{ ++ match(Set dst (AddP base (LShiftL (ConvI2L index) scale))); ++ ins_cost(10); ++ format %{ " s8addl $index,$base,$dst @ s8AddLp " %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register op1 = $index$$Register; ++ Register op2 = $base$$Register; ++ __ s8addl(op1, op2, dst); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct s4AddWp(rRegI dst, rRegI index, immI_2 scale, rRegI base) %{ ++ match(Set dst (AddI base (LShiftI index scale))); ++ ins_cost(10); ++ format %{ " s4addw $index,$base,$dst @ s4AddWp " %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register op1 = $index$$Register; ++ Register op2 = $base$$Register; ++ __ s4addw(op1, op2, dst); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct s8AddWp(rRegI dst, rRegI index, immI_3 scale, rRegI base) %{ ++ match(Set dst (AddI base (LShiftI index scale))); ++ ins_cost(10); ++ format %{ " s8addw $index,$base,$dst @ s8AddWp " %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register op1 = $index$$Register; ++ Register op2 = $base$$Register; ++ __ s8addw(op1, op2, dst); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++//---------------------- ++ ++//----------Load/Store/Move Instructions--------------------------------------- ++//----------Load Instructions-------------------------------------------------- ++ ++// Load Byte (8 bit signed) ++instruct loadB(rRegI dst, memory mem) ++%{ ++ match(Set dst (LoadB mem)); ++ ++ ins_cost(125); ++ format %{ "ldbu $dst, $mem\t# byte\t@loadB\n\t" ++ "\tsextb $dst, $dst" ++ %} ++ ++ ins_encode %{ ++ __ ldbu ($dst$$Register, $mem$$Address); ++ __ sextb($dst$$Register, $dst$$Register); ++ %} ++ ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Byte (8 bit signed) into Long Register ++instruct loadB2L(rRegL dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadB mem))); ++ ++ ins_cost(125); ++ format %{ "ldbu $dst, $mem\t# byte -> long\t@loadB2L\n\t" ++ "\tsextb $dst, $dst" %} ++ ++ ins_encode %{ ++ __ ldbu ($dst$$Register, $mem$$Address); ++ __ sextb($dst$$Register, $dst$$Register); ++ %} ++ ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Unsigned Byte (8 bit UNsigned) ++instruct loadUB(rRegI dst, memory mem) ++%{ ++ match(Set dst (LoadUB mem)); ++ ++ ins_cost(125); ++ format %{ "ldbu $dst, $mem\t# ubyte\t@loadUB" %} ++ ++ ins_encode %{ ++ __ ldbu($dst$$Register, $mem$$Address); ++ %} ++ ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Unsigned Byte (8 bit UNsigned) into Long Register ++instruct loadUB2L(rRegL dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadUB mem))); ++ ++ ins_cost(125); ++ format %{ "ldbu $dst, $mem\t# ubyte -> long\t@loadUB2L" %} ++ ++ ins_encode %{ ++ __ ldbu($dst$$Register, $mem$$Address); ++ %} ++ ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Unsigned Byte (8 bit UNsigned) with 32-bit mask into Long Register ++instruct loadUB2L_immI(rRegL dst, memory mem, immI 
mask) %{ ++ match(Set dst (ConvI2L (AndI (LoadUB mem) mask))); ++ //effect(KILL cr); ++ ++ format %{ "ldbu $dst, $mem\t# ubyte & 32-bit mask -> long\t@loadUB2L_immI\n\t" ++ "andw $dst, right_n_bits($mask, 8)" %} ++ ins_encode %{ ++ Register Rdst = $dst$$Register; ++ __ ldbu(Rdst, $mem$$Address); ++ __ andw(Rdst, $mask$$constant & right_n_bits(8), Rdst); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Short (16 bit signed) ++instruct loadS(rRegI dst, memory mem) ++%{ ++ match(Set dst (LoadS mem)); ++ ++ ins_cost(125); ++ format %{ "ldhu $dst, $mem\t# short\t@loadS\n\t" ++ "sexth $dst, $dst" %} ++ ++ ins_encode %{ ++ __ ldhu ($dst$$Register, $mem$$Address); ++ __ sexth($dst$$Register, $dst$$Register); ++ %} ++ ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Short (16 bit signed) to Byte (8 bit signed) ++instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ldbu $dst, $mem\t# short -> byte\t@loadS2B\n\t" ++ "sextb $dst, $dst" %} ++ ins_encode %{ ++ __ ldbu ($dst$$Register, $mem$$Address); ++ __ sextb($dst$$Register, $dst$$Register); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Short (16 bit signed) into Long Register ++instruct loadS2L(rRegL dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadS mem))); ++ ++ ins_cost(125); ++ format %{ "ldhu $dst, $mem\t# short\t@loadS2L\n\t" ++ "sexth $dst, $dst" %} ++ ++ ins_encode %{ ++ __ ldhu ($dst$$Register, $mem$$Address); ++ __ sexth($dst$$Register, $dst$$Register); ++ %} ++ ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Unsigned Short/Char (16 bit UNsigned) ++instruct loadUS(rRegI dst, memory mem) ++%{ ++ match(Set dst (LoadUS mem)); ++ ++ ins_cost(125); ++ format %{ "ldhu $dst, $mem\t# ushort/char\t@loadUS" %} ++ ++ ins_encode %{ ++ __ ldhu($dst$$Register, $mem$$Address); ++ %} ++ ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed) ++instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ldbu $dst, $mem\t# ushort -> byte\t@loadUS2B\n\t" ++ "sextb $dst, $dst" %} ++ ins_encode %{ ++ __ ldbu ($dst$$Register, $mem$$Address); ++ __ sextb($dst$$Register, $dst$$Register); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Unsigned Short/Char (16 bit UNsigned) into Long Register ++instruct loadUS2L(rRegL dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadUS mem))); ++ ++ ins_cost(125); ++ format %{ "ldhu $dst, $mem\t# ushort/char -> long\t@loadUS2L" %} ++ ++ ins_encode %{ ++ __ ldhu($dst$$Register, $mem$$Address); ++ %} ++ ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register ++instruct loadUS2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{ ++ match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); ++ ++ format %{ "ldbu $dst, $mem\t# ushort/char & 0xFF -> long\t@loadUS2L_immI_255" %} ++ ins_encode %{ ++ __ ldbu($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Unsigned Short/Char (16 bit UNsigned) with 32-bit mask into Long Register ++instruct loadUS2L_immI(rRegL dst, memory mem, immI mask) %{ ++ match(Set dst (ConvI2L (AndI (LoadUS mem) mask))); ++ //effect(KILL cr); ++ ++ format %{ "ldhu $dst, $mem\t# ushort/char & 32-bit mask -> long\t@loadUS2L_immI\n\t" ++ "andw $dst, right_n_bits($mask, 16), $dst" %} ++ ins_encode %{ ++ Register Rdst = $dst$$Register; ++ __ ldhu(Rdst, 
$mem$$Address); ++ __ andw(Rdst, $mask$$constant & right_n_bits(16), Rdst); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Integer ++instruct loadI(rRegI dst, memory mem) ++%{ ++ match(Set dst (LoadI mem)); ++ ++ ins_cost(125); ++ format %{ "ldws $dst, $mem\t# int\t@loadI" %} ++ ++ ins_encode %{ ++ __ ldws($dst$$Register, $mem$$Address); ++ %} ++ ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Integer (32 bit signed) to Byte (8 bit signed) ++instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour)); ++ ++ ins_cost(125); ++ format %{ "ldbu $dst, $mem\t# int -> byte\t@loadI2B\n\t" ++ "sextb $dst, $dst" %} ++ ins_encode %{ ++ __ ldbu($dst$$Register, $mem$$Address); ++ __ sextb($dst$$Register, $dst$$Register); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned) ++instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ldbu $dst, $mem\t# int -> ubyte\t@loadI2UB" %} ++ ins_encode %{ ++ __ ldbu($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Integer (32 bit signed) to Short (16 bit signed) ++instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{ ++ match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen)); ++ ++ ins_cost(125); ++ format %{ "ldhu $dst, $mem\t# int -> short\t@loadI2S\n\t" ++ "sexth $dst, $dst" %} ++ ins_encode %{ ++ __ ldhu ($dst$$Register, $mem$$Address); ++ __ sexth($dst$$Register, $dst$$Register); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned) ++instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (AndI (LoadI mem) mask)); ++ ++ ins_cost(125); ++ format %{ "ldhu $dst, $mem\t# int -> ushort/char\t@loadI2US" %} ++ ins_encode %{ ++ __ ldhu($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Integer into Long Register ++instruct loadI2L(rRegL dst, memory mem) ++%{ ++ match(Set dst (ConvI2L (LoadI mem))); ++ ++ ins_cost(100); ++ format %{ "ldws $dst, $mem\t# int -> long\t@loadI2L" %} ++ ++ ins_encode %{ ++ __ ldws($dst$$Register, $mem$$Address); ++ %} ++ ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Integer with mask 0xFF into Long Register ++instruct loadI2L_immI_255(rRegL dst, memory mem, immI_255 mask) %{ ++ match(Set dst (ConvI2L (AndI (LoadI mem) mask))); ++ ++ format %{ "ldbu $dst, $mem\t# int & 0xFF -> long\t@loadI2L_immI_255" %} ++ ins_encode %{ ++ __ ldbu($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Integer with mask 0xFFFF into Long Register ++instruct loadI2L_immI_65535(rRegL dst, memory mem, immI_65535 mask) %{ ++ match(Set dst (ConvI2L (AndI (LoadI mem) mask))); ++ ++ format %{ "ldhu $dst, $mem\t# int & 0xFFFF -> long\t@loadI2L_immI_65535" %} ++ ins_encode %{ ++ __ ldhu($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Integer with a 31-bit mask into Long Register TODO:jzy mask length s is OK? 
andw's immediate length -si 8-bit ++instruct loadI2L_immU31(rRegL dst, memory mem, immU31 mask) %{ ++ match(Set dst (ConvI2L (AndI (LoadI mem) mask))); ++ //effect(KILL cr); ++ ++ format %{ "ldwu $dst, $mem\t# int & 31-bit mask -> long\t@loadI2L_immU31\n\t" ++ "andw $dst, $mask, $dst" %} ++ ins_encode %{ ++ Register Rdst = $dst$$Register; ++ __ ldw(Rdst, $mem$$Address); ++ __ andw(Rdst, $mask$$constant, Rdst); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Unsigned Integer into Long Register ++instruct loadUI2L(rRegL dst, memory mem, immL_32bits mask) ++%{ ++ match(Set dst (AndL (ConvI2L (LoadI mem)) mask)); ++ ++ ins_cost(125); ++ format %{ "ldwu $dst, $mem\t# uint -> long\t@loadUI2L" %} ++ ++ ins_encode %{ ++ __ ldwu($dst$$Register, $mem$$Address); ++ %} ++ ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Long ++//TODO implicit null check LSP ++instruct loadL(rRegL dst, memory mem) ++%{ ++ match(Set dst (LoadL mem)); ++ ++ ins_cost(125); ++ format %{ "ldl $dst, $mem\t# long\t@loadL" %} ++ ++ ins_encode %{ ++ __ ldl($dst$$Register, $mem$$Address); ++ %} ++ ++ ins_pipe(ialu_reg_mem); // XXX ++%} ++ ++// Load Long - UNaligned ++instruct loadL_unaligned(rRegL dst, memory mem) ++%{ ++ match(Set dst (LoadL_unaligned mem)); ++ ++ // FIXME: Need more effective ldl/ldr ++ ins_cost(450); ++ format %{ "loadL_unaligned $dst, $mem #@loadL_unaligned" %} ++ ins_encode %{ ++ __ ldl($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe( ialu_loadL ); ++%} ++ ++// Load Range ++//TODO CHECK LSP ++instruct loadRange(rRegI dst, memory mem) ++%{ ++ match(Set dst (LoadRange mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ldws $dst, $mem\t# range\t@loadRange" %} ++ ins_encode %{ ++ __ ldws($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++ ++// Load Pointer ++instruct loadP(rRegP dst, memory mem) ++%{ ++ match(Set dst (LoadP mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ldptr $dst, $mem\t# ptr\t@loadP" %} ++ ins_encode (load_P_enc(dst, mem)); ++ ins_pipe(ialu_reg_mem); // XXX ++%} ++ ++// Load Compressed Pointer ++instruct loadN(rRegN dst, memory mem) ++%{ ++ match(Set dst (LoadN mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ldwu $dst, $mem\t# compressed ptr\t@loadN" %} ++ ins_encode (load_N_enc(dst, mem)); ++ ins_pipe(ialu_reg_mem); // XXX ++%} ++ ++ ++// Load Klass Pointer ++instruct loadKlass(rRegP dst, memory mem) ++%{ ++ match(Set dst (LoadKlass mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ldptr $dst, $mem\t# class\t@loadKlass" %} ++ ins_encode (load_P_enc(dst, mem)); ++ ins_pipe(ialu_reg_mem); // XXX ++%} ++ ++// Load narrow Klass Pointer ++instruct loadNKlass(rRegN dst, memory mem) ++%{ ++ match(Set dst (LoadNKlass mem)); ++ ++ ins_cost(125); // XXX ++ format %{ "ldwu $dst, $mem\t# compressed klass ptr\t@loadNKlass" %} ++ ins_encode (load_N_enc(dst, mem)); ++ ins_pipe(ialu_reg_mem); // XXX ++%} ++ ++// Load Float ++instruct loadF(regF dst, memory mem) ++%{ ++ match(Set dst (LoadF mem)); ++ //effect(KILL rscratch1_GP); ++ ++ ins_cost(145); // XXX ++ format %{ "load_float $dst, $mem\t# float\t@loadF" %} ++ ins_encode %{ ++ __ load_float($dst$$FloatRegister, $mem$$Address); ++ %} ++ ins_pipe(ialu_reg_mem); // XXX ++%} ++ ++// Load Float TODO:jzy 4 bytes? ++instruct MoveF2VL(regF dst, regF src) %{ ++ match(Set dst src); ++ format %{ "fcpys $src, $src, $dst\t! load float (4 bytes)\t@MoveF2VL" %} ++ ins_encode %{ ++ __ fcpys($src$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Load Float TODO: jzy need this? 
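++// Note: MoveF2VL/MoveVL2F (and MoveD2VL/MoveVL2D below) are plain
++// register-to-register FP moves. Assuming fcpys keeps Alpha-style CPYS
++// semantics (sign from the first source, exponent/fraction from the second),
++// "fcpys src, src, dst" is simply a copy of src into dst.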
++instruct MoveVL2F(regF dst, regF src) %{ ++ match(Set dst src); ++ format %{ "fcpys $src, $src, $dst\t! load float (4 bytes)\t@MoveVL2F" %} ++ ins_encode %{ ++ __ fcpys($src$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF); ++%} ++ ++// Load Double ++/*instruct loadD_partial(regD dst, memory mem) ++%{ ++// predicate(!UseXmmLoadAndClearUpper); ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(145); // XXX ++ format %{ "movlpd $dst, $mem\t# double" %} ++ ins_encode %{ ++// __ movdbl($dst$$XMMRegister, $mem$$Address); ++ %} ++ ins_pipe(ialu_reg_mem); // XXX ++%}*/ ++ ++instruct loadD(regD dst, memory mem) ++%{ ++// predicate(UseXmmLoadAndClearUpper); ++ match(Set dst (LoadD mem)); ++ ++ ins_cost(145); // XXX ++ format %{ "load_double $dst, $mem\t# double\t@loadD" %} ++ ins_encode %{ ++ __ load_double($dst$$FloatRegister, $mem$$Address); ++ %} ++ ins_pipe(ialu_reg_mem); // XXX ++%} ++ ++instruct loadD_unaligned(regD dst, memory mem ) %{ ++ match(Set dst (LoadD_unaligned mem)); ++ ins_cost(250); ++ // FIXME: Need more effective ldl/ldr ++ ins_encode %{ ++ __ load_double($dst$$FloatRegister, $mem$$Address); ++ %} ++ ins_pipe( ialu_reg_mem ); ++%} ++ ++// Load Double ++// TODO CHECK LSP ++instruct MoveD2VL(regD dst, regD src) %{ ++ match(Set dst src); ++ format %{ "fcpys $src, $src, $dst\t! load double (8 bytes)\t@MoveD2VL" %} ++ ins_encode %{ ++ __ fcpys($src$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Load Double ++// TODO CHECK LSP ++instruct MoveVL2D(regD dst, regD src) %{ ++ match(Set dst src); ++ format %{ "fcpys $src, $src, $dst\t! load double (8 bytes)\t@MoveVL2D" %} ++ ins_encode %{ ++ __ fcpys($src$$FloatRegister, $src$$FloatRegister, $dst$$FloatRegister); ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++// Load Effective Address ++instruct leaP16(rRegP dst, indOffset16 mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); // XXX ++ format %{ "lea $dst, $mem\t# ptr 16\t@leaP16" %} ++ ins_encode %{ ++ __ lea($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_regI_mov); //TODO check ++%} ++ ++//instruct leaP32(rRegP dst, indOffset32 mem) ++//%{ ++// match(Set dst mem); ++// ++// ins_cost(110); ++// format %{ "lea $dst, $mem\t# ptr 32\t@leaP32" %} ++// ins_encode %{ ++// __ lea($dst$$Register, $mem$$Address); ++// %} ++// ins_pipe(ialu_regI_mov);//TODO check ++//%} ++ ++instruct leaPIdxOff(rRegP dst, indIndexOffset mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "lea $dst, $mem\t# ptr idxoff\t@leaPIdxOff" %} ++ ins_encode %{ ++ __ lea($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_regI_mov); ++%} ++ ++instruct leaPIdxScale(rRegP dst, indIndexScale mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "lea $dst, $mem\t# ptr idxscale\t@leaPIdxScale" %} ++ ins_encode %{ ++ __ lea($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_regI_mov); ++%} ++ ++instruct leaPPosIdxScale(rRegP dst, indPosIndexScale mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "lea $dst, $mem\t# ptr idxscale\t@leaPPosIdxScale" %} ++ ins_encode %{ ++ __ lea($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_regI_mov); ++%} ++ ++instruct leaPIdxScaleOff(rRegP dst, indIndexScaleOffset mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "lea $dst, $mem\t# ptr idxscaleoff\t@leaPIdxScaleOff" %} ++ ins_encode %{ ++ __ lea($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_regI_mov); ++%} ++ ++instruct leaPPosIdxOff(rRegP dst, 
indPosIndexOffset mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "lea $dst, $mem\t# ptr posidxoff\t@leaPPosIdxOff" %} ++ ins_encode %{ ++ __ lea($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_regI_mov); ++%} ++ ++instruct leaPPosIdxScaleOff(rRegP dst, indPosIndexScaleOffset mem) ++%{ ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "lea $dst, $mem\t# ptr posidxscaleoff\t@leaPPosIdxScaleOff" %} ++ ins_encode %{ ++ __ lea($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_regI_mov); ++%} ++ ++// Load Effective Address which uses Narrow (32-bits) oop ++instruct leaPCompressedOopOffset(rRegP dst, indCompressedOopOffset mem) ++%{ ++ predicate(UseCompressedOops && (Universe::narrow_oop_shift() != 0)); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "lea $dst, $mem\t# ptr compressedoopoff32\t@leaPCompressedOopOffset" %} ++ ins_encode %{ ++ __ lea($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_regI_mov); ++%} ++ ++instruct leaP16Narrow(rRegP dst, indOffset16Narrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); // XXX ++ format %{ "lea $dst, $mem\t# ptr off8narrow\t@leaP8Narrow" %} ++ ins_encode %{ ++ __ lea($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_regI_mov); ++%} ++ ++//instruct leaP32Narrow(rRegP dst, indOffset32Narrow mem) ++//%{ ++// predicate(Universe::narrow_oop_shift() == 0); ++// match(Set dst mem); ++// ++// ins_cost(110); ++// format %{ "lea $dst, $mem\t# ptr off32narrow\t@leaP32Narrow" %} ++// ins_encode %{ ++// __ lea($dst$$Register, $mem$$Address); ++// %} ++// ins_pipe(ialu_regI_mov); ++//%} ++ ++instruct leaPIdxOffNarrow(rRegP dst, indIndexOffsetNarrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "lea $dst, $mem\t# ptr idxoffnarrow\t@leaPIdxOffNarrow" %} ++ ins_encode %{ ++ __ lea($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_regI_mov); ++%} ++ ++instruct leaPIdxScaleNarrow(rRegP dst, indIndexScaleNarrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "lea $dst, $mem\t# ptr idxscalenarrow\t@leaPIdxScaleNarrow" %} ++ ins_encode %{ ++ __ lea($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_regI_mov); ++%} ++ ++instruct leaPIdxScaleOffNarrow(rRegP dst, indIndexScaleOffsetNarrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "lea $dst, $mem\t# ptr idxscaleoffnarrow\t@leaPIdxScaleOffNarrow" %} ++ ins_encode %{ ++ __ lea($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_regI_mov); ++%} ++ ++instruct leaPPosIdxOffNarrow(rRegP dst, indPosIndexOffsetNarrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "lea $dst, $mem\t# ptr posidxoffnarrow\t@leaPPosIdxOffNarrow" %} ++ ins_encode %{ ++ __ lea($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_regI_mov); ++%} ++ ++instruct leaPPosIdxScaleOffNarrow(rRegP dst, indPosIndexScaleOffsetNarrow mem) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst mem); ++ ++ ins_cost(110); ++ format %{ "lea $dst, $mem\t# ptr posidxscaleoffnarrow\t@leaPPosIdxScaleOffNarrow" %} ++ ins_encode %{ ++ __ lea($dst$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_regI_mov); ++%} ++ ++instruct loadConI(rRegI dst, immI src) ++%{ ++ match(Set dst src); ++ ++ format %{ "mov_immediate32s $dst, $src\t# int\t@loadConI" %} ++ ins_encode %{ 
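++ // mov_immediate32s presumably materializes the constant sign-extended to
++ // 64 bits (the "s" suffix); the zero-extending mov_immediate32u variant is
++ // used below for positive 32-bit values (see loadConP31).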
++ __ mov_immediate32s($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct loadConI0(rRegI dst, immI0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "movl $dst, R0\t# int\t@loadConI0" %} ++ ins_encode %{ ++ __ movl($dst$$Register, R0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct loadConL(rRegL dst, immL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(150); ++ format %{ "mov_immediate64 $dst, $src\t# long\t@loadConL" %} ++ ins_encode %{ ++ __ mov_immediate64($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct loadConL0(rRegL dst, immL0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "movl $dst, R0\t# int\t@loadConL0" %} ++ ins_encode %{ ++ __ movl($dst$$Register, R0); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++//instruct loadConUL32(rRegL dst, immUL32 src) ++//%{ ++// match(Set dst src); ++// ++// ins_cost(60); ++// format %{ "mov_immediate32u $dst, $src\t# long (unsigned 32-bit)\t@loadConUL32" %} ++// ins_encode %{ ++// __ mov_immediate32u($dst$$Register, $src$$constant); ++// %} ++// ins_pipe(ialu_regL_regL); ++//%} ++ ++ ++instruct loadConL32(rRegL dst, immL32 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(70); ++ format %{ "mov_immediate32s $dst, $src\t# long (32-bit)\t@loadConL32" %} ++ ins_encode %{ ++ __ mov_immediate32s($dst$$Register, (int)$src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++//use in swjdk8 need to check lsp? ++instruct loadConL16(rRegL dst, immL16 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(70); ++ format %{ "ldi $dst, $src, R0\t# long (16-bit)\t@loadConL16" %} ++ ins_encode %{ ++ __ ldi($dst$$Register, (int)$src$$constant, R0); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct loadConP(rRegP dst, immP con) %{ ++ match(Set dst con); ++ ++ format %{ "mov_immediate64 $dst, $con\t# ptr\t@loadConP" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ long* value = (long*)$con$$constant; ++ ++ if($con->constant_reloc() == relocInfo::metadata_type){ ++ int klass_index = __ oop_recorder()->find_index((Klass*)value); ++ RelocationHolder rspec = metadata_Relocation::spec(klass_index); ++ ++ __ relocate(rspec); ++ __ prepare_patch_li48(dst, (long)value); ++ }else if($con->constant_reloc() == relocInfo::oop_type){ ++ int oop_index = __ oop_recorder()->find_index((jobject)value); ++ RelocationHolder rspec = oop_Relocation::spec(oop_index); ++ ++ __ relocate(rspec); ++ __ prepare_patch_li48(dst, (long)value); ++ } else if ($con->constant_reloc() == relocInfo::none) { ++ __ mov_immediate64(dst, (long)value); ++ } ++ %} ++ ins_pipe(ialu_regL_regL); // XXX ++%} ++ ++instruct loadConP0(rRegP dst, immP0 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "movl $dst, R0\t# ptr\t@loadConP0" %} ++ ins_encode %{ ++ __ movl($dst$$Register, R0); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct loadConP31(rRegP dst, immP31 src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(60); ++ format %{ "mov_immediate32u $dst, $src\t# ptr (positive 32-bit)\t@loadConP31" %} ++ ins_encode %{ ++ __ mov_immediate32u($dst$$Register, $src$$constant); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct loadConP_poll(rRegP dst, immP_poll src) %{ ++ match(Set dst src); ++ ++ ins_cost(50); ++ format %{ "mov_immediate64 $dst, $src #@loadConP_poll" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ intptr_t value = (intptr_t)$src$$constant; ++ ++ __ mov_immediate64(dst, (long)value); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct 
loadConF(regF dst, immF con) %{ ++ match(Set dst con); ++ ins_cost(125); ++ format %{ "load_float $dst, [$constantaddress]\t# load from constant table: float=$con\t@loadConF" %} ++ ins_encode %{ ++ __ load_float($dst$$FloatRegister, $constantaddress($con)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++//TODO:jzy which is immN0? ++instruct loadConN0(rRegN dst, immN0 src) %{ ++ match(Set dst src); ++ ++ format %{ "movl $dst, $src\t# compressed NULL ptr\t@loadConN0" %} ++ ins_encode %{ ++ __ movl($dst$$Register, R0); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++//TODO:jzy compressed ptr? ++instruct loadConN(rRegN dst, immN src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "set_narrow_oop $dst, $src\t# compressed ptr\t@loadConN" %} ++ ins_encode %{ ++ address con = (address)$src$$constant; ++ if (con == NULL) { ++ ShouldNotReachHere(); ++ } else { ++ __ set_narrow_oop($dst$$Register, (jobject)$src$$constant); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); // XXX ++%} ++ ++instruct loadConNKlass(rRegN dst, immNKlass src) %{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "set_narrow_klass $dst, $src\t# compressed klass ptr\t@loadConNKlass" %} ++ ins_encode %{ ++ address con = (address)$src$$constant; ++ if (con == NULL) { ++ ShouldNotReachHere(); ++ } else { ++ __ set_narrow_klass($dst$$Register, (Klass*)$src$$constant); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); // XXX ++%} ++ ++instruct loadConF0(regF dst, immF0 src) ++%{ ++ match(Set dst src); ++ ins_cost(100); ++ ++ format %{ "fcpys f31, f31, $dst\t# float 0.0\t@loadConF0" %} ++ ins_encode %{ ++ __ fcpys(f31, f31, $dst$$FloatRegister); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// Use the same format since predicate() can not be used here. ++instruct loadConD(regD dst, immD con) %{ ++ match(Set dst con); ++ ins_cost(125); ++ format %{ "load_double $dst, [$constantaddress]\t# load from constant table: double=$con\t@loadConD" %} ++ ins_encode %{ ++ __ load_double($dst$$FloatRegister, $constantaddress($con)); ++ %} ++ ins_pipe(fpu_loadF); ++%} ++ ++instruct loadConD0(regD dst, immD0 src) ++%{ ++ match(Set dst src); ++ ins_cost(100); ++ ++ format %{ "fcpys f31, f31, $dst\t# double 0.0\t@loadConD0" %} ++ ins_encode %{ ++ __ fcpys(f31, f31, $dst$$FloatRegister); ++ %} ++ ins_pipe(fpu_loadF); ++%} ++ ++instruct loadSSI(rRegI dst, stackSlotI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ldws $dst, $src\t# int stk\t@loadSSI" %} ++ ins_encode %{ ++ __ ldws($dst$$Register, Address(esp, $src$$disp)); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++ ++instruct loadSSL(rRegL dst, stackSlotL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ldl $dst, $src\t# long stk\t@loadSSL" %} ++ ins_encode %{ ++ __ ldl($dst$$Register, Address(esp, $src$$disp)); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++ ++instruct loadSSP(rRegP dst, stackSlotP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "ldl $dst, $src\t# ptr stk\t@loadSSP" %} ++ ins_encode %{ ++ __ ldl($dst$$Register, Address(esp, $src$$disp)); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++ ++instruct loadSSF(regF dst, stackSlotF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "load_float $dst, $src\t# float stk\t@loadSSF" %} ++ ins_encode %{ ++ __ load_float($dst$$FloatRegister, Address(esp, $src$$disp)); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++// Use the same format since predicate() can not be used here. 
++instruct loadSSD(regD dst, stackSlotD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(125); ++ format %{ "load_double $dst, $src\t# double stk\t@loadSSD" %} ++ ins_encode %{ ++ __ load_double($dst$$FloatRegister, Address(esp, $src$$disp)); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++// Prefetch instructions for allocation. ++// Must be safe to execute with invalid address (cannot fault). ++ ++instruct prefetchAlloc( memory mem ) %{ ++ predicate(AllocatePrefetchInstr==3); ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ ++ format %{ "PREFETCHW $mem\t# Prefetch allocation into level 1 cache and mark modified" %} ++ ins_encode %{ ++// __ prefetchw($mem$$Address); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct prefetchAllocNTA( memory mem ) %{ ++ predicate(AllocatePrefetchInstr==0); ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ ++ format %{ "PREFETCHNTA $mem\t# Prefetch allocation to non-temporal cache for write" %} ++ ins_encode %{ ++ Register dst = R0; ++ __ load_float(f31, $mem$$Address); // fillde ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct prefetchAllocT0( memory mem ) %{ ++ predicate(AllocatePrefetchInstr==1); ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ ++ format %{ "PREFETCHT0 $mem\t# Prefetch allocation to level 1 and 2 caches for write" %} ++ ins_encode %{ ++// __ prefetcht0($mem$$Address); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct prefetchAllocT2( memory mem ) %{ ++ predicate(AllocatePrefetchInstr==2); ++ match(PrefetchAllocation mem); ++ ins_cost(125); ++ ++ format %{ "PREFETCHT2 $mem\t# Prefetch allocation to level 2 cache for write" %} ++ ins_encode %{ ++// __ prefetcht2($mem$$Address); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++//----------Store Instructions------------------------------------------------- ++ ++// Store Byte ++instruct storeB(memory mem, rRegI src) ++%{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "stb $src, $mem\t# byte\t@storeB" %} ++ ins_encode %{ ++ __ stb($src$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++// Store Char/Short ++instruct storeC(memory mem, rRegI src) ++%{ ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "sth $src, $mem\t# char/short\t@storeC" %} ++ ins_encode %{ ++ __ sth($src$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++// Store Integer ++instruct storeI(memory mem, rRegI src) ++%{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "stw $src, $mem\t# int\t@storeI" %} ++ ++ ins_encode %{ ++ __ stw($src$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++// Store Long ++//TODO implicit null check LSP ++instruct storeL(memory mem, rRegL src) ++%{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "stl $src, $mem\t# long\t@storeL" %} ++ ++ ins_encode %{ ++ __ stl($src$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); // XXX ++%} ++ ++// Store Pointer ++instruct storeP(memory mem, any_RegP src) ++%{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "stl $src, $mem\t# ptr\t@storeP" %} ++ ins_encode %{ ++ __ stl($src$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct storeImmP0(memory mem, immP0 zero) ++%{ ++ predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL)); ++ match(Set mem (StoreP mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "stl S5, $mem\t# ptr (r12_heapbase==0)\t@storeImmP0" %} ++ ++ ins_encode %{ 
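++ // Assumed rationale, mirroring the x86_64 port's R12 trick: under this
++ // instruct's predicate the compressed oop/klass base is NULL, so
++ // r12_heapbase should hold zero and can be stored directly to write a
++ // NULL pointer without needing a scratch register.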
++//TODO:where set r12_heapbase? jzy ++ __ stl(r12_heapbase, $mem$$Address); ++ %} ++ ++ ins_pipe(ialu_storeI); ++%} ++ ++/*no immdiate operand in swjdk8 ++// Store NULL Pointer, mark word, or other simple pointer constant. TODO:jzy immP31 is NULL ++instruct storeImmP(memory mem, immP31 src) ++%{ ++ match(Set mem (StoreP mem src)); ++ ++ ins_cost(150); // XXX ++ format %{"movwu rscratch3, $src\t# ptr\t@storeImmP" ++ "stl rscratch3, $mem" %} ++ ++ ins_encode %{ ++ __ mov_immediate32(rscratch3, $src$$constant); ++ __ stl(rscratch3, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%}*/ ++ ++// Store Compressed Pointer ++instruct storeN(memory mem, rRegN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "stw $src, $mem\t# compressed ptr\t@storeN" %} ++ ins_encode %{ ++ __ stw($src$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct storeNKlass(memory mem, rRegN src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(125); // XXX ++ format %{ "stw $src, $mem\t# compressed klass ptr\t@storeNKlass" %} ++ ins_encode %{ ++ __ stw($src$$Register, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct storeImmN0(memory mem, immN0 zero) ++%{ ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_klass_base() == NULL); ++ match(Set mem (StoreN mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "stw $mem, r12_heapbase\t# compressed ptr (R12_heapbase==0)\t@storeImmN0" %} ++ ins_encode %{ ++ __ stw(r12_heapbase, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++/*no immdiate operand in swjdk8 ++instruct storeImmN(memory mem, immN src) ++%{ ++ match(Set mem (StoreN mem src)); ++ ++ ins_cost(150); // XXX ++ format %{ "set_narrow_oop $src, $mem\t# compressed ptr\t@storeImmN" %} ++ ins_encode %{ ++ address con = (address)$src$$constant; ++ if (con == NULL) { ++ __ stw(R0, $mem$$Address); ++ } else { ++ __ set_narrow_oop($mem$$Address, (jobject)$src$$constant); ++ } ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct storeImmNKlass(memory mem, immNKlass src) ++%{ ++ match(Set mem (StoreNKlass mem src)); ++ ++ ins_cost(150); // XXX ++ format %{ "set_narrow_klass $src, $mem\t# compressed klass ptr\t@storeImmNKlass" %} ++ ins_encode %{ ++ __ set_narrow_klass($mem$$Address, (Klass*)$src$$constant); ++ %} ++ ins_pipe(ialu_storeI); ++%}*/ ++ ++// Store Integer Immediate ++instruct storeImmI0(memory mem, immI0 zero) ++%{ ++ predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL)); ++ match(Set mem (StoreI mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "stw r12_heapbase, $mem\t# int (r12_heapbase==0)\t@storeImmI0" %} ++ ins_encode %{ ++ __ stw(r12_heapbase, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++/*no immdiate operand in swjdk8 ++instruct storeImmI(memory mem, immI src) ++%{ ++ match(Set mem (StoreI mem src)); ++ ++ ins_cost(150); ++ format %{ "movwu rscratch3, $src\t# int\t@storeImmI\n\t" ++ "stw rscratch3, $mem" %} ++ ++ ins_encode %{ ++ __ mov_immediate32(rscratch3, $src$$constant); ++ __ stw(rscratch3, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%}*/ ++ ++// Store Long Immediate ++//TODO implicit null check LSP ++instruct storeImmL0(memory mem, immL0 zero) ++%{ ++ predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL)); ++ match(Set mem (StoreL mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "stl r12_heapbase, $mem\t# long (r12_heapbase==0)\t@storeImmL0" %} ++ 
ins_encode %{ ++ __ stl(r12_heapbase, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++/*no immdiate operand in swjdk8 ++instruct storeImmL(memory mem, immL32 src) ++%{ ++ match(Set mem (StoreL mem src)); ++ ++ ins_cost(150); ++ format %{ "movws rscratch3, $src\t# long\t@storeImmL\n\t" ++ "stl rscratch3, $mem" %} ++ ins_encode %{ ++ __ movws(rscratch3, (u_int32_t)$src$$constant); ++ __ stl(rscratch3, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%}*/ ++ ++// Store Short/Char Immediate ++instruct storeImmC0(memory mem, immI0 zero) ++%{ ++ predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL)); ++ match(Set mem (StoreC mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "sth r12_heapbase, $mem\t# short/char (r12_heapbase==0)\t@storeImmC0" %} ++ ins_encode %{ ++ __ sth(r12_heapbase, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++/*no immdiate operand in swjdk8 ++instruct storeImmI16(memory mem, immI16 src) ++%{ ++// predicate(UseStoreImmI16); ++ match(Set mem (StoreC mem src)); ++ ++ ins_cost(150); ++ format %{ "ldi rscratch3, $src\t# short/char\t@storeImmI16\n\t" ++ "sth rscratch3, $mem" %} ++ ins_encode %{ ++ __ ldi(rscratch3, $src$$constant, R0); ++ __ sth(rscratch3, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%}*/ ++ ++// Store Byte Immediate ++instruct storeImmB0(memory mem, immI0 zero) ++%{ ++ predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL)); ++ match(Set mem (StoreB mem zero)); ++ ++ ins_cost(125); // XXX ++ format %{ "stb r12_heapbase, $mem\t# short/char (r12_heapbase==0)\t@storeImmB0" %} ++ ins_encode %{ ++ __ stb(r12_heapbase, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++/*no immdiate operand in swjdk8 ++instruct storeImmB(memory mem, immI8 src) ++%{ ++ match(Set mem (StoreB mem src)); ++ ++ ins_cost(150); // XXX ++ format %{ "ldi rscratch3, $src\t# byte\t@storeImmB\n\t" ++ "stb rscratch3, $mem" %} ++ ins_encode %{ ++ __ ldi(rscratch3, $src$$constant, R0); ++ __ stb(rscratch3, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%}*/ ++ ++// Store CMS card-mark Immediate ++instruct storeImmCM0(memory mem, immI0 zero) ++%{ ++ match(Set mem (StoreCM mem zero)); ++ predicate(unnecessary_storestore(n)); ++ ++ ins_cost(125); // XXX ++ format %{ "stb R0, $mem\t# CMS card-mark byte 0\t@storeImmCM0" %} ++ ++ ins_encode %{ ++ __ stb(R0, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct storeImmCM0_ordered(memory mem, immI0 src) ++%{ ++ match(Set mem (StoreCM mem src)); ++ ++ ins_cost(150); // XXX ++ format %{ "stb R0, $mem\t# CMS card-mark byte 0\t@storeImmCM0 MEMB" %} ++ ++ ins_encode %{ ++ if(UseWmemb) ++ __ wmemb(); ++ else ++ __ memb(); ++ __ stb(R0, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++// Store Float ++instruct storeF(memory mem, regF src) ++%{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(95); // XXX ++ format %{ "store_float $src, $mem\t# float\t@storeF" %} ++ ins_encode %{ ++ __ store_float($src$$FloatRegister, $mem$$Address); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++// Store immediate Float value (it is faster than store from XMM register) ++instruct storeF0(memory mem, immF0 zero) ++%{ ++ predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL)); ++ match(Set mem (StoreF mem zero)); ++ ++ ins_cost(25); // XXX ++ format %{ "store_float f31, $mem\t# float 0. 
(r12_heapbase==0)\t@storeF0" %} ++ ++ ins_encode %{ ++ __ store_float(f31, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++/*no immdiate operand in swjdk8 ++//TODO:it's ok todo this ? jzy ++instruct storeF_imm(memory mem, immF src) ++%{ ++ match(Set mem (StoreF mem src)); ++ ++ ins_cost(50); ++ format %{ "mov_immdiate32 rscratch3, $src\t# float\t@storeF_imm\n\t" ++ "stw rscratch3, $mem\t# float" %} ++ ++ ins_encode %{ ++ __ mov_immediate32(rscratch3, $src$$constant); ++ __ stw(rscratch3, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++*/ ++// Store Double ++instruct storeD(memory mem, regD src) ++%{ ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(95); // XXX ++ format %{ "store_double $src, $mem\t# double\t@storeD" %} ++ ins_encode %{ ++ __ store_double($src$$FloatRegister, $mem$$Address); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++// Store immediate double 0.0 (it is faster than store from XMM register) TODO:is zero? jzy ++instruct storeD0_imm(memory mem, immD0 src) ++%{ ++ predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL));// lsp todo check ++ match(Set mem (StoreD mem src)); ++ ++ ins_cost(50); ++ format %{ "store_double f31, $mem\t# double 0.\t@storeD0_imm" %} ++ ++ ins_encode %{ ++ __ store_double(f31, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct storeD0(memory mem, immD0 zero) ++%{ ++ predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL)); ++ match(Set mem (StoreD mem zero)); ++ ++ ins_cost(25); // XXX ++ format %{ "store_double f31, $mem\t# double 0. \t@storeD0" %} ++ ++ ins_encode %{ ++ __ store_double(f31, $mem$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct storeSSI(stackSlotI dst, rRegI src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "stw $src, $dst\t# int stk\t@storeSSI" %} ++ ++ ins_encode %{ ++ __ stw($src$$Register, $dst$$Address); ++ %} ++ ins_pipe( ialu_storeI ); ++%} ++ ++instruct storeSSL(stackSlotL dst, rRegL src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "stl $src, $dst\t# long stk\t@storeSSL" %} ++ ++ ins_encode %{ ++ __ stl($src$$Register, $dst$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct storeSSP(stackSlotP dst, rRegP src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(100); ++ format %{ "stl $src, $dst\t# ptr stk\t@storeSSP" %} ++ ++ ins_encode %{ ++ __ stl($src$$Register, $dst$$Address); ++ %} ++ ins_pipe(ialu_storeI); ++%} ++ ++instruct storeSSF(stackSlotF dst, regF src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(95); // XXX ++ format %{ "store_float $src, $dst\t# float stk\t@storeSSF" %} ++ ins_encode %{ ++ __ store_float($src$$FloatRegister, Address(esp, $dst$$disp)); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct storeSSD(stackSlotD dst, regD src) ++%{ ++ match(Set dst src); ++ ++ ins_cost(95); // XXX ++ format %{ "store_double $src, $dst\t# double stk\t@storeSSD" %} ++ ins_encode %{ ++ __ store_double($src$$FloatRegister, Address(esp, $dst$$disp)); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++//----------BSWAP Instructions------------------------------------------------- ++instruct bytes_reverse_int(rRegI dst) %{ ++ match(Set dst (ReverseBytesI dst)); ++ ++ format %{ "bswapw $dst @bytes_reverse_int" %} ++ //opcode(0x0F, 0xC8); /*Opcode 0F /C8 */ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ swap(dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct bytes_reverse_long(rRegL dst) %{ ++ match(Set dst (ReverseBytesL dst)); ++ ++ format %{ "bswapl $dst 
@bytes_reverse_long" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ if(UseSW8A) { ++ __ revbl(dst, dst); ++ } else { ++ int zap1 = 0x1; ++ int zap2 = 0x80; ++ int count = 0x38; ++ int zap3 = 0x7E; ++ //__ stop("bytes_reverse_long"); ++ assert(dst != rscratch3 && dst != rscratch2_AT, "dst should not equal to AT and rscratch3"); ++ __ slll(dst, count, rscratch3); ++ __ srll(dst, count, rscratch2_AT); ++ __ bis(rscratch3, rscratch2_AT, rscratch2_AT); ++ __ zapnot(dst, zap3, dst);//set the highest and lowest bit to zero ++ __ bis(dst, rscratch2_AT, dst); ++ ++ for(int i=1; i<4; i++){ ++ zap1 = zap1<<1; ++ zap2 = zap2>>1; ++ count = count - 16; ++ zap3 = 0xff - zap1 -zap2; ++ __ zapnot(dst, zap1, rscratch3); ++ __ slll(rscratch3, count, rscratch3); ++ __ zapnot(dst, zap2, rscratch2_AT); ++ __ srll(rscratch2_AT, count, rscratch2_AT); ++ __ bis(rscratch3, rscratch2_AT, rscratch2_AT); ++ __ zapnot(dst, zap3, dst); ++ __ bis(dst, rscratch2_AT, dst); ++ } ++ } ++ %} ++ ins_pipe( pipe_slow); ++%} ++ ++instruct bytes_reverse_unsigned_short(rRegI dst) %{ ++ match(Set dst (ReverseBytesUS dst)); ++ ++ format %{ "zapnot $dst, #0x3, $dst $dst @bytes_reverse_unsigned_short\n\t" ++ "huswap $dst" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ zapnot(dst, 0x3, dst); ++ __ huswap(dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct bytes_reverse_short(rRegI dst) %{ ++ match(Set dst (ReverseBytesS dst)); ++ ++ format %{ "zapnot $dst, #0x3, $dst $dst @bytes_reverse_unsigned_short\n\t" ++ "hswap $dst" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ zapnot(dst, 0x3, dst); ++ __ hswap(dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++//---------- Zeros Count Instructions ------------------------------------------ ++// CountLeadingZerosINode CountTrailingZerosINode ++instruct countLeadingZerosI(rRegI dst, rRegI src) %{ ++ predicate(UseCountLeadingZerosInstruction); ++ match(Set dst (CountLeadingZerosI src)); ++ ++ format %{ "CTLZ $dst, $dst #@countLeadingZerosI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ zapnot(src, 0xf, dst); ++ __ ctlz(dst, dst); ++ __ subw(dst, 32, dst); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countLeadingZerosL(rRegI dst, rRegL src) %{ ++ predicate(UseCountLeadingZerosInstruction); ++ match(Set dst (CountLeadingZerosL src)); ++ ++ format %{ "CTLZ $src,$dst #@countLeadingZerosL" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ ctlz(src, dst); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct countTrailingZerosI(rRegI dst, rRegI src) %{ ++ predicate(UseCountTrailingZerosInstruction); ++ match(Set dst (CountTrailingZerosI src)); ++ ++ format %{ "CTTZ $src, $dst\n\t #@countTrailingZerosI"%} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slll(src, 32, dst); ++ __ cttz(dst, dst); ++ __ subw(dst, 32, dst); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct countTrailingZerosL(rRegI dst, rRegL src) %{ ++ predicate(UseCountTrailingZerosInstruction); ++ match(Set dst (CountTrailingZerosL src)); ++ ++ format %{ "CTTZ $src,$dst #@countTrailingZerosL" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ cttz(src, dst); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++//---------- Population Count Instructions ------------------------------------- ++ ++instruct popCountI(rRegI dst, rRegI src) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst 
(PopCountI src)); ++ ++ format %{ "popcnt $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ zapnot(src, 0xf, dst); ++ __ ctpop(dst, dst); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++/* memory operands no use in sw64 ++instruct popCountI_mem(rRegI dst, memory mem) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountI (LoadI mem))); ++ ++ format %{ "popcnt $dst, $mem" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ ldw(rscratch2_AT, $mem$$Address); ++ __ zapnot(rscratch2_AT, 0xf, dst); ++ __ ctpop(dst, dst); ++ %} ++ ins_pipe(ialu_reg_mem); ++%}*/ ++ ++// Note: Long.bitCount(long) returns an int. ++instruct popCountL(rRegI dst, rRegL src) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountL src)); ++ ++ format %{ "CTPOP $dst, $src" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ ctpop(src, dst); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++/* memory operands no use in sw64 ++// Note: Long.bitCount(long) returns an int. ++instruct popCountL_mem(rRegI dst, memory mem) %{ ++ predicate(UsePopCountInstruction); ++ match(Set dst (PopCountL (LoadL mem))); ++ ++ format %{ "popcnt $dst, $mem" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ ldl(rscratch2_AT, $mem$$Address); ++ __ ctpop(rscratch2_AT, dst); ++ %} ++ ins_pipe(ialu_reg_mem); ++%}*/ ++ ++//----------MemBar Instructions----------------------------------------------- ++// Memory barrier flavors ++ ++instruct membar_acquire() %{ ++ match(MemBarAcquire); ++ ins_cost(0); ++ ++ size(4); ++ format %{ "MEMBAR-acquire @ membar_acquire" %} ++ ins_encode %{ ++ if (UseNecessaryMembar) { ++ __ memb(); ++ } ++ %} ++ ins_pipe(empty); ++%} ++ ++instruct load_fence() %{ ++ match(LoadFence); ++ ins_cost(400); ++ ++ format %{ "MEMBAR @ load_fence" %} ++ ins_encode %{ ++ __ memb(); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_acquire_lock() %{ ++ match(MemBarAcquireLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-acquire (acquire as part of CAS in prior FastLock so empty encoding) @ membar_acquire_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct unnecessary_membar_release() %{ ++ predicate(unnecessary_release(n)); ++ match(MemBarRelease); ++ ins_cost(0); ++ ++ format %{ "membar_release (elided)" %} ++ ++ ins_encode %{ ++ __ block_comment("membar_release (elided)"); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release() %{ ++ match(MemBarRelease); ++ match(StoreFence); ++ ins_cost(400);//0 ++ ++ format %{ "MEMBAR-release StoreFence @ membar_release" %} ++ ++ ins_encode %{ ++// // Attention: DO NOT DELETE THIS GUY! 
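++ // A full memb() is used here on the assumption that SW64 has no
++ // release-only barrier; the lighter wmemb() write barrier is reserved for
++ // the StoreStore case (see membar_storestore below).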
++ __ memb(); ++ %} ++ ++ ins_pipe(pipe_slow); ++%} ++ ++instruct membar_release_lock() %{ ++ match(MemBarReleaseLock); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-release-lock (release in FastUnlock so empty) @ membar_release_lock" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++instruct membar_volatile() %{ ++ match(MemBarVolatile); ++ ins_cost(400); ++ ++ format %{ "MEMBAR-volatile" %} ++ ins_encode %{ ++ if( !os::is_MP() ) return; // Not needed on single CPU ++ __ memb(); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct unnecessary_membar_volatile() %{ ++ match(MemBarVolatile); ++ predicate(Matcher::post_store_load_barrier(n)); ++ ins_cost(0); ++ ++ size(0); ++ format %{ "MEMBAR-volatile (unnecessary so empty encoding) @ unnecessary_membar_volatile" %} ++ ins_encode( ); ++ ins_pipe(empty); ++%} ++ ++instruct membar_storestore() %{ ++ match(MemBarStoreStore); ++ ++ ins_cost(0); ++ size(4); ++ format %{ "MEMBAR-storestore @ membar_storestore" %} ++ ins_encode %{ ++ if (UseWmemb && UseNecessaryMembar) { ++ __ wmemb(); ++ } else if (UseNecessaryMembar) { ++ __ memb(); ++ } ++ %} ++ ins_pipe(empty); ++%} ++ ++//----------Move Instructions--------------------------------------------------- ++ ++instruct castX2P(rRegP dst, rRegL src) ++%{ ++ match(Set dst (CastX2P src)); ++ ++ format %{ "movl $dst, $src\t# long->ptr @castX2P" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ movl(dst, src); ++ %} ++ ins_pipe(ialu_regI_mov); ++%} ++ ++instruct castP2X(rRegL dst, rRegP src) ++%{ ++ match(Set dst (CastP2X src)); ++ ++ format %{ "movl $dst, $src\t# ptr -> long@castP2X" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ if(src != dst) ++ __ movl(dst, src); ++ %} ++ ins_pipe(ialu_regI_mov); ++%} ++ ++// Convert oop into int for vectors alignment masking ++instruct convP2I(rRegI dst, rRegP src) ++%{ ++ match(Set dst (ConvL2I (CastP2X src))); ++ ++ format %{ "movwu $dst, $src\t# ptr -> int" %} ++ ins_encode %{ ++ __ movwu($dst$$Register, $src$$Register); //LSP CHECK?? OK ++ %} ++ ins_pipe(ialu_regI_regI); // XXX ++%} ++ ++// Convert compressed oop into int for vectors alignment masking ++// in case of 32bit oops (heap < 4Gb). ++instruct convN2I(rRegI dst, rRegN src) ++%{ ++ predicate(Universe::narrow_oop_shift() == 0); ++ match(Set dst (ConvL2I (CastP2X (DecodeN src)))); ++ ++ format %{ "movwu $dst, $src\t# compressed ptr -> int" %} ++ ins_encode %{ ++ __ movwu($dst$$Register, $src$$Register);//LSP CHECK?? 
OK ++ %} ++ ins_pipe(ialu_regI_regI); // XXX ++%} ++ ++// Convert oop pointer into compressed form ++instruct encodeHeapOop(rRegN dst, rRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() != TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++// effect(KILL cr); ++ format %{ "encode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++// __ movl(d, s); ++ __ encode_heap_oop(d, s); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct encodeHeapOop_not_null(rRegN dst, rRegP src) %{ ++ predicate(n->bottom_type()->make_ptr()->ptr() == TypePtr::NotNull); ++ match(Set dst (EncodeP src)); ++ format %{ "encode_heap_oop_not_null $dst,$src" %} ++ ins_encode %{ ++ __ encode_heap_oop_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct decodeHeapOop(rRegP dst, rRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() != TypePtr::NotNull && ++ n->bottom_type()->is_ptr()->ptr() != TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++// effect(KILL cr); ++ format %{ "decode_heap_oop $dst,$src" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++// __ movl(d, s); ++ __ decode_heap_oop(d, s); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct decodeHeapOop_not_null(rRegP dst, rRegN src) %{ ++ predicate(n->bottom_type()->is_ptr()->ptr() == TypePtr::NotNull || ++ n->bottom_type()->is_ptr()->ptr() == TypePtr::Constant); ++ match(Set dst (DecodeN src)); ++ format %{ "decode_heap_oop_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_heap_oop_not_null(d, s); ++ } else { ++ __ decode_heap_oop_not_null(d); ++ } ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct encodeKlass_not_null(rRegN dst, rRegP src) %{ ++ match(Set dst (EncodePKlass src)); ++// effect(KILL cr); ++ format %{ "encode_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ __ encode_klass_not_null($dst$$Register, $src$$Register); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct decodeKlass_not_null(rRegP dst, rRegN src) %{ ++ match(Set dst (DecodeNKlass src)); ++// effect(KILL cr); ++ format %{ "decode_klass_not_null $dst,$src" %} ++ ins_encode %{ ++ Register s = $src$$Register; ++ Register d = $dst$$Register; ++ if (s != d) { ++ __ decode_klass_not_null(d, s); ++ } else { ++ __ decode_klass_not_null(d); ++ } ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++/* ++ ++//----------Conditional Move--------------------------------------------------- ++// Jump ++// dummy instruction for generating temp registers ++instruct jumpXtnd_offset(rRegL switch_val, immI2 shift, rRegI dest) %{ ++ match(Jump (LShiftL switch_val shift)); ++ ins_cost(350); ++ predicate(false); ++ effect(TEMP dest); ++ ++ format %{ "leaq $dest, [$constantaddress]\n\t" ++ "jmp [$dest + $switch_val << $shift]\n\t" %} ++ ins_encode %{ ++ // We could use jump(ArrayAddress) except that the macro assembler needs to use r10 ++ // to do that and the compiler is using that register as one it can allocate. ++ // So we build it all by hand. 
++ // Address index(noreg, switch_reg, (Address::ScaleFactor)$shift$$constant); ++ // ArrayAddress dispatch(table, index); ++ Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant); ++ __ lea($dest$$Register, $constantaddress); ++ __ jmp(dispatch); ++ %} ++ ins_pipe(pipe_jmp); ++%} ++ ++instruct jumpXtnd_addr(rRegL switch_val, immI2 shift, immL32 offset, rRegI dest) %{ ++ match(Jump (AddL (LShiftL switch_val shift) offset)); ++ ins_cost(350); ++ effect(TEMP dest); ++ ++ format %{ "leaq $dest, [$constantaddress]\n\t" ++ "jmp [$dest + $switch_val << $shift + $offset]\n\t" %} ++ ins_encode %{ ++ // We could use jump(ArrayAddress) except that the macro assembler needs to use r10 ++ // to do that and the compiler is using that register as one it can allocate. ++ // So we build it all by hand. ++ // Address index(noreg, switch_reg, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant); ++ // ArrayAddress dispatch(table, index); ++ Address dispatch($dest$$Register, $switch_val$$Register, (Address::ScaleFactor) $shift$$constant, (int) $offset$$constant); ++ __ lea($dest$$Register, $constantaddress); ++ __ jmp(dispatch); ++ %} ++ ins_pipe(pipe_jmp); ++%} ++ ++instruct jumpXtnd(rRegL switch_val, rRegI dest) %{ ++ match(Jump switch_val); ++ ins_cost(350); ++ effect(TEMP dest); ++ ++ format %{ "leaq $dest, [$constantaddress]\n\t" ++ "jmp [$dest + $switch_val]\n\t" %} ++ ins_encode %{ ++ // We could use jump(ArrayAddress) except that the macro assembler needs to use r10 ++ // to do that and the compiler is using that register as one it can allocate. ++ // So we build it all by hand. ++ // Address index(noreg, switch_reg, Address::times_1); ++ // ArrayAddress dispatch(table, index); ++ Address dispatch($dest$$Register, $switch_val$$Register, Address::times_1); ++ __ lea($dest$$Register, $constantaddress); ++ __ jmp(dispatch); ++ %} ++ ins_pipe(pipe_jmp); ++%} ++*/ ++ ++// Conditional move ++instruct cmovI_cmpI_reg_reg(rRegI dst, rRegI src, rRegI tmp1, rRegI tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovI$cop $dst, $src\t# signed, int @cmovI_cmpI_reg_reg" %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmpws(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpI_reg_imm(rRegI dst, immU8 src, rRegI tmp1, rRegI tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ins_cost(80); ++ format %{ ++ "cmovI$cop $dst, $src\t# signed, int @cmovI_cmpI_reg_imm" ++ %} ++ ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ int src = $src$$constant; ++ int flag = $cop$$cmpcode; ++ __ cmpws(flag, op1, op2, rcc); ++ __ selne(rcc, src, dst, dst); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovI_cmpL_reg_reg(rRegI dst, rRegI src, rRegL tmp1, rRegL tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovI$cop $dst, $src\t# signed, int @cmovI_cmpL_reg_reg" %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmpls(flag, op1, 
op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpL_reg_imm(rRegI dst, immU8 src, rRegL tmp1, rRegL tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovI$cop $dst, $src\t# signed, int @cmovI_cmpL_reg_imm" %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ int src = $src$$constant; ++ int flag = $cop$$cmpcode; ++ __ cmpls(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpN_reg_reg(rRegI dst, rRegI src, rRegN tmp1, rRegN tmp2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovI$cop $dst, $src\t# unsigned, int @cmovI_cmpN_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmpwu(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpN_reg_imm(rRegI dst, immU8 src, rRegN tmp1, rRegN tmp2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovI$cop $dst, $src\t# unsigned, int @cmovI_cmpN_reg_imm" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ int src = $src$$constant; ++ int flag = $cop$$cmpcode; ++ __ cmpwu(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpP_reg_reg(rRegI dst, rRegI src, rRegP tmp1, rRegP tmp2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovI$cop $dst, $src\t# unsigned, int @cmovI_cmpP_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmplu(flag, op1, op2, rcc); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpP_reg_imm(rRegI dst, immU8 src, rRegP tmp1, rRegP tmp2, cmpOpU cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovI$cop $dst, $src\t# unsigned, int @cmovI_cmpP_reg_imm" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ int src = $src$$constant; ++ int flag = $cop$$cmpcode; ++ __ cmplu(flag, op1, op2, rcc); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpU_reg_reg(rRegI dst, rRegI src, rRegI tmp1, rRegI tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovI$cop $dst, $src\t# signed, int @cmovI_cmpU_reg_reg" %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmpwu(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpU_reg_imm(rRegI dst, immU8 src, rRegI tmp1, rRegI tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveI (Binary cop (CmpU tmp1 tmp2)) (Binary dst 
src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovI$cop $dst, $src\t# signed, int @cmovI_cmpU_reg_imm" %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ int src = $src$$constant; ++ int flag = $cop$$cmpcode; ++ __ cmpwu(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_cmpD_reg_reg(rRegI dst, rRegI src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovI$cop $dst, $src\t# signed, ptr @cmovI_cmpD_reg_reg" %} ++ ins_encode%{ ++ FloatRegister op1 = $tmp1$$FloatRegister; ++ FloatRegister op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ switch((Assembler::Condition)flag) { ++ case Assembler::equal: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::notEqual: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::greater: ++ __ c_olt_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::greaterEqual: ++ __ c_ole_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::less: ++ __ block_comment("less;;"); ++ __ c_ole_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::lessEqual: ++ __ block_comment("lessEqual;;"); ++ __ c_olt_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ } ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovI_cmpD_reg_imm(rRegI dst, immU8 src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovI$cop $dst, $src\t# signed, ptr @cmovI_cmpD_reg_imm" %} ++ ins_encode%{ ++ FloatRegister op1 = $tmp1$$FloatRegister; ++ FloatRegister op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ int src = $src$$constant; ++ int flag = $cop$$cmpcode; ++ switch((Assembler::Condition)flag) { ++ case Assembler::equal: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::notEqual: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::greater: ++ __ c_olt_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::greaterEqual: ++ __ c_ole_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::less: ++ __ block_comment("less;;"); ++ __ c_ole_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::lessEqual: ++ __ block_comment("lessEqual;;"); ++ __ c_olt_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ } ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovI_cmpF_reg_reg(rRegI dst, rRegI src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovI$cop $dst, $src\t# signed, ptr @cmovI_cmpF_reg_reg" %} ++ ins_encode%{ ++ FloatRegister op1 = $tmp1$$FloatRegister; ++ FloatRegister op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ 
Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ switch((Assembler::Condition)flag) { ++ case Assembler::equal: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::notEqual: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::greater: ++ __ c_olt_s(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::greaterEqual: ++ __ c_ole_s(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::less: ++ __ block_comment("less;;"); ++ __ c_ole_s(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::lessEqual: ++ __ block_comment("lessEqual;;"); ++ __ c_olt_s(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ } ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovI_cmpF_reg_imm(rRegI dst, immU8 src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveI (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovI$cop $dst, $src\t# signed, ptr @cmovI_cmpF_reg_imm" %} ++ ins_encode%{ ++ FloatRegister op1 = $tmp1$$FloatRegister; ++ FloatRegister op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ int src = $src$$constant; ++ int flag = $cop$$cmpcode; ++ switch((Assembler::Condition)flag) { ++ case Assembler::equal: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::notEqual: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::greater: ++ __ c_olt_s(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::greaterEqual: ++ __ c_ole_s(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::less: ++ __ block_comment("less;;"); ++ __ c_ole_s(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::lessEqual: ++ __ block_comment("lessEqual;;"); ++ __ c_olt_s(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ } ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++/* ++instruct cmovI_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, rRegI src) %{ ++ match(Set dst (CMoveI (Binary cop cr) (Binary dst src))); ++ ins_cost(200); ++ expand %{ ++ cmovI_regU(cop, cr, dst, src); ++ %} ++%} ++*/ ++/* memory operands no need for SW64 ++// Conditional move ++instruct cmovI_mem1(rRegI dst, memory src, rRegI tmp1, rRegI tmp2, cmpOp cop, rFlagsReg cr) %{ ++ match(Set dst (CMoveI (Binary cop (CmpI tmp1 tmp2)) (Binary dst (LoadI src)))); ++ effect(KILL cr); ++ ins_cost(250); // XXX ++ format %{ "cmovl$cop $dst, $src\t# signed, int" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ int flag = $cop$$cmpcode; ++ __ ldw(rscratch3, src); ++ __ cmpws(flag, op1, op2, cr); ++ __ selne(cr, rscratch3, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_mem2(rRegI dst, memory src, rRegL tmp1, rRegL tmp2, cmpOp cop) %{ ++ match(Set dst (CMoveI (Binary cop (CmpL tmp1 tmp2)) (Binary dst (LoadI src)))); ++ ++ ins_cost(250); // XXX ++ format %{ "cmovl$cop $dst, $src\t# signed, int" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = 
$tmp2$$Register; ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ int flag = $cop$$cmpcode; ++ __ ldw(rscratch2_AT, src); ++ __ cmpls(flag, op1, op2, rscratch1_GP); ++ __ selne(rscratch1_GP, rscratch2_AT, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// Conditional move ++instruct cmovI_memU1(rRegI dst, memory src, rRegN tmp1, rRegN tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst (LoadI src)))); ++ ++ ins_cost(250); // XXX ++ format %{ "cmovl$cop $dst, $src\t# unsigned, int" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ int flag = $cop$$cmpcode; ++ __ ldw(rscratch2_AT, src); ++ __ cmpwu(flag, op1, op2, rscratch1_GP); ++ __ selne(rscratch1_GP, rscratch2_AT, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovI_memU2(rRegI dst, memory src, rRegP tmp1, rRegP tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveI (Binary cop (CmpN tmp1 tmp2)) (Binary dst (LoadI src)))); ++ ++ ins_cost(250); // XXX ++ format %{ "cmovl$cop $dst, $src\t# unsigned, int" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ int flag = $cop$$cmpcode; ++ __ ldw(rscratch2_AT, src); ++ __ cmplu(flag, op1, op2, rscratch1_GP); ++ __ selne(rscratch1_GP, rscratch2_AT, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%}*/ ++/* ++instruct cmovI_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegI dst, memory src) %{ ++ match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src)))); ++ ins_cost(250); ++ expand %{ ++ cmovI_memU(cop, cr, dst, src); ++ %} ++%} ++*/ ++ ++// Conditional move ++instruct cmovN_cmpI_reg_reg(rRegN dst, rRegN src, rRegI tmp1, rRegI tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovN$cop $dst, $src\t# signed, compressed ptr @cmovN_cmpI_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmpws(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpI_reg_imm(rRegN dst, immU8 src, rRegI tmp1, rRegI tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveN (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovN$cop $dst, $src\t# signed, compressed ptr @cmovN_cmpI_reg_imm" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ int src = $src$$constant; ++ int flag = $cop$$cmpcode; ++ __ cmpws(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpL_reg_reg(rRegN dst, rRegN src, rRegL tmp1, rRegL tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveN (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovN$cop $dst, $src\t# signed, compressed ptr @cmovN_cmpL_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmpls(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpU_reg_reg(rRegN dst, rRegN src, rRegI tmp1, rRegI tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveN 
(Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovN$cop $dst, $src\t# signed, compressed ptr @cmovN_cmpU_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmpwu(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// Conditional move ++instruct cmovN_cmpN_reg_reg(rRegN dst, rRegN src, rRegN tmp1, rRegN tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovN$cop $dst, $src\t# unsigned, compressed ptr @cmovN_cmpN_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmpwu(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpN_reg_imm(rRegN dst, immU8 src, rRegN tmp1, rRegN tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveN (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovN$cop $dst, $src\t# unsigned, compressed ptr @cmovN_cmpN_reg_imm" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ int src = $src$$constant; ++ int flag = $cop$$cmpcode; ++ __ cmpwu(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpP_reg_reg(rRegN dst, rRegN src, rRegP tmp1, rRegP tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovN$cop $dst, $src\t# unsigned, compressed ptr @cmovN_cmpP_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmplu(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovN_cmpP_reg_imm(rRegN dst, immU8 src, rRegP tmp1, rRegP tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveN (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovN$cop $dst, $src\t# unsigned, compressed ptr @cmovN_cmpP_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ int src = $src$$constant & ((1<<8)-1); ++ int flag = $cop$$cmpcode; ++ __ cmplu(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++/* ++instruct cmovN_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegN dst, rRegN src) %{ ++ match(Set dst (CMoveN (Binary cop cr) (Binary dst src))); ++ ins_cost(200); ++ expand %{ ++ cmovN_regU(cop, cr, dst, src); ++ %} ++%} ++*/ ++ ++// Conditional move ++instruct cmovP_cmpI_reg_reg(rRegP dst, rRegP src, rRegI tmp1, rRegI tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovP$cop $dst, $src\t# signed, ptr @cmovP_cmpI_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmpws(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ 
++instruct cmovP_cmpU_reg_reg(rRegP dst, rRegP src, rRegI tmp1, rRegI tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveP (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovP$cop $dst, $src\t# signed, ptr @cmovP_cmpU_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmpwu(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovP_cmpF_reg_reg(rRegP dst, rRegP src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovP$cop $dst, $src\t# signed, ptr @cmovP_cmpF_reg_reg" %} ++ ins_encode%{ ++ FloatRegister op1 = $tmp1$$FloatRegister; ++ FloatRegister op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ switch((Assembler::Condition)flag) { ++ case Assembler::equal: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovs(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::notEqual: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovs(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::greater: ++ __ c_olt_s(op2, op1); ++ __ fimovs(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::greaterEqual: ++ __ c_ole_s(op2, op1); ++ __ fimovs(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::less: ++ __ block_comment("less;;"); ++ __ c_ole_s(op2, op1); ++ __ fimovs(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::lessEqual: ++ __ block_comment("lessEqual;;"); ++ __ c_olt_s(op2, op1); ++ __ fimovs(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ } ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovP_cmpD_reg_reg(rRegP dst, rRegP src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveP (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovP$cop $dst, $src\t# signed, ptr @cmovP_cmpD_reg_reg" %} ++ ins_encode%{ ++ FloatRegister op1 = $tmp1$$FloatRegister; ++ FloatRegister op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ switch((Assembler::Condition)flag) { ++ case Assembler::equal: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::notEqual: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::greater: ++ __ c_olt_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::greaterEqual: ++ __ c_ole_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::less: ++ __ block_comment("less;;"); ++ __ c_ole_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::lessEqual: ++ __ block_comment("lessEqual;;"); ++ __ c_olt_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ } ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++ ++instruct cmovP_cmpL_reg_reg(rRegP dst, rRegP src, rRegL tmp1, rRegL tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovP$cop $dst, 
$src\t# signed, ptr @cmovP_cmpL_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmpls(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++// Conditional move ++instruct cmovP_cmpN_reg_reg(rRegP dst, rRegP src, rRegN tmp1, rRegN tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovP$cop $dst, $src\t# unsigned, ptr @cmovP_cmpN_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmpwu(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovP_cmpP_reg_reg(rRegP dst, rRegP src, rRegP tmp1, rRegP tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovP$cop $dst, $src\t# unsigned, ptr @cmovP_cmpP_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmplu(flag, op1, op2 ); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++/* ++instruct cmovP_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegP dst, rRegP src) %{ ++ match(Set dst (CMoveP (Binary cop cr) (Binary dst src))); ++ ins_cost(200); ++ expand %{ ++ cmovP_regU(cop, cr, dst, src); ++ %} ++%} ++ ++// DISABLED: Requires the ADLC to emit a bottom_type call that ++// correctly meets the two pointer arguments; one is an incoming ++// register but the other is a memory operand. ALSO appears to ++// be buggy with implicit null checks. 
++//*/ ++ ++/* memory operands no need for SW64 ++// Conditional move ++instruct cmovP_mem1(rRegP dst, memory src, rRegI tmp1, rRegI tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveP (Binary cop (CmpI tmp1 tmp2)) (Binary dst (LoadP src)))); ++ ins_cost(250); ++ format %{ "CMOV$cop $dst,$src\t# ptr" %} ++ opcode(0x0F,0x40); ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ int flag = $cop$$cmpcode; ++ __ ldl(rscratch2_AT, src); ++ __ cmpws(flag, op1, op2, rscratch1_GP); ++ __ selne(rscratch1_GP, rscratch2_AT, dst, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_mem2(rRegP dst, memory src, rRegL tmp1, rRegL tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveP (Binary cop (CmpL tmp1 tmp2)) (Binary dst (LoadP src)))); ++ ins_cost(250); ++ format %{ "CMOV$cop $dst,$src\t# ptr" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ int flag = $cop$$cmpcode; ++ __ ldl(rscratch2_AT, src); ++ __ cmpls(flag, op1, op2, rscratch1_GP); ++ __ selne(rscratch1_GP, rscratch2_AT, dst, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Conditional move ++instruct cmovP_memU1(rRegP dst, memory src, rRegN tmp1, rRegN tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveP (Binary cop (CmpN tmp1 tmp2)) (Binary dst (LoadP src)))); ++ ins_cost(250); ++ format %{ "CMOV$cop $dst,$src\t# ptr" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ int flag = $cop$$cmpcode; ++ __ ldl(rscratch2_AT, src); ++ __ cmpwu(flag, op1, op2, rscratch1_GP); ++ __ selne(rscratch1_GP, rscratch2_AT, dst, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmovP_memU2(rRegP dst, memory src, rRegP tmp1, rRegP tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveP (Binary cop (CmpP tmp1 tmp2)) (Binary dst (LoadP src)))); ++ ins_cost(250); ++ format %{ "CMOV$cop $dst,$src\t# ptr" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ int flag = $cop$$cmpcode; ++ __ ldl(rscratch2_AT, src); ++ __ cmplu(flag, op1, op2, rscratch1_GP); ++ __ selne(rscratch1_GP, rscratch2_AT, dst, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++*/ ++ ++instruct cmovL_cmpI_reg_reg(rRegL dst, rRegL src, rRegI tmp1, rRegI tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovL$cop $dst, $src\t# signed, long @cmovL_cmpI_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmpws(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovL_cmpI_reg_imm(rRegL dst, immU8 src, rRegI tmp1, rRegI tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovL$cop $dst, $src\t# signed, long @cmovL_cmpI_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ int src = $src$$constant; ++ int flag = $cop$$cmpcode; ++ __ cmpws(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct 
cmovL_cmpL_reg_reg(rRegL dst, rRegL src, rRegL tmp1, rRegL tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovL$cop $dst, $src\t# signed, long@cmovL_cmpL_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmpls(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovL_cmpL_reg_imm(rRegL dst, immU8 src, rRegL tmp1, rRegL tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovL$cop $dst, $src\t# signed, long@cmovL_cmpL_reg_imm" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ int src = $src$$constant; ++ int flag = $cop$$cmpcode; ++ __ cmpls(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovL_cmpF_reg_reg(rRegL dst, rRegL src, regF tmp1, regF tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovL$cop $dst, $src\t# signed, ptr @cmovL_cmpF_reg_reg" %} ++ ins_encode%{ ++ FloatRegister op1 = $tmp1$$FloatRegister; ++ FloatRegister op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ switch((Assembler::Condition)flag) { ++ case Assembler::equal: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovs(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::notEqual: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovs(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::greater: ++ __ c_olt_s(op2, op1); ++ __ fimovs(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::greaterEqual: ++ __ c_ole_s(op2, op1); ++ __ fimovs(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::less: ++ __ block_comment("less;;"); ++ __ c_ole_s(op2, op1); ++ __ fimovs(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::lessEqual: ++ __ block_comment("lessEqual;;"); ++ __ c_olt_s(op2, op1); ++ __ fimovs(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ } ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovL_cmpD_reg_reg(rRegL dst, rRegL src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovL$cop $dst, $src\t# signed, ptr @cmovL_cmpD_reg_reg" %} ++ ins_encode%{ ++ FloatRegister op1 = $tmp1$$FloatRegister; ++ FloatRegister op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ switch((Assembler::Condition)flag) { ++ case Assembler::equal: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::notEqual: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::greater: ++ __ c_olt_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::greaterEqual: ++ __ c_ole_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::less: ++ __ 
block_comment("less;;"); ++ __ c_ole_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::lessEqual: ++ __ block_comment("lessEqual;;"); ++ __ c_olt_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ } ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovL_cmpD_reg_imm(rRegL dst, immU8 src, regD tmp1, regD tmp2, cmpOp cop ) %{ ++ match(Set dst (CMoveL (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovL$cop $dst, $src\t# signed, ptr @cmovL_cmpD_reg_imm" %} ++ ins_encode%{ ++ FloatRegister op1 = $tmp1$$FloatRegister; ++ FloatRegister op2 = $tmp2$$FloatRegister; ++ Register dst = $dst$$Register; ++ int src = $src$$constant; ++ int flag = $cop$$cmpcode; ++ switch((Assembler::Condition)flag) { ++ case Assembler::equal: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::notEqual: ++ __ fcmpeq(op1, op2, fcc); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::greater: ++ __ c_olt_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::greaterEqual: ++ __ c_ole_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ selne(rcc, src, dst, dst); ++ break; ++ case Assembler::less: ++ __ block_comment("less;;"); ++ __ c_ole_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ case Assembler::lessEqual: ++ __ block_comment("lessEqual;;"); ++ __ c_olt_d(op2, op1); ++ __ fimovd(FcmpRES, rcc); ++ __ seleq(rcc, src, dst, dst); ++ break; ++ } ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovL_cmpU_reg_reg(rRegL dst, rRegL src, rRegI tmp1, rRegI tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveL (Binary cop (CmpU tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovL$cop $dst, $src\t# unsigned, long @cmovL_cmpU_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmpwu(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovL_cmpN_reg_reg(rRegL dst, rRegL src, rRegN tmp1, rRegN tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovL$cop $dst, $src\t# unsigned, long @cmovL_cmpN_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmpwu(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovL_cmpN_reg_imm(rRegL dst, immU8 src, rRegN tmp1, rRegN tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovL$cop $dst, $src\t# unsigned, long @cmovL_cmpN_reg_imm" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ int src = $src$$constant; ++ int flag = $cop$$cmpcode; ++ __ cmpwu(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovL_cmpP_reg_reg(rRegL dst, rRegL src, rRegP tmp1, rRegP tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst 
src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovL$cop $dst, $src\t# unsigned, long @cmovL_cmpP_reg_reg" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int flag = $cop$$cmpcode; ++ __ cmplu(flag, op1, op2); ++ __ selne(rcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++/* ++instruct cmovL_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, rRegL src) %{ ++ match(Set dst (CMoveL (Binary cop cr) (Binary dst src))); ++ ins_cost(200); ++ expand %{ ++ cmovL_regU(cop, cr, dst, src); ++ %} ++%} ++*/ ++/* memory operands no need for SW64 ++instruct cmovL_mem1(rRegL dst, memory src, rRegI tmp1, rRegI tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveL (Binary cop (CmpI tmp1 tmp2)) (Binary dst (LoadL src)))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovq$cop $dst, $src\t# signed, long" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ int flag = $cop$$cmpcode; ++ __ ldl(rscratch2_AT, src); ++ __ cmpws(flag, op1, op2, rscratch1_GP); ++ __ selne(rscratch1_GP, rscratch2_AT, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovL_mem2(rRegL dst, memory src, rRegL tmp1, rRegL tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveL (Binary cop (CmpL tmp1 tmp2)) (Binary dst (LoadL src)))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovq$cop $dst, $src\t# signed, long" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ int flag = $cop$$cmpcode; ++ __ ldl(rscratch2_AT, src); ++ __ cmpls(flag, op1, op2, rscratch1_GP); ++ __ selne(rscratch1_GP, rscratch2_AT, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovL_memU1(rRegL dst, memory src, rRegN tmp1, rRegN tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveL (Binary cop (CmpN tmp1 tmp2)) (Binary dst (LoadL src)))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovq$cop $dst, $src\t# unsigned, long" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ int flag = $cop$$cmpcode; ++ __ ldl(rscratch2_AT, src); ++ __ cmpwu(flag, op1, op2, rscratch1_GP); ++ __ selne(rscratch1_GP, rscratch2_AT, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct cmovL_memU2(rRegL dst, memory src, rRegP tmp1, rRegP tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveL (Binary cop (CmpP tmp1 tmp2)) (Binary dst (LoadL src)))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovq$cop $dst, $src\t# unsigned, long" %} ++ ins_encode%{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ int flag = $cop$$cmpcode; ++ __ ldl(rscratch2_AT, src); ++ __ cmplu(flag, op1, op2, rscratch1_GP); ++ __ selne(rscratch1_GP, rscratch2_AT, dst, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%}*/ ++/* ++instruct cmovL_memUCF(cmpOpUCF cop, rFlagsRegUCF cr, rRegL dst, memory src) %{ ++ match(Set dst (CMoveL (Binary cop cr) (Binary dst (LoadL src)))); ++ ins_cost(200); ++ expand %{ ++ cmovL_memU(cop, cr, dst, src); ++ %} ++%} ++*/ ++instruct cmovF_cmpI_reg_reg(regF dst, regF src, rRegI tmp1, rRegI tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveF (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovF$cop $dst, $src\t# signed, 
float @cmovF_cmpI_reg_reg" %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ __ cmpws(flag, op1, op2); ++ __ ifmovd(rcc, fcc); ++ __ fselne(fcc, src, dst, dst); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovF_cmpL_reg_reg(regF dst, regF src, rRegL tmp1, rRegL tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveF (Binary cop (CmpL tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovF$cop $dst, $src\t# @cmovF_cmpL_reg_reg" %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ __ cmpls(flag, op1, op2); ++ __ ifmovd(rcc, fcc); ++ __ fselne(fcc, src, dst, dst); ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovF_cmpF_reg_reg(regF dst, regF src, regF tmp1, regF tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveF (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovF$cop $dst, $src\t# @cmovF_cmpF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ switch((Assembler::Condition)flag) { ++ case Assembler::equal: ++ __ fcmpeq(op1, op2, fcc); ++ __ fselne(fcc, src, dst, dst); ++ break; ++ case Assembler::notEqual: ++ __ fcmpeq(op1, op2, fcc); ++ __ fseleq(fcc, src, dst, dst); ++ break; ++ case Assembler::greater: ++ __ c_olt_s(op2, op1); ++ __ fselne(fcc, src, dst, dst); ++ break; ++ case Assembler::greaterEqual: ++ __ c_ole_s(op2, op1); ++ __ fselne(fcc, src, dst, dst); ++ break; ++ case Assembler::less: ++ __ block_comment("less;;"); ++ __ c_ole_s(op2, op1); ++ __ fseleq(fcc, src, dst, dst); ++ break; ++ case Assembler::lessEqual: ++ __ block_comment("lessEqual;;"); ++ __ c_olt_s(op2, op1); ++ __ fseleq(fcc, src, dst, dst); ++ break; ++ } ++ ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ //no in sw8 ?? TODO djx ++instruct cmovF_cmpU_reg_reg(regF dst, regF src, rRegN tmp1, rRegN tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveF (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovF$cop $dst, $src\t# @cmovF_cmpU_reg_reg" %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ __ cmpwu(flag, op1, op2); ++ __ ifmovd(rcc, fcc); ++ __ fselne(fcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ //no in sw8 ?? 
TODO djx ++instruct cmovF_cmpP_reg_reg(regF dst, regF src, rRegP tmp1, rRegP tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveF (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovF$cop $dst, $src\t# @cmovF_cmpP_reg_reg" %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ __ cmplu(flag, op1, op2); ++ __ ifmovd(rcc, fcc); ++ __ fselne(fcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++/* ++instruct cmovF_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regF dst, regF src) %{ ++ match(Set dst (CMoveF (Binary cop cr) (Binary dst src))); ++ ins_cost(200); ++ expand %{ ++ cmovF_regU(cop, cr, dst, src); ++ %} ++%} ++*/ ++ ++instruct cmovD_cmpI_reg_reg(regD dst, regD src, rRegI tmp1, rRegI tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveD (Binary cop (CmpI tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovD$cop $dst, $src\t# @cmovD_cmpI_reg_reg" %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ __ cmpws(flag, op1, op2); ++ __ ifmovd(rcc, fcc); ++ __ fselne(fcc, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovD_cmpD_reg_reg(regD dst, regD src, regD tmp1, regD tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveD (Binary cop (CmpD tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovD$cop $dst, $src\t# @cmovD_cmpD_reg_reg" %} ++ ins_encode %{ ++ FloatRegister op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ switch((Assembler::Condition)flag) { ++ case Assembler::equal: ++ __ fcmpeq(op1, op2, fcc); ++ __ fselne(fcc, src, dst, dst); ++ break; ++ case Assembler::notEqual: ++ __ fcmpeq(op1, op2, fcc); ++ __ fseleq(fcc, src, dst, dst); ++ break; ++ case Assembler::greater: ++ __ c_olt_d(op2, op1); ++ __ fselne(fcc, src, dst, dst); ++ break; ++ case Assembler::greaterEqual: ++ __ c_ole_d(op2, op1); ++ __ fselne(fcc, src, dst, dst); ++ break; ++ case Assembler::less: ++ __ block_comment("less;;"); ++ __ c_ole_d(op2, op1); ++ __ fseleq(fcc, src, dst, dst); ++ break; ++ case Assembler::lessEqual: ++ __ block_comment("lessEqual;;"); ++ __ c_olt_d(op2, op1); ++ __ fseleq(fcc, src, dst, dst); ++ break; ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovD_cmpF_reg_reg(regD dst, regD src, regF tmp1, regF tmp2, cmpOp cop) ++%{ ++ match(Set dst (CMoveD (Binary cop (CmpF tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovD$cop $dst, $src\t# @cmovD_cmpF_reg_reg" %} ++ ins_encode %{ ++ FloatRegister op1 = as_FloatRegister($tmp1$$reg); ++ FloatRegister op2 = as_FloatRegister($tmp2$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ switch((Assembler::Condition)flag) { ++ case Assembler::equal: ++ __ fcmpeq(op1, op2, fcc); ++ __ fselne(fcc, src, dst, dst); ++ break; ++ case Assembler::notEqual: ++ __ fcmpeq(op1, op2, fcc); ++ __ fseleq(fcc, src, dst, dst); ++ break; ++ case Assembler::greater: ++ __ c_olt_s(op2, op1); ++ __ fselne(fcc, src, dst, dst); ++ break; ++ case 
Assembler::greaterEqual: ++ __ c_ole_s(op2, op1); ++ __ fselne(fcc, src, dst, dst); ++ break; ++ case Assembler::less: ++ __ block_comment("less;;"); ++ __ c_ole_s(op2, op1); ++ __ fseleq(fcc, src, dst, dst); ++ break; ++ case Assembler::lessEqual: ++ __ block_comment("lessEqual;;"); ++ __ c_olt_s(op2, op1); ++ __ fseleq(fcc, src, dst, dst); ++ break; ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovD_cmpN_reg_reg(regD dst, regD src, rRegN tmp1, rRegN tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveD (Binary cop (CmpN tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovD$cop $dst, $src\t# @cmovD_cmpN_reg_reg" %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ __ cmpwu(flag, op1, op2); ++ __ ifmovd(rcc, FcmpRES); ++ __ fselne(FcmpRES, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct cmovD_cmpP_reg_reg(regD dst, regD src, rRegP tmp1, rRegP tmp2, cmpOpU cop) ++%{ ++ match(Set dst (CMoveD (Binary cop (CmpP tmp1 tmp2)) (Binary dst src))); ++ ++ ins_cost(200); // XXX ++ format %{ "cmovD$cop $dst, $src\t# @cmovD_cmpP_reg_reg" %} ++ ins_encode %{ ++ Register op1 = $tmp1$$Register; ++ Register op2 = $tmp2$$Register; ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ int flag = $cop$$cmpcode; ++ __ cmplu(flag, op1, op2); ++ __ ifmovd(rcc, FcmpRES); ++ __ fselne(FcmpRES, src, dst, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++/* ++instruct cmovD_regUCF(cmpOpUCF cop, rFlagsRegUCF cr, regD dst, regD src) %{ ++ match(Set dst (CMoveD (Binary cop cr) (Binary dst src))); ++ ins_cost(200); ++ expand %{ ++ cmovD_regU(cop, cr, dst, src); ++ %} ++%} ++*/ ++//----------Arithmetic Instructions-------------------------------------------- ++//----------Addition Instructions---------------------------------------------- ++ ++instruct addI_rReg(rRegI dst, rRegI src1, rRegI src2) ++%{ ++ match(Set dst (AddI src1 src2)); ++ ++ format %{ "addw $src1, $src2, $dst\t# int @addI_rReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ addw(src1, src2, dst); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct addI_rReg_imm(rRegI dst, rRegI src1, immU8 src2) ++%{ ++ match(Set dst (AddI src1 src2)); ++ ins_cost(80); ++ format %{ "addw $src1, $src2, $dst\t# int @addI_rReg_imm" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ __ addw(src1, imm, dst); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/*memory operands no need for SW64 ++instruct addI_rReg_mem(rRegI dst, rRegI src1, memory src2) ++%{ ++ match(Set dst (AddI src1 (LoadI src2))); ++ ++ ins_cost(150); // XXX ++ format %{ "ldw $dst, $src2\t# int @addI_rReg_mem\n\t" ++ "addw $src1, $dst, $dst"%} ++ ins_encode %{ ++ __ ldw($dst$$Register, $src2$$Address); ++ __ addw($src1$$Register, rscratch2_AT, $dst$$Register); ++ %} ++// ins_pipe( ialu_reg_mem ); ++%} ++ ++instruct addI_mem_rReg(memory dst, rRegI src) ++%{ ++ match(Set dst (StoreI dst (AddI (LoadI dst) src))); ++ ++ ins_cost(150); // XXX ++ format %{ "ldw rscratch1_GP, $dst\t# int @addI_mem_rReg\n\t" ++ "addw rscratch1_GP, $src, rscratch1_GP\n\t" ++ "stw rscratch1_GP, $dst"%} ++ ins_encode %{ ++ __ ldw(rscratch1_GP, $dst$$Address); ++ __ addw(rscratch1_GP, $src$$Register, rscratch1_GP); 
++ __ stw(rscratch1_GP, $dst$$Address); ++ %} ++// ins_pipe(ialu_mem_reg); ++%} ++ ++instruct addI_mem_imm(memory dst, immI src) ++%{ ++ match(Set dst (StoreI dst (AddI (LoadI dst) src))); ++ ++ ins_cost(125); // XXX ++ format %{ "addw $dst, $src\t# int @addI_mem_imm" %} ++ ins_encode %{ ++ int imm = $src$$constant; ++ ++ __ ldw(rscratch2_AT, $dst$$Address); ++ if(MacroAssembler::is_uimm8(imm)) { ++ __ addw(rscratch2_AT, imm, rscratch2_AT); ++ } else { ++ __ mov_immediate32(rscratch1_GP, imm); ++ __ addw(rscratch2_AT, rscratch1_GP, rscratch2_AT); ++ } ++ __ stw(rscratch2_AT, $dst$$Address, rscratch1_GP); ++ %} ++// ins_pipe(ialu_mem_imm); ++%}*/ ++ ++instruct incI_rReg(rRegI dst, rRegI src1, immI1 src2) ++%{ ++ match(Set dst (AddI src1 src2)); ++ ins_cost(60); ++ format %{ "addw $src1, #1, $dst\t# int @incI_rReg" %} ++ ins_encode %{ ++ __ addw($src1$$Register, 1, $dst$$Register); ++ %} ++ ins_pipe(ialu_regI_imm16); ++%} ++ ++/*memory operands no need for SW64 ++instruct incI_mem(memory dst, immI1 src) ++%{ ++ match(Set dst (StoreI dst (AddI (LoadI dst) src))); ++ ++ ins_cost(125); // XXX ++ format %{ "ldw rscratch2_AT, $dst\t# int @incI_mem\n\t" ++ "addw rscratch2_AT, #1, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst"%} ++ ins_encode %{ ++ __ ldw(rscratch2_AT, $dst$$Address); ++ __ addw(rscratch2_AT, 1, rscratch2_AT); ++ __ stw(rscratch2_AT, $dst$$Address, rscratch1_GP); ++ %} ++// ins_pipe(ialu_mem_imm); ++%}*/ ++ ++// XXX why does that use AddI ++instruct decI_rReg(rRegI dst, rRegI src1, immI_M1 src2) ++%{ ++ match(Set dst (AddI src1 src2)); ++ ins_cost(60); ++ format %{ "subw $src1, #1, $dst\t# int @decI_rReg" %} ++ ins_encode %{ ++ __ subw($src1$$Register, 1, $dst$$Register); ++ %} ++ ins_pipe(ialu_regI_imm16); ++%} ++ ++/*memory operands no need for SW64 ++// XXX why does that use AddI ++instruct decI_mem(memory dst, immI_M1 src) ++%{ ++ match(Set dst (StoreI dst (AddI (LoadI dst) src))); ++ ++ ins_cost(125); // XXX ++ format %{ "ldw rscratch2_AT, $dst\t# int @decI_mem\n\t" ++ "subw rscratch2_AT, #1, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst"%} ++ ins_encode %{ ++ __ ldw(rscratch2_AT, $dst$$Address); ++ __ subw(rscratch2_AT, 1, rscratch2_AT); ++ __ stw(rscratch2_AT, $dst$$Address, rscratch1_GP); ++ %} ++// ins_pipe(ialu_mem_imm); ++%}*/ ++ ++// the same as addI_rReg_imm ++//instruct leaI_rReg_immI(rRegI dst, rRegI src0, immI src1) ++//%{ ++// match(Set dst (AddI src0 src1)); ++// ++// ins_cost(110); ++// format %{ "addw $src0, $src1, $dst\t# int @leaI_rReg_immI" %} ++// ins_encode %{ ++// Register dst = $dst$$Register; ++// Register src = $src0$$Register; ++// int imm = $src1$$constant; ++// ++// if(MacroAssembler::is_uimm8(imm)) { ++// __ addw(src, imm, dst); ++// } else { ++// __ mov_immediate32(rscratch2_AT, imm); ++// __ addw(src, rscratch2_AT, dst); ++// } ++// %} ++// ins_pipe(ialu_regL_imm16); ++//%} ++ ++instruct addL_rReg(rRegL dst, rRegL src1, rRegL src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ++ format %{ "addl $src1, $src2, $dst\t# long @addL_rReg" %} ++ ins_encode %{ ++ __ addl($src1$$Register, $src2$$Register, $dst$$Register); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct addL_rReg_imm(rRegL dst, rRegL src1, immU8 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(80); ++ format %{ "addptr $src1, $src2, $dst\t# long @addL_rReg_imm" %} ++ ins_encode %{ ++ __ addl($src1$$Register, (int)$src2$$constant, $dst$$Register); ++ %} ++ ins_pipe( ialu_regL_imm ); ++%} ++ ++/*memory operands no need for SW64 ++instruct addL_rReg_mem(rRegL dst, rRegL src1, 
memory src2) ++%{ ++ match(Set dst (AddL src1 (LoadL src2))); ++ ++ ins_cost(125); // XXX ++ format %{ "ldl $dst, $src2\t# long @addL_rReg_mem\n\t" ++ "addl src1, $dst, $dst"%} ++ ins_encode %{ ++ __ ldl($dst$$Register, $src2$$Address); ++ __ addl($src1$$Register, $dst$$Register, $dst$$Register); ++ %} ++ //ins_pipe(ialu_reg_mem); ++%} ++ ++instruct addL_mem_rReg(memory dst, rRegL src) ++%{ ++ match(Set dst (StoreL dst (AddL (LoadL dst) src))); ++ ++ ins_cost(150); // XXX ++ format %{ "ldl rscratch2_AT, $dst\t# long @addL_mem_rReg\n\t" ++ "addl rscratch2_AT, $src, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst"%} ++ ins_encode %{ ++ __ ldl(rscratch2_AT, $dst$$Address); ++ __ addl(rscratch2_AT, $src$$Register, rscratch2_AT); ++ __ stl(rscratch2_AT, $dst$$Address, rscratch1_GP); ++ %} ++// ins_pipe(ialu_mem_reg); ++%} ++ ++instruct addL_mem_imm(memory dst, immL32 src) ++%{ ++ match(Set dst (StoreL dst (AddL (LoadL dst) src))); ++ ++ ins_cost(125); // XXX ++ format %{ "ldl rscratch2_AT, $dst\t# long @addL_mem_imm\n\t" ++ "addptr rscratch2_AT, $src, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst"%} ++ ins_encode %{ ++ __ ldl(rscratch2_AT, $dst$$Address); ++ __ addptr(rscratch2_AT, (int)$src$$constant, rscratch2_AT); ++ __ stl(rscratch2_AT, $dst$$Address, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_imm); ++%}*/ ++ ++instruct incL_rReg(rRegL dst, rRegL src1, immL1 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(40); ++ format %{ "addl $src1, #1, $dst\t# int @incL_rReg" %} ++ ins_encode %{ ++ __ addl($src1$$Register, 1, $dst$$Register); ++ %} ++ ins_pipe(ialu_regL_imm); ++%} ++ ++/*memory operands no need for SW64 ++instruct incL_mem(memory dst, immL1 src) ++%{ ++ match(Set dst (StoreL dst (AddL (LoadL dst) src))); ++ ++ ins_cost(125); // XXX ++ format %{ "ldl rscratch2_AT, $dst\t# long @incL_mem\n\t" ++ "addl rscratch2_AT, #1, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst"%} ++ ins_encode %{ ++ __ ldl(rscratch2_AT, $dst$$Address); ++ __ addl(rscratch2_AT, 1, rscratch2_AT); ++ __ stl(rscratch2_AT, $dst$$Address, rscratch1_GP); ++ %} ++// ins_pipe(ialu_mem_imm); ++%}*/ ++ ++// XXX why does that use AddL ++instruct decL_rReg(rRegL dst, rRegL src1, immL_M1 src2) ++%{ ++ match(Set dst (AddL src1 src2)); ++ ins_cost(60); ++ format %{ "subl $src1, #1, $dst\t# int @decL_rReg" %} ++ ins_encode %{ ++ __ subl($src1$$Register, 1, $dst$$Register); ++ %} ++ ins_pipe(ialu_regL_imm); ++%} ++ ++/*memory operands no need for SW64 ++// XXX why does that use AddL ++instruct decL_mem(memory dst, immL_M1 src) ++%{ ++ match(Set dst (StoreL dst (AddL (LoadL dst) src))); ++ ++ ins_cost(125); // XXX ++ format %{ "ldl rscratch2_AT, $dst\t# int @decL_mem\n\t" ++ "subl rscratch2_AT, #1, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst"%} ++ ins_encode %{ ++ __ ldl(rscratch2_AT, $dst$$Address); ++ __ subl(rscratch2_AT, 1, rscratch2_AT); ++ __ stl(rscratch2_AT, $dst$$Address, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_imm); ++%}*/ ++ ++//the same as addL_rReg_imm ++//instruct leaL_rReg_immL(rRegL dst, rRegL src0, immL32 src1) ++//%{ ++// match(Set dst (AddL src0 src1)); ++// ++// ins_cost(110); ++// format %{ "addptr $src0, $src1, $dst\t# long @leaL_rReg_immL" %} ++// ins_encode %{ ++// __ addptr($src0$$Register, (int)$src1$$constant, $dst$$Register); ++// %} ++// ins_pipe(ialu_regL_regL); ++//%} ++ ++instruct addP_rReg(rRegP dst, rRegP src1, rRegP src2) ++%{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "addl $src1, $src2, $dst\t# ptr @addP_rReg" %} ++ ins_encode %{ ++ __ addl($src1$$Register, $src2$$Register, 
$dst$$Register); ++ %} ++ ins_pipe(ialu_regL_regL); //in 8 this is ialu_regI_regI?? TODO djx ++%} ++ ++instruct addP_reg_reg(rRegP dst, rRegP src1, rRegL src2) %{ ++ match(Set dst (AddP src1 src2)); ++ ++ format %{ "addl $src1, $src2, $dst #@addP_reg_reg" %} ++ ins_encode %{ ++ __ addl($src1$$Register, $src2$$Register, $dst$$Register); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct addP_rReg_imm(rRegP dst, rRegP src1, immUL8 src2) ++%{ ++ match(Set dst (AddP src1 src2)); ++ ins_cost(40); ++ format %{ "addptr $src1, $src2, $dst\t# long @addP_rReg_imm" %} ++ ins_encode %{ ++ __ addl($src1$$Register, (int)$src2$$constant, $dst$$Register); ++ %} ++ ins_pipe( ialu_regL_imm ); ++%} ++ ++//the same as addP_rReg_imm ++// XXX addP mem ops ???? ++ ++//instruct leaP_rReg_imm(rRegP dst, rRegP src0, immL32 src1) ++//%{ ++// match(Set dst (AddP src0 src1)); ++// ++// ins_cost(110); ++// format %{ "addptr $src0, $src1, $dst\t# long @leaP_rReg_imm" %} ++// ins_encode %{ ++// __ addptr($src0$$Register, (int)$src1$$constant, $dst$$Register); ++// %} ++//// ins_pipe(ialu_reg_reg); ++//%} ++ ++instruct checkCastPP(rRegP dst) ++%{ ++ match(Set dst (CheckCastPP dst)); ++ ++ size(0); //?? TODO djx ++ format %{ "#checkcastPP of $dst (empty encoding)\t# @chekCastPP" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe( empty ); ++%} ++ ++instruct castPP(rRegP dst) ++%{ ++ match(Set dst (CastPP dst)); ++ ++ size(0); ++ format %{ "#castPP of $dst (empty encoding)\t# @castPP" %} ++ ins_encode(/* empty encoding */); ++ ins_pipe(empty); ++%} ++ ++instruct castII(rRegI dst) ++%{ ++ match(Set dst (CastII dst)); ++ ++ size(0); ++ format %{ "#castII of $dst (empty encoding)\t# @castII" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_cost(0); ++ ins_pipe( empty ); ++%} ++ ++ ++// LoadP-locked same as a regular LoadP when used with compare-swap ++instruct loadPLocked(rRegP dst, memory mem) ++%{ ++ match(Set dst (LoadPLocked mem)); ++ ++ ins_cost(125); ++ format %{ "ldptr $dst, $mem #@loadPLocked" %} ++ ins_encode (load_P_enc(dst, mem)); ++ ins_pipe( ialu_reg_mem ); ++%} ++/* ++// Conditional-store of the updated heap-top. ++// Used during allocation of the shared heap. ++// Sets flags (EQ) on success. Implemented with a CMPXCHG on Intel. ++ ++instruct storePConditional(memory heap_top_ptr, ++ rax_RegP oldval, rRegP newval, ++ rFlagsReg cr) ++%{ ++ match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval))); ++ ++ format %{ "cmpxchgq $heap_top_ptr, $newval\t# (ptr) " ++ "If rax == $heap_top_ptr then store $newval into $heap_top_ptr" %} ++ opcode(0x0F, 0xB1); ++ ins_encode(lock_prefix, ++ REX_reg_mem_wide(newval, heap_top_ptr), ++ OpcP, OpcS, ++ reg_mem(newval, heap_top_ptr)); ++ ins_pipe(pipe_cmpxchg); ++%} ++ ++// Conditional-store of an int value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++instruct storeIConditional(memory mem, rax_RegI oldval, rRegI newval, rFlagsReg cr) ++%{ ++ match(Set cr (StoreIConditional mem (Binary oldval newval))); ++ effect(KILL oldval); ++ ++ format %{ "cmpxchgl $mem, $newval\t# If rax == $mem then store $newval into $mem" %} ++ opcode(0x0F, 0xB1); ++ ins_encode(lock_prefix, ++ REX_reg_mem(newval, mem), ++ OpcP, OpcS, ++ reg_mem(newval, mem)); ++ ins_pipe(pipe_cmpxchg); ++%} ++ ++// Conditional-store of a long value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. 
++instruct storeLConditional(memory mem, rax_RegL oldval, rRegL newval, rFlagsReg cr) ++%{ ++ match(Set cr (StoreLConditional mem (Binary oldval newval))); ++ effect(KILL oldval); ++ ++ format %{ "cmpxchgq $mem, $newval\t# If rax == $mem then store $newval into $mem" %} ++ opcode(0x0F, 0xB1); ++ ins_encode(lock_prefix, ++ REX_reg_mem_wide(newval, mem), ++ OpcP, OpcS, ++ reg_mem(newval, mem)); ++ ins_pipe(pipe_cmpxchg); ++%} ++ ++ ++// XXX No flag versions for CompareAndSwap{P,I,L} because matcher can't match them ++instruct compareAndSwapP(rRegI res, ++ memory mem_ptr, ++ rax_RegP oldval, rRegP newval, ++ rFlagsReg cr) ++%{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval))); ++ effect(KILL cr, KILL oldval); ++ ++ format %{ "cmpxchgq $mem_ptr,$newval\t# " ++ "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" ++ "sete $res\n\t" ++ "movzbl $res, $res" %} ++ opcode(0x0F, 0xB1); ++ ins_encode(lock_prefix, ++ REX_reg_mem_wide(newval, mem_ptr), ++ OpcP, OpcS, ++ reg_mem(newval, mem_ptr), ++ REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete ++ REX_reg_breg(res, res), // movzbl ++ Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct compareAndSwapL(rRegI res, ++ memory mem_ptr, ++ rax_RegL oldval, rRegL newval, ++ rFlagsReg cr) ++%{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval))); ++ effect(KILL cr, KILL oldval); ++ ++ format %{ "cmpxchgq $mem_ptr,$newval\t# " ++ "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" ++ "sete $res\n\t" ++ "movzbl $res, $res" %} ++ opcode(0x0F, 0xB1); ++ ins_encode(lock_prefix, ++ REX_reg_mem_wide(newval, mem_ptr), ++ OpcP, OpcS, ++ reg_mem(newval, mem_ptr), ++ REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete ++ REX_reg_breg(res, res), // movzbl ++ Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct compareAndSwapI(rRegI res, ++ memory mem_ptr, ++ rax_RegI oldval, rRegI newval, ++ rFlagsReg cr) ++%{ ++ match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval))); ++ effect(KILL cr, KILL oldval); ++ ++ format %{ "cmpxchgl $mem_ptr,$newval\t# " ++ "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" ++ "sete $res\n\t" ++ "movzbl $res, $res" %} ++ opcode(0x0F, 0xB1); ++ ins_encode(lock_prefix, ++ REX_reg_mem(newval, mem_ptr), ++ OpcP, OpcS, ++ reg_mem(newval, mem_ptr), ++ REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete ++ REX_reg_breg(res, res), // movzbl ++ Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct compareAndSwapB(rRegI res, ++ memory mem_ptr, ++ rax_RegI oldval, rRegI newval, ++ rFlagsReg cr) ++%{ ++ match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval))); ++ effect(KILL cr, KILL oldval); ++ ++ format %{ "cmpxchgb $mem_ptr,$newval\t# " ++ "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" ++ "sete $res\n\t" ++ "movzbl $res, $res" %} ++ opcode(0x0F, 0xB0); ++ ins_encode(lock_prefix, ++ REX_breg_mem(newval, mem_ptr), ++ OpcP, OpcS, ++ reg_mem(newval, mem_ptr), ++ REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete ++ REX_reg_breg(res, res), // movzbl ++ 
Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct compareAndSwapS(rRegI res, ++ memory mem_ptr, ++ rax_RegI oldval, rRegI newval, ++ rFlagsReg cr) ++%{ ++ match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval))); ++ effect(KILL cr, KILL oldval); ++ ++ format %{ "cmpxchgw $mem_ptr,$newval\t# " ++ "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" ++ "sete $res\n\t" ++ "movzbl $res, $res" %} ++ opcode(0x0F, 0xB1); ++ ins_encode(lock_prefix, ++ SizePrefix, ++ REX_reg_mem(newval, mem_ptr), ++ OpcP, OpcS, ++ reg_mem(newval, mem_ptr), ++ REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete ++ REX_reg_breg(res, res), // movzbl ++ Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct compareAndSwapN(rRegI res, ++ memory mem_ptr, ++ rax_RegN oldval, rRegN newval, ++ rFlagsReg cr) %{ ++ match(Set res (CompareAndSwapN mem_ptr (Binary oldval newval))); ++ match(Set res (WeakCompareAndSwapN mem_ptr (Binary oldval newval))); ++ effect(KILL cr, KILL oldval); ++ ++ format %{ "cmpxchgl $mem_ptr,$newval\t# " ++ "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" ++ "sete $res\n\t" ++ "movzbl $res, $res" %} ++ opcode(0x0F, 0xB1); ++ ins_encode(lock_prefix, ++ REX_reg_mem(newval, mem_ptr), ++ OpcP, OpcS, ++ reg_mem(newval, mem_ptr), ++ REX_breg(res), Opcode(0x0F), Opcode(0x94), reg(res), // sete ++ REX_reg_breg(res, res), // movzbl ++ Opcode(0xF), Opcode(0xB6), reg_reg(res, res)); ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct compareAndExchangeB( ++ memory mem_ptr, ++ rax_RegI oldval, rRegI newval, ++ rFlagsReg cr) ++%{ ++ match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval))); ++ effect(KILL cr); ++ ++ format %{ "cmpxchgb $mem_ptr,$newval\t# " ++ "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %} ++ opcode(0x0F, 0xB0); ++ ins_encode(lock_prefix, ++ REX_breg_mem(newval, mem_ptr), ++ OpcP, OpcS, ++ reg_mem(newval, mem_ptr) // lock cmpxchg ++ ); ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct compareAndExchangeS( ++ memory mem_ptr, ++ rax_RegI oldval, rRegI newval, ++ rFlagsReg cr) ++%{ ++ match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval))); ++ effect(KILL cr); ++ ++ format %{ "cmpxchgw $mem_ptr,$newval\t# " ++ "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %} ++ opcode(0x0F, 0xB1); ++ ins_encode(lock_prefix, ++ SizePrefix, ++ REX_reg_mem(newval, mem_ptr), ++ OpcP, OpcS, ++ reg_mem(newval, mem_ptr) // lock cmpxchg ++ ); ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct compareAndExchangeI( ++ memory mem_ptr, ++ rax_RegI oldval, rRegI newval, ++ rFlagsReg cr) ++%{ ++ match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval))); ++ effect(KILL cr); ++ ++ format %{ "cmpxchgl $mem_ptr,$newval\t# " ++ "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %} ++ opcode(0x0F, 0xB1); ++ ins_encode(lock_prefix, ++ REX_reg_mem(newval, mem_ptr), ++ OpcP, OpcS, ++ reg_mem(newval, mem_ptr) // lock cmpxchg ++ ); ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct compareAndExchangeL( ++ memory mem_ptr, ++ rax_RegL oldval, rRegL newval, ++ rFlagsReg cr) ++%{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval))); ++ effect(KILL cr); ++ ++ format %{ "cmpxchgq $mem_ptr,$newval\t# " ++ "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %} ++ opcode(0x0F, 0xB1); ++ ins_encode(lock_prefix, ++ REX_reg_mem_wide(newval, 
mem_ptr), ++ OpcP, OpcS, ++ reg_mem(newval, mem_ptr) // lock cmpxchg ++ ); ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct compareAndExchangeN( ++ memory mem_ptr, ++ rax_RegN oldval, rRegN newval, ++ rFlagsReg cr) %{ ++ match(Set oldval (CompareAndExchangeN mem_ptr (Binary oldval newval))); ++ effect(KILL cr); ++ ++ format %{ "cmpxchgl $mem_ptr,$newval\t# " ++ "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %} ++ opcode(0x0F, 0xB1); ++ ins_encode(lock_prefix, ++ REX_reg_mem(newval, mem_ptr), ++ OpcP, OpcS, ++ reg_mem(newval, mem_ptr) // lock cmpxchg ++ ); ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct compareAndExchangeP( ++ memory mem_ptr, ++ rax_RegP oldval, rRegP newval, ++ rFlagsReg cr) ++%{ ++ predicate(VM_Version::supports_cx8()); ++ match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval))); ++ effect(KILL cr); ++ ++ format %{ "cmpxchgq $mem_ptr,$newval\t# " ++ "If rax == $mem_ptr then store $newval into $mem_ptr\n\t" %} ++ opcode(0x0F, 0xB1); ++ ins_encode(lock_prefix, ++ REX_reg_mem_wide(newval, mem_ptr), ++ OpcP, OpcS, ++ reg_mem(newval, mem_ptr) // lock cmpxchg ++ ); ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct xaddB_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ match(Set dummy (GetAndAddB mem add)); ++ effect(KILL cr); ++ format %{ "ADDB [$mem],$add" %} ++ ins_encode %{ ++ if (os::is_MP()) { __ lock(); } ++ __ addb($mem$$Address, $add$$constant); ++ %} ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct xaddB( memory mem, rRegI newval, rFlagsReg cr) %{ ++ match(Set newval (GetAndAddB mem newval)); ++ effect(KILL cr); ++ format %{ "XADDB [$mem],$newval" %} ++ ins_encode %{ ++ if (os::is_MP()) { __ lock(); } ++ __ xaddb($mem$$Address, $newval$$Register); ++ %} ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct xaddS_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ match(Set dummy (GetAndAddS mem add)); ++ effect(KILL cr); ++ format %{ "ADDW [$mem],$add" %} ++ ins_encode %{ ++ if (os::is_MP()) { __ lock(); } ++ __ addw($mem$$Address, $add$$constant); ++ %} ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct xaddS( memory mem, rRegI newval, rFlagsReg cr) %{ ++ match(Set newval (GetAndAddS mem newval)); ++ effect(KILL cr); ++ format %{ "XADDW [$mem],$newval" %} ++ ins_encode %{ ++ if (os::is_MP()) { __ lock(); } ++ __ xaddw($mem$$Address, $newval$$Register); ++ %} ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct xaddI_no_res( memory mem, Universe dummy, immI add, rFlagsReg cr) %{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ match(Set dummy (GetAndAddI mem add)); ++ effect(KILL cr); ++ format %{ "ADDL [$mem],$add" %} ++ ins_encode %{ ++ if (os::is_MP()) { __ lock(); } ++ __ addl($mem$$Address, $add$$constant); ++ %} ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct xaddI( memory mem, rRegI newval, rFlagsReg cr) %{ ++ match(Set newval (GetAndAddI mem newval)); ++ effect(KILL cr); ++ format %{ "XADDL [$mem],$newval" %} ++ ins_encode %{ ++ if (os::is_MP()) { __ lock(); } ++ __ xaddl($mem$$Address, $newval$$Register); ++ %} ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct xaddL_no_res( memory mem, Universe dummy, immL32 add, rFlagsReg cr) %{ ++ predicate(n->as_LoadStore()->result_not_used()); ++ match(Set dummy (GetAndAddL mem add)); ++ effect(KILL cr); ++ format %{ "ADDQ [$mem],$add" %} ++ ins_encode %{ ++ if (os::is_MP()) { __ lock(); } ++ __ addq($mem$$Address, $add$$constant); ++ %} ++ ins_pipe( pipe_cmpxchg ); ++%} ++ 
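++// The locked xadd/cmpxchg forms in this block (apparently carried over from
++// x86_64) are commented out for SW64; the SW64 instructs further below
++// (compareAndSwap*, getAndAddI/getAndAddL) emit LL/SC retry loops instead.
++// A minimal sketch of that pattern, mirroring the getAndAddI encoding below
++// and assuming the store-conditional leaves a success flag (non-zero on
++// success, zero on failure) in its source register:
++//
++//   again:
++//     lldw  AT, disp(base)   // load-locked the current value into AT
++//     addw  AT, add, GP      // GP = AT + add, the new value
++//     lstw  GP, disp(base)   // store-conditional; GP becomes the success flag
++//     beq   GP, again        // flag == 0: reservation lost, retry
++//     movl  value, AT        // the result is the old value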
++instruct xaddL( memory mem, rRegL newval, rFlagsReg cr) %{ ++ match(Set newval (GetAndAddL mem newval)); ++ effect(KILL cr); ++ format %{ "XADDQ [$mem],$newval" %} ++ ins_encode %{ ++ if (os::is_MP()) { __ lock(); } ++ __ xaddq($mem$$Address, $newval$$Register); ++ %} ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct xchgB( memory mem, rRegI newval) %{ ++ match(Set newval (GetAndSetB mem newval)); ++ format %{ "XCHGB $newval,[$mem]" %} ++ ins_encode %{ ++ __ xchgb($newval$$Register, $mem$$Address); ++ %} ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct xchgS( memory mem, rRegI newval) %{ ++ match(Set newval (GetAndSetS mem newval)); ++ format %{ "XCHGW $newval,[$mem]" %} ++ ins_encode %{ ++ __ xchgw($newval$$Register, $mem$$Address); ++ %} ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct xchgI( memory mem, rRegI newval) %{ ++ match(Set newval (GetAndSetI mem newval)); ++ format %{ "XCHGL $newval,[$mem]" %} ++ ins_encode %{ ++ __ xchgl($newval$$Register, $mem$$Address); ++ %} ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct xchgL( memory mem, rRegL newval) %{ ++ match(Set newval (GetAndSetL mem newval)); ++ format %{ "XCHGL $newval,[$mem]" %} ++ ins_encode %{ ++ __ xchgq($newval$$Register, $mem$$Address); ++ %} ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct xchgP( memory mem, rRegP newval) %{ ++ match(Set newval (GetAndSetP mem newval)); ++ format %{ "XCHGQ $newval,[$mem]" %} ++ ins_encode %{ ++ __ xchgq($newval$$Register, $mem$$Address); ++ %} ++ ins_pipe( pipe_cmpxchg ); ++%} ++ ++instruct xchgN( memory mem, rRegN newval) %{ ++ match(Set newval (GetAndSetN mem newval)); ++ format %{ "XCHGL $newval,$mem]" %} ++ ins_encode %{ ++ __ xchgl($newval$$Register, $mem$$Address); ++ %} ++ ins_pipe( pipe_cmpxchg ); ++%} ++*/ ++ ++// lsp check the T11 register? replace to t12?? ++instruct partialSubtypeCheck( rRegP result, no_T11_rRegP sub, no_T11_rRegP super) %{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ //effect(KILL tmp); ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "partialSubtypeCheck result=$result, sub=$sub, super=$super, tmp=rscratch3 " %} ++ ++ ins_encode( enc_PartialSubtypeCheck(result, sub, super) ); ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct storePConditional(indirect mem, v0_RegP oldval, rRegP newval, rFlagsReg cr) %{ ++ match(Set cr(StorePConditional mem(Binary oldval newval))); ++ effect(KILL oldval); ++ //size(56); ++ format %{ "StorePConditional cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ ++ __ storeLcon(oldval, $mem$$Address, newval); ++ //__ movl(cr, AT); ++ ++ %} ++ ins_pipe(long_memory_op); ++%} ++ ++// Conditional-store of an int value. ++// ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG on Intel. ++instruct storeIConditional(indirect mem, v0_RegI oldval, rRegI newval, rFlagsReg cr) %{ ++ match(Set cr(StoreIConditional mem(Binary oldval newval))); ++ effect(KILL oldval); ++ format %{ "CMPXCHG32 $newval, $mem, $oldval \t# @storeIConditional" %} ++ ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ guarantee($mem$$index == sp->encoding() && $mem$$disp == 0, "impossible encoding storeIConditional"); ++ __ storeIcon(oldval, $mem$$Address, newval); ++ //__ movl(cr, AT); ++ %} ++ ins_pipe(long_memory_op); ++ %} ++ ++ ++ // Conditional-store of a long value. 
++ // ZF flag is set on success, reset otherwise. Implemented with a CMPXCHG. ++ ++instruct storeLConditional(indirect mem, v0_RegL oldval, rRegL newval, rFlagsReg cr) %{ ++ match(Set cr(StoreLConditional mem (Binary oldval newval))); ++ effect(KILL oldval);//TODO:kill oldval? jzy ++ //size(56); ++ format %{ "StoreLConditional cmpxchg $mem, $newval\t# If $oldval == $mem then store $newval into $mem" %} ++ ins_encode %{ ++ Register oldval = $oldval$$Register; ++ Register newval = $newval$$Register; ++ Register cr = $cr$$Register; ++ guarantee($mem$$index == sp->encoding() && $mem$$disp == 0, "impossible encoding storeLConditional"); ++ ++ __ storeLcon(oldval, $mem$$Address, newval); ++ ++ %} ++ ins_pipe(long_memory_op); ++ %} ++ ++//FIXME: ++instruct compareAndSwapP( rRegI res, indirect mem, v0_RegP oldval, rRegP newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndSwapP mem (Binary oldval newval))); ++ effect(KILL cr, KILL oldval); ++ //size(60); ++ format %{ ++ "CMPXCHG $newval, $mem, $oldval @ compareAndSwapP\n\t" ++ "If $oldval == $mem then store $newval into $mem\n\t" ++ "sete $res " ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++// Address addr($mem_ptr$$Register, 0); ++ guarantee($mem$$index == sp->encoding() && $mem$$disp == 0, "impossible encoding compareAndSwapP"); ++ //SizedScope sc(&_masm, 100); ++ __ cmpxchg(newval, $mem$$Address, oldval); ++ __ seleq(rcc, 1, R0, res); ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++ ++instruct compareAndSwapL( rRegI res, indirect mem, v0_RegL oldval, rRegL newval, rFlagsReg cr) %{ ++ //predicate(VM_Version::supports_cx8()); ++ match(Set res (CompareAndSwapL mem (Binary oldval newval))); ++ effect(KILL cr, KILL oldval); ++// effect(TEMP tmpt10, USE_KILL oldval); ++ //size(60); //TODO: ZHJ20180613 ++ format %{ ++ "CMPXCHG $newval, $mem, $oldval @ compareAndSwapL\n\t" ++ "If $oldval == $mem then store $newval into $mem\n\t" ++ "sete $res " ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++// Address addr($mem_ptr$$Register, 0); ++ ++ //SizedScope sc(&_masm, 100); ++ guarantee($mem$$index == sp->encoding() && $mem$$disp == 0, "impossible encoding compareAndSwapL"); ++ __ cmpxchg(newval, $mem$$Address, oldval); ++ __ seleq(rcc, 1, R0, res); ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++instruct compareAndSwapI( rRegI res, indirect mem, v0_RegI oldval, rRegI newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndSwapI mem (Binary oldval newval))); ++ effect(KILL cr, KILL oldval); ++ //size(60); ++// match(CompareAndSwapI mem_ptr (Binary oldval newval)); ++ format %{ ++ "CMPXCHG32 $newval, $mem, $oldval @ compareAndSwapI\n\t" ++ "If $oldval == $mem then store $newval into $mem\n\t" ++ "sete $res " ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++// Address addr($mem_ptr$$Register, 0); ++ guarantee($mem$$index == sp->encoding() && $mem$$disp == 0, "impossible encoding compareAndSwapI"); ++ ++ //SizedScope sc(&_masm, 100); ++ __ cmpxchg32(newval, $mem$$Address, oldval); ++ __ seleq(rcc, 1, R0, res); ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++instruct compareAndSwapN( rRegI res, indirect mem, v0_RegN oldval, rRegN newval, rFlagsReg cr) %{ ++ match(Set res (CompareAndSwapN mem (Binary oldval newval))); ++ effect(KILL cr, KILL oldval); ++ //effect(KILL cr, USE_KILL oldval); ++// 
effect(TEMP tmpT10, USE_KILL oldval); ++ //size(64); ++ format %{ ++ "CMPXCHG32 $newval, $mem, $oldval @ compareAndSwapI\n\t" ++ "If $oldval == $mem then store $newval into $mem\n\t" ++ "sete $res" ++ %} ++ ins_encode %{ ++ Register newval = $newval$$Register; ++ Register oldval = $oldval$$Register; ++ Register res = $res$$Register; ++// Address addr($mem_ptr$$Register, 0); ++// Label L; ++ guarantee($mem$$index == sp->encoding() && $mem$$disp == 0, "impossible encoding compareAndSwapN"); ++ // cmpxchg32 is implemented with ll/sc, which will do sign extension. ++ // Thus, we should extend oldval's sign for correct comparision. ++ ++ // __ stop("?compareAndSwapN jzy"); ++ __ addw(oldval, 0, oldval); ++ __ cmpxchg32(newval, $mem$$Address, oldval); ++// __ selne(AT, 1, AT, res); ++ __ seleq(rcc, 1, R0, res); ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++instruct getAndAddI(indirect mem, rRegI add, rRegI val, rFlagsReg cr) %{ ++ // predicate( n->get_int() == 1 && n->get_int() == -1); ++ // val = *mem & *mem = *mem + add ++ match(Set val (GetAndAddI mem add)); ++ effect(KILL cr); ++ format %{ "xaddI [$mem],$add\t@getAndAddI" %} ++ ins_encode %{ ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ Register value = $val$$Register; ++ Register add = $add$$Register; ++ Label again; ++ guarantee($mem$$index == sp->encoding() && $mem$$disp == 0, "impossible encoding getAndAddI"); ++ SizedScope sc(&_masm, 40); ++ if(UseSW8A) { ++ __ BIND(again); ++ __ lldw(AT, disp, base); ++ __ addw(AT, add, GP); ++ __ lstw(GP, disp, base); ++ __ beq_l(GP, again); ++ __ movl(value, AT); ++ } else { ++ __ BIND(again); ++ __ lldw(AT, disp, base); ++ __ ldi(GP, 1, R0); ++ __ wr_f(GP); ++ __ addw(AT, add, GP); ++ __ align(8); // must align ++ __ lstw(GP, disp, base); ++ __ rd_f(GP); ++ __ beq_l(GP, again); ++ __ movl(value, AT); ++ } ++ %} ++ ins_pipe( long_memory_op ); ++%} ++ ++instruct getAndAddL( indirect mem, rRegL add, rRegL val, rFlagsReg cr) %{ ++ // val = *mem & *mem = *mem + add ++ match(Set val (GetAndAddL mem add)); ++ effect(KILL cr); ++ format %{ "xaddL [$mem],$add\t@ getAndAddL" %} ++ ins_encode %{ ++ Register base = as_Register($mem$$base); ++ int disp = $mem$$disp; ++ Register value = $val$$Register; ++ Register add = $add$$Register; ++ Label again; ++ guarantee($mem$$index == sp->encoding() && $mem$$disp == 0, "impossible encoding getAndAddL"); ++ SizedScope sc(&_masm, 40); ++ if(UseSW8A) { ++ __ BIND(again); ++ __ lldl(AT, disp, base); ++ __ addl( AT, add, GP); ++ __ lstl(GP, disp, base); ++ __ beq_l(GP, again); ++ __ movl(value, AT); ++ } else { ++ __ BIND(again); ++ __ lldl(AT, disp, base); ++ __ ldi(GP, 1, R0); ++ __ wr_f(GP); ++ __ addl( AT, add, GP); ++ __ align(8); // must align ++ __ lstl(GP, disp, base); ++ __ rd_f(GP); ++ __ beq_l(GP, again); ++ __ movl(value, AT); ++ } ++ %} ++ ins_pipe( long_memory_op ); ++%} ++//----------Subtraction Instructions------------------------------------------- ++ ++// Integer Subtraction Instructions ++instruct subI_rReg(rRegI dst, rRegI src1, rRegI src2) ++%{ ++ match(Set dst (SubI src1 src2)); ++ ++ format %{ "subw $src1, $src2, $dst\t# int\t@subI_rReg" %} ++ ++ ins_encode%{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ subw(src1, src2, dst); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct subI_rReg_imm(rRegI dst, rRegI src1, immU8 src2) ++%{ ++ match(Set dst (SubI src1 src2)); ++ ins_cost(80); ++ format %{ "subw $src1, $src2, $dst\t# int\t@subI_rReg_imm" %} ++ 
ins_encode%{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int imm = $src2$$constant; ++ ++ __ subw(src1, imm, dst); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++/* memory operands no need for SW64 ++instruct subI_rReg_mem(rRegI dst, memory src1, rRegI src2) ++%{ ++ match(Set dst (SubI src2 (LoadI src1))); ++ ++ ins_cost(125); ++ format %{ ++ "ldw $dst, $src1\t# int\t@subI_rReg_mem\n\t" ++ "subw $src2, $dst, $dst" ++ %} ++ ins_encode%{ ++ Register dst = $dst$$Register; ++ Address src1 = $src1$$Address; ++ Register src2 = $src2$$Register; ++ __ ldw(dst, src1); ++ __ subw(src2, dst, dst); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct subI_mem_rReg(memory dst, rRegI src) ++%{ ++ match(Set dst (StoreI dst (SubI (LoadI dst) src))); ++ ++ ins_cost(150); ++ format %{ ++ "ldw rscratch2_AT, $dst\t# int\t@subI_mem_rReg\n\t" ++ "subw rscratch2_AT, $src, $dst\n\t" ++ "stw rscratch2_AT, $dst, rscratch1_GP" ++ %} ++ ++ ins_encode%{ ++ Address dst = $dst$$Address; ++ Register src = $src$$Register; ++ __ ldw(rscratch2_AT, dst); ++ __ subw(rscratch2_AT, src, rscratch2_AT); ++ __ stw(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_reg); ++%} ++ ++instruct subI_mem_imm(memory dst, immI src) ++%{ ++ match(Set dst (StoreI dst (SubI (LoadI dst) src))); ++ ++ ins_cost(125); // XXX ++ format %{ ++ "ldw rscratch2_AT, $dst\t# int\t@subI_mem_imm\n\t" ++ "subw rscratch2_AT, $src, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst" ++ %} ++ ++ ins_encode%{ ++ Address dst = $dst$$Address; ++ int src = $src$$constant; ++ __ ldw(rscratch2_AT, dst); ++ __ subw(rscratch2_AT, src, rscratch2_AT); ++ __ stw(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_imm); ++%}*/ ++ ++instruct subL_rReg(rRegL dst, rRegL src1, rRegL src2) ++%{ ++ match(Set dst (SubL src1 src2)); ++ ++ format %{ "subl $src1, $src2, $dst\t# long\t@subL_rReg" %} ++ ++ ins_encode%{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ subl(src1, src2, dst); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct subL_rReg_imm(rRegI dst, rRegI src1, immUL8 src2) ++%{ ++ match(Set dst (SubL src1 src2)); ++ ++ format %{"subl $src1, $src2, $dst\t# long\t@subL_rReg_imm" %} ++ ++ ins_encode%{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int src2 = $src2$$constant; ++ ++ __ subl(src1, src2, dst); ++ %} ++ ins_pipe(ialu_regL_imm); ++%} ++ ++/* memory operands no need for SW64 ++instruct subL_rReg_mem(rRegL dst, rRegL src1, memory src2) ++%{ ++ match(Set dst (SubL src1 (LoadL src2))); ++ ++ ins_cost(125); ++ format %{ ++ "ldl rscratch2_AT, $src2\t# long\t@subL_rReg_mem\n\t" ++ "subl $src1, rscratch2_AT, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst" ++ %} ++ ++ ins_encode%{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Address src2 = $src2$$Address; ++ __ ldl(rscratch2_AT, src2); ++ __ subl(src1, rscratch2_AT, dst); ++ %} ++ //ins_pipe(ialu_reg_mem); ++%} ++ ++instruct subL_mem_rReg(memory dst, rRegL src) ++%{ ++ match(Set dst (StoreL dst (SubL (LoadL dst) src))); ++ ++ ins_cost(150); ++ format %{ ++ "ldl rscratch2_AT, $dst\t# long\t@subL_mem_rReg\n\t" ++ "subl rscratch2_AT, $src, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst" ++ %} ++ ++ ins_encode%{ ++ Address dst = $dst$$Address; ++ Register src = $src$$Register; ++ __ ldl(rscratch2_AT, dst); ++ __ subl(rscratch2_AT, src, rscratch2_AT); ++ __ stl(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_reg); ++%} ++ ++instruct 
subL_mem_imm(memory dst, immL32 src) ++%{ ++ match(Set dst (StoreL dst (SubL (LoadL dst) src))); ++ ++ ins_cost(125); // XXX ++ format %{ ++ "ldptr rscratch2_AT, $dst\t# long\t@subL_mem_imm\n\t" ++ "subptr rscratch2_AT, $src, rscratch2_AT\n\t" ++ "stptr rscratch2_AT, $dst" ++ %} ++ ++ ins_encode%{ ++ Address dst = $dst$$Address; ++ int src = $src$$constant; ++ __ ldl(rscratch2_AT, dst); ++ __ mov_immediate32s(rscratch1_GP, src); //lsp to check sign-extend?? ++ __ subl(rscratch2_AT, rscratch1_GP, rscratch2_AT); ++ __ stl(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_imm); ++%}*/ ++ ++// Subtract from a pointer ++// XXX hmpf??? ++instruct subP_rReg(rRegP dst, rRegP src1, rRegI src2, immI0 zero) ++%{ ++ match(Set dst (AddP src1 (SubI zero src2))); ++ ++ format %{ "subw R0, $src2, $dst\t# ptr - int\t@subP_rReg\n\t" ++ "addl $src1, $dst, $dst" ++ %} ++ ++ ins_encode%{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ subw(R0, src2, dst); ++ __ addl(src1, dst, dst); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct negI_rReg(rRegI dst, rRegI src, immI0 zero) ++%{ ++ match(Set dst (SubI zero src)); ++ ++ format %{ "subw R0, $src,$dst\t# int\t@negI_rReg" %} ++ ++ ins_encode%{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subw(R0, src, dst); ++ %} ++ ins_pipe(ialu_regI_imm16); ++%} ++ ++/* memory operands no need for SW64 ++instruct negI_mem(memory dst, immI0 zero) ++%{ ++ match(Set dst (StoreI dst (SubI zero (LoadI dst)))); ++ ++ format %{ "ldw rscratch2_AT, $dst\t# int\t@negI_mem\n\t" ++ "subw R0, rscratch2_AT, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst, rscratch1_GP" ++ %} ++ ++ ins_encode%{ ++ Address dst = $dst$$Address; ++ __ ldw(rscratch2_AT, dst); ++ __ subw(R0, rscratch2_AT, rscratch2_AT); ++ __ stw(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_reg); ++%}*/ ++ ++instruct negL_rReg(rRegL dst, rRegL src, immL0 zero) ++%{ ++ match(Set dst (SubL zero src)); ++ ++ format %{ "subl R0, $src, $dst\t# long \t@negL_rReg" %} ++ ++ ins_encode%{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subl(R0, src, dst); ++ %} ++ ins_pipe(ialu_regL_imm); ++%} ++ ++/* memory operands no need for SW64 ++instruct negL_mem(memory dst, immL0 zero) ++%{ ++ match(Set dst (StoreL dst (SubL zero (LoadL dst)))); ++ ++ format %{ "ldl rscratch2_AT, $dst\t# long\t@negL_mem\n\t" ++ "subl R0, rscratch2_AT, rscratch2_AT\n\t" ++ "stl( rscratch2_AT, $dst, rscratch1_GP" ++ %} ++ ++ ins_encode%{ ++ Address dst = $dst$$Address; ++ __ ldl(rscratch2_AT, dst); ++ __ subl(R0, rscratch2_AT, rscratch2_AT); ++ __ stl(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_reg); ++%}*/ ++ ++ ++//----------Multiplication/Division Instructions------------------------------- ++// Integer Multiplication Instructions ++// Multiply Register ++ ++instruct mulI_rReg(rRegI dst, rRegI src1, rRegI src2) ++%{ ++ match(Set dst (MulI src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mulw $src1, $src2, $dst\t# int @mulI_rReg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mulw(src1, src2, dst); ++ %} ++ ins_pipe( ialu_mult ); ++%} ++ ++instruct mulI_rReg_imm(rRegI dst, rRegI src, immU8 imm) ++%{ ++ match(Set dst (MulI src imm)); ++ ++ ins_cost(300); ++ format %{ "mulw $src, $dst, $dst \t# int @mulI_rReg_imm\n\t" %} ++ ins_encode %{ ++ Register src1 = $src$$Register; ++ int src2 = $imm$$constant; ++ Register dst = 
$dst$$Register; ++ __ mulw(src1, src2, dst); ++ %} ++ ins_pipe( ialu_mult_imm ); ++%} ++ ++/* memory operands no need for SW64 ++instruct mulI_mem(rRegI dst, memory src1, rRegI src2) ++%{ ++ match(Set dst (MulI src2 (LoadI src1))); ++ ++ ins_cost(350); ++ format %{ "ldw $dst, $src1\t# int @mulI_mem \n\t" ++ "mulw $dst, $src2, $dst" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Address src1 = $src1$$Address; ++ Register src2 = $src2$$Register; ++ __ ldw(dst, src1); ++ __ mulw(src2, dst, dst); ++ %} ++// ins_pipe(ialu_reg_mem_alu0); ++%} ++ ++instruct mulI_mem_imm(rRegI dst, memory src, immI imm) ++%{ ++ match(Set dst (MulI (LoadI src) imm)); ++ ++ ins_cost(300); ++ format %{ "ldw rscratch2_AT, $src, $imm\t# int @mulI_mem_imm \n\t" ++ "mov_immediate32 rscratch1_GP, $imm\n\t" ++ "mulw rscratch2_AT, $imm, $dst"%} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ int val = $imm$$constant; ++ __ ldw(rscratch2_AT, src); ++ __ mov_immediate32(rscratch1_GP, val); ++ __ mulw(rscratch2_AT, rscratch1_GP, dst); ++ %} ++// ins_pipe(ialu_reg_mem_alu0); ++%}*/ ++ ++instruct mulL_rReg(rRegL dst, rRegL src1, rRegL src2) ++%{ ++ match(Set dst (MulL src1 src2)); ++ ++ ins_cost(300); ++ format %{ "mull $src1, $src2, $dst\t# long @mulL_rReg" %} ++ ins_encode %{ ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ __ mull(src1, src2, dst); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct mulL_rReg_imm(rRegL dst, rRegL src, immUL8 imm) ++%{ ++ match(Set dst (MulL src imm)); ++ ++ ins_cost(300); ++ format %{ "mull $src, $imm, $dst \t# long\t@mulL_rReg_imm\n\t" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int imm = $imm$$constant; ++ ++ __ mull(src, imm, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++/* memory operands no need for SW64 ++instruct mulL_mem(rRegL dst, memory src1, rRegL src2) ++%{ ++ match(Set dst (MulL src2 (LoadL src1))); ++ ++ ins_cost(350); ++ format %{ "ldptr $dst, $src1 \t# long\t@mulL_mem\n\t" ++ "mull $src2, $dst, $dst" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Address src1 = $src1$$Address; ++ Register src2 = $src2$$Register; ++ __ ldptr(dst, src1); ++ __ mull(src2, dst, dst); ++ %} ++// ins_pipe(ialu_reg_mem_alu0); ++%} ++ ++instruct mulL_mem_imm(rRegL dst, memory src, immL32 imm) ++%{ ++ match(Set dst (MulL (LoadL src) imm)); ++ ++ ins_cost(300); ++ format %{ "ldptr $dst, $src\t# long\t@mulL_mem_imm\n\t" ++ "mov_immediate32 rscratch1_GP, $imm\n\t" ++ "mull $dst, rscratch1_GP, $dst"%} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ int val = $imm$$constant; ++ __ ldptr(dst, src); ++ __ mov_immediate32s(rscratch1_GP, val); ++ __ mull(dst, rscratch1_GP, dst); ++ %} ++// ins_pipe(ialu_reg_mem_alu0); ++%}*/ ++ ++/*sw have no such instruct ++instruct mulHiL_rReg(rdx_RegL dst, no_rax_RegL src, rax_RegL rax)//??todo ++%{ ++ match(Set dst (MulHiL src rax)); ++ ++ ins_cost(300); ++ format %{ "imulq RDX:RAX, RAX, $src\t# mulhi" %} ++ ins_encode %{ ++// (REX_reg_wide(src), OpcP, reg_opc(src)); ++ %} ++// ins_pipe(ialu_reg_reg_alu0); ++%} ++*/ ++ ++instruct divI_rReg(rRegI dst, rRegI src, rRegI div) ++%{ ++ match(Set dst (DivI src div)); ++ ++ ins_cost(30*100+10*100); // XXX ++ format %{ "divI $src, $div $dst @divI_rReg" %}//TODO: How to represent the logic written below?jx ++ ++ ins_encode%{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ Register div = $div$$Register; ++ //__ 
stop("divI_rReg"); ++ ++ if (UseSW8A) { ++ __ corrected_idivw(src, div, dst); ++ } else if (FastIntDiv) { ++ __ idiv_sw(src, div, dst); ++ } else { ++ __ saveTRegisters(); ++ if(src == A0){ ++ __ movl(rscratch3, src); ++ __ movl(A0, div); ++ __ movl(A1, rscratch3); ++ }else{ ++ __ movl(A0, div); ++ __ movl(A1, src); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::sdiv), 2); ++ __ movl(pv, V0); ++ __ restoreTRegisters(); ++ __ movl(dst, pv); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct divL_rReg(rRegL dst, rRegL src, rRegL div) ++%{ ++ match(Set dst (DivL src div)); ++ ++ ins_cost(30*100+10*100); // XXX ++ format %{ "divL $src $div $dst @divL_rReg" %}//TODO: How to represent the logic written below?jx ++ ins_encode%{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ Register div = $div$$Register; ++ if (UseSW8A) { ++ __ corrected_idivl(src, div, dst); ++ } else if (FastLongDiv) { ++ Label ldiv, exit; ++ //AT does not need to be saved(in pushad function) before calling ++ //since it has been defined as NS ++ __ slll(dst, 0xb, rscratch3); //logically left shift 11-bit ++ __ sral(rscratch3, 0xb, rscratch3); //arithmetically right shift 11-bit ++ ++ // when 1 was put in 53 bit-position, ++ // the result would be different from the original one ++ ++ // which means when the value of op1 is [0xFFE0000000000000, 0x20000000000000], ++ // the result would be different after slll and sral ++ // why?jx ++ __ cmpeq(dst, rscratch3, rscratch3); ++ ++ __ bne_l(rscratch3, ldiv); ++ ++ __ saveTRegisters(); ++ if(src == A0){ ++ __ movl(pv, src); ++ __ movl(A0, div); ++ __ movl(A1, pv); ++ }else{ ++ __ movl(A0, div); ++ __ movl(A1, src); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv), 2); ++ __ movl(pv, V0); ++ __ restoreTRegisters(); ++ __ movl(dst, pv); ++ __ beq_l(R0, exit); ++ ++ __ BIND(ldiv); ++ __ ldiv_sw(src, div, dst); ++ ++ __ BIND(exit); ++ } else { ++ __ saveTRegisters(); ++ if(src == A0){ ++ __ movl(rscratch3, src); ++ __ movl(A0, div); ++ __ movl(A1, rscratch3); ++ }else{ ++ __ movl(A0, div); ++ __ movl(A1, src); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv), 2); ++ __ movl(pv, V0); ++ __ restoreTRegisters(); ++ __ movl(dst, pv); ++ } ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++ ++ ++//----------------------------------------------------------------------------- ++ ++instruct modI_rReg(rRegI dst, rRegI src1, rRegI src2) ++%{ ++ match(Set dst (ModI src1 src2)); ++ ++ ins_cost(300); // XXX ++ format %{ "modi $src1, $src2, $dst @ modI_rReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++// __ stop("modI_rReg"); ++ ++ if (UseSW8A) { ++ __ remw(src1, src2, dst); ++ } else if (FastIntRem) { ++ __ irem_sw(src1, src2, dst); ++ } else { ++ __ saveTRegisters(); ++ if(src1 == A0){ ++ __ movl(pv, src1); ++ __ movl(A0, src2); ++ __ movl(A1, pv); ++ }else{ ++ __ movl(A0, src2); ++ __ movl(A1, src1); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::srem), 2); ++ __ movl(pv, V0); ++ __ restoreTRegisters(); ++ __ movl(dst, pv); ++ } ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct modL_rReg(rRegL dst, rRegL src1, rRegL src2) ++%{ ++ match(Set dst (ModL src1 src2)); ++ ++ ins_cost(300); // XXX ++ format %{ "modL $src1, $src2, $dst\t@modL_rReg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src1 = as_Register($src1$$reg); ++ Register src2 = as_Register($src2$$reg); ++ ++ if (UseSW8A) { ++ __ 
reml(src1, src2, dst); ++ } else if (FastLongRem) { ++ Label lrem, exit; ++// Register tem = operand; ++ ++ __ slll(src1, 0xb, rscratch3); ++ __ sral(rscratch3, 0xb, rscratch3); ++ __ cmpeq(src1, rscratch3, rscratch3); ++ __ bne_l(rscratch3, lrem); ++ ++ __ saveTRegisters(); ++ if(src1 == A0){ ++ __ movl(rscratch3, src1); ++ __ movl(A0, src2); ++ __ movl(A1, rscratch3); ++ }else{ ++ __ movl(A0, src2); ++ __ movl(A1, src1); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem), 2); ++ __ movl(pv, V0); ++ __ restoreTRegisters(); ++ __ movl(dst, pv); ++ __ beq_l(R0, exit); ++ ++ __ BIND(lrem); ++ __ lrem_sw(src1, src2, dst); ++ ++ __ BIND(exit); ++ } else { ++ __ saveTRegisters(); ++ if(src1 == A0){ ++ __ movl(rscratch3, src1); ++ __ movl(A0, src2); ++ __ movl(A1, rscratch3); ++ }else{ ++ __ movl(A0, src2); ++ __ movl(A1, src1); ++ } ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem), 2); ++ __ movl(pv, V0); ++ __ restoreTRegisters(); ++ __ movl(dst, pv); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++/*No need for SW64 ++// Integer Shift Instructions ++// Shift Left by one ++instruct salI_rReg_1(rRegI dst, rRegI src, immI1 shift) ++%{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ "slll $src, #1, $dst\t# @salI_rReg_1" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ slll(src, 1, dst); ++ __ addw(dst, R0, dst);//lsp to check ok?? ++ ++ %} ++ ins_pipe(ialu_regI_imm); ++%}*/ ++ ++/* memory operands no need for SW64 ++// Shift Left by one ++instruct salI_mem_1(memory dst, immI1 shift) ++%{ ++ match(Set dst (StoreI dst (LShiftI (LoadI dst) shift))); ++ ++ format %{ "ldw rscratch2_AT, $dst\t# @salI_mem_1\n\t" ++ "slll rscratch2_AT, #1, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst" %} ++ ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ __ ldw(rscratch2_AT, dst); ++ __ slll(rscratch2_AT, 1, rscratch2_AT); ++ __ stw(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_imm); ++%}*/ ++ ++// Shift Left by 8-bit immediate ++instruct salI_rReg_imm(rRegI dst, rRegI src, immU8 shift) ++%{ ++ match(Set dst (LShiftI src shift)); ++ ins_cost(80); ++ format %{ "slll $src, $shift�x1f, $dst\t# @salI_rReg_imm\n\t" ++ "addw $dst, #0, $dst" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int shamt = $shift$$constant; ++ ++ if(UseSW8A) { ++ __ sllw(src, shamt, dst); ++ } else { ++ __ slll(src, shamt&0x1f, dst); ++ __ addw(dst, 0, dst); ++ } ++ %} ++ ins_pipe(ialu_regI_imm16); ++%} ++ ++/* memory operands no need for SW64 ++// Shift Left by 8-bit immediate ++instruct salI_mem_imm(memory dst, immI8 shift) ++%{ ++ match(Set dst (StoreI dst (LShiftI (LoadI dst) shift))); ++ ++ format %{ "ldw rscratch2_AT, $dst\t# @salI_mem_imm\n\t" ++ "slll rscratch2_AT, $shift�x1f, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst, rscratch1_GP" %}//?shift ++ ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ int shamt = $shift$$constant; ++ __ ldw(rscratch2_AT, dst); ++ __ slll(rscratch2_AT, shamt&0x1f, rscratch2_AT); ++ __ stw(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_imm); ++%} ++*/ ++// Shift Left by variable//sny reg_reg ++instruct salI_rReg_CL(rRegI dst, rRegI src, rRegI shift) ++%{ ++ match(Set dst (LShiftI src shift)); ++ ++ format %{ ++ "and_ins $shift, #0x1f, rscratch3\t #@salI_rReg_CL\n\t" ++ "slll $src, rscratch3, $dst\n\t" ++ "movws $dst, $dst" ++ %}//?shift ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ Register shamt 
= $shift$$Register; ++ if (UseSW8A) { ++ __ sllw(src, shamt, dst); ++ } else { ++ __ and_ins(shamt, 0x1f, rscratch3);//31(0x1f) ++ __ slll(src, rscratch3, dst); ++ __ movws(dst, dst);// Do we need this operation?jx lsp?? ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++/* memory operands no need for SW64 ++// Shift Left by variable ++instruct salI_mem_CL(memory dst, rRegI shift) ++%{ ++ match(Set dst (StoreI dst (LShiftI (LoadI dst) shift))); ++ ++ format %{ ++ "ldw rscratch2_AT, $dst\t #@salI_mem_CL\n\t" ++ "and_ins $shift, 0x1f, $shift\n\t" ++ "slll rscratch2_AT, $shift, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst, rscratch1_GP" ++ %} ++ ++ ins_encode %{ ++ Register shamt = $shift$$Register; ++ Address dst = $dst$$Address; ++ __ ldw(rscratch2_AT, dst); ++ __ and_ins(shamt, 0x1f, shamt); ++ __ slll(rscratch2_AT, shamt, rscratch2_AT); ++ __ stw(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_reg); ++%} ++*/ ++ ++/* no need for SW64 ++// Arithmetic shift right by one ++instruct sarI_rReg_1(rRegI dst, rRegI src, immI1 shift) ++%{ ++ match(Set dst (RShiftI src shift)); ++ ++ format %{ "sral $src, #1, $dst\t #@sarI_rReg_1" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ sral(src, 1, dst); ++ %} ++ ins_pipe(ialu_regI_imm); ++%}*/ ++ ++/* memory operands no need for SW64 ++// Arithmetic shift right by one ++instruct sarI_mem_1(memory dst, immI1 shift) ++%{ ++ match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); ++ ++ format %{ ++ "ldw rscratch2_AT, $dst\t #@sarI_mem_1\n\t" ++ "sral rscratch2_AT, #1, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst, rscratch1_GP" ++ %} ++ ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ ++ __ ldw(rscratch2_AT, dst); ++ __ sral(rscratch2_AT, 1, rscratch2_AT); ++ __ stw(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_imm); ++%}*/ ++ ++// Arithmetic Shift Right by 8-bit immediate ++instruct sarI_rReg_imm(rRegI dst, rRegI src, immU8 shift) ++%{ ++ match(Set dst (RShiftI src shift)); ++ ++ format %{ ++ "sral $src, $shift&0x1f, $dst\t #@sarI_rReg_imm" ++ %} ++ ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ int shamt = $shift$$constant; ++ if(UseSW8A) { ++ __ sraw(src, shamt, dst); ++ } else { ++ __ sral(src, shamt&0x1f, dst); ++ } ++ %} ++ ins_pipe(ialu_regI_imm16); ++%} ++ ++/* memory operands no need for SW64 ++// Arithmetic Shift Right by 8-bit immediate ++instruct sarI_mem_imm(memory dst, immI8 shift) ++%{ ++ match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); ++ ++ format %{ ++ "ldw rscratch2_AT, $dst\t #@sarI_mem_imm\n\t" ++ "sral rscratch2_AT, $shift&0x1f, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst, rscratch1_GP" ++ %} ++ ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ int shamt = $shift$$constant; ++ __ ldw(rscratch2_AT, dst); ++ __ sral(rscratch2_AT, shamt&0x1f, rscratch2_AT); ++ __ stw(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_imm); ++%}*/ ++ ++// Arithmetic Shift Right by variable ++instruct sarI_rReg_CL(rRegI dst, rRegI src, rRegI shift) ++%{ ++ match(Set dst (RShiftI src shift)); ++ ++ format %{ ++ "and_ins $shift, #31, rscratch3\t #@sarI_rReg_CL\n\t\t" ++ "sral $src, rscratch3, $dst" ++ %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ Register shamt = $shift$$Register; ++ if(UseSW8A) { ++ __ sraw(src, shamt, dst); ++ } else { ++ __ and_ins(shamt, 0x1f, rscratch3); ++ __ sral(src, rscratch3, dst); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++/* memory 
operands no need for SW64 ++// Arithmetic Shift Right by variable ++instruct sarI_mem_CL(memory dst, rRegI shift) ++%{ ++ match(Set dst (StoreI dst (RShiftI (LoadI dst) shift))); ++ ++ format %{ ++ "ldw rscratch2_AT, $dst\t #@sarI_mem_CL\n\t" ++ "and_ins $shift, #31, $shift\n\t" ++ "sral rscratch2_AT, $shift, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst, rscratch1_GP" ++ %} ++ ++ ins_encode %{ ++ Register shamt = $shift$$Register; ++ Address dst = $dst$$Address; ++ __ ldw(rscratch2_AT, dst); ++ __ and_ins(shamt, 31, shamt); ++ __ sral(rscratch2_AT, shamt, rscratch2_AT); ++ __ stw(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_reg); ++%}*/ ++ ++/* no need for SW54 ++// Logical shift right by one ++instruct shrI_rReg_1(rRegI dst, rRegI src, immI1 shift) ++%{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "srll $src, #1, $dst\t #@shrI_rReg_1" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ srll(src, 1, dst); ++ %} ++ ins_pipe(ialu_regI_imm); ++%}*/ ++ ++/* memory operands no need for SW64 ++// Logical shift right by one ++instruct shrI_mem_1(memory dst, immI1 shift) ++%{ ++ match(Set dst (StoreI dst (URShiftI (LoadI dst) shift))); ++ ++ format %{ "ldw rscratch2_AT, $dst\t #@shrI_mem_1\n\t" ++ "srll rscratch2_AT, #1, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst, rscratch1_GP" %} ++ ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ __ ldw(rscratch2_AT, dst); ++ __ srll(rscratch2_AT, 0x1, rscratch2_AT); ++ __ stw(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_imm); ++%}*/ ++ ++// Logical Shift Right by 8-bit immediate ++instruct shrI_rReg_imm(rRegI dst, rRegI src, immU8 shift) ++%{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ "srll $src, $shift�x1f, $dst\t #@shrI_rReg_imm" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int shamt = $shift$$constant; ++ if(UseSW8A) { ++ __ srlw(src, shamt, dst); ++ } else { ++ __ zapnot(src, 0xf, dst); ++ __ srll(dst, shamt&0x1f, dst); ++ __ addw(dst, 0x0, dst); //need to CHECK lsp ++ } ++ %} ++ ins_pipe(ialu_regI_imm16); ++%} ++ ++/* memory operands no need for SW64 ++// Logical Shift Right by 8-bit immediate ++instruct shrI_mem_imm(memory dst, immI8 shift) ++%{ ++ match(Set dst (StoreI dst (URShiftI (LoadI dst) shift))); ++ ++ format %{ "ldw rscratch2_AT, $dst\t #@shrI_mem_imm\n\t" ++ "srll rscratch2_AT, $shift�x1f, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst, rscratch1_GP" %} ++ ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ int shamt = $shift$$constant; ++ __ ldw(rscratch2_AT, dst); ++ __ srll(rscratch2_AT, shamt&0x1f, rscratch2_AT); ++ __ stw(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_imm); ++%}*/ ++ ++// Logical Shift Right by variable ++instruct shrI_rReg_CL(rRegI dst, rRegI src, rRegI shift) ++%{ ++ match(Set dst (URShiftI src shift)); ++ ++ format %{ ++ "and_ins $shift, 0x1f, rscratch3\t #@shrI_rReg_CL\n\t\t" ++ "movwu $dst, $src\n\t\t" ++ "srll $dst, rscratch3, $dst\n\t\t" ++ "movws $dst, $dst" ++ %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ Register shamt = $shift$$Register; ++ if(UseSW8A) { ++ __ srlw(src, shamt, dst); ++ } else { ++ __ and_ins(shamt, 0x1f, rscratch3); ++ __ movwu(dst, src); //need to Check lsp ++ __ srll(dst, rscratch3, dst); ++ __ movws(dst, dst); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++/* memory operands no need for SW64 ++// Logical Shift Right by variable ++instruct shrI_mem_CL(memory dst, rRegI 
shift) ++%{ ++ match(Set dst (StoreI dst (URShiftI (LoadI dst) shift))); ++ ++ format %{ "shrl #@shrI_mem_CL" %} ++ ++ ins_encode %{ ++ Register shamt = $shift$$Register; ++ Address dst = $dst$$Address; ++ __ ldw(rscratch2_AT, dst); ++ __ and_ins(shamt, 0x1f, shamt); ++ __ srll(rscratch2_AT, shamt, rscratch2_AT); ++ __ stw(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_reg); ++%}*/ ++ ++/* No need for SW64 ++// Long Shift Instructions ++// Shift Left by one ++instruct salL_rReg_1(rRegL dst, rRegL src, immI1 shift) ++%{ ++ match(Set dst (LShiftL src shift)); ++ ++ format %{ "slll $src, $shift, $dst\t #@salL_rReg_1 " %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ slll(src, 1, dst); ++ %} ++ ins_pipe(ialu_regL_imm); ++%}*/ ++ ++/* memory operands no need for SW64 ++// Shift Left by one ++instruct salL_mem_1(memory dst, immI1 shift) ++%{ ++ match(Set dst (StoreL dst (LShiftL (LoadL dst) shift))); ++ ++ format %{ "salq #@salL_mem_1" %} ++ ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ __ ldl(rscratch2_AT, dst); ++ __ slll(rscratch2_AT, 1, rscratch2_AT); ++ __ stl(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_imm); ++%}*/ ++ ++// Shift Left by 8-bit immediate ++instruct salL_rReg_imm(rRegL dst, rRegL src, immU8 shift) ++%{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(80); ++ format %{ "slll $src, $shift, $dst #@salL_rReg_imm" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int shamt = $shift$$constant; ++ ++ __ slll(src, shamt&0x3f, dst); ++ %} ++ ins_pipe(ialu_regL_imm); ++%} ++ ++/* memory operands no need for SW64 ++// Shift Left by 8-bit immediate ++instruct salL_mem_imm(memory dst, immI8 shift) ++%{ ++ match(Set dst (StoreL dst (LShiftL (LoadL dst) shift))); ++ ++ format %{ "salq #@salL_mem_imm" %} ++ ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ int shamt = $shift$$constant; ++ __ ldl(rscratch2_AT, dst); ++ __ slll(rscratch2_AT, shamt&0x3f, rscratch2_AT); ++ __ stl(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_imm); ++%}*/ ++ ++// Shift Left by variable ++instruct salL_rReg_CL(rRegL dst, rRegL src, rRegI shift) ++%{ ++ match(Set dst (LShiftL src shift)); ++ ins_cost(80); ++ format %{ "slll $src $shift, $dst #@salL_rReg_CL" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ Register shamt = $shift$$Register; ++ //__ and_ins(shamt, 0x3f, shamt); ++ __ slll(src, shamt, dst); ++ %} ++ ins_pipe(ialu_regI_imm16); ++%} ++ ++/* memory operands no need for SW64 ++// Shift Left by variable ++instruct salL_mem_CL(memory dst, rRegI shift) ++%{ ++ match(Set dst (StoreL dst (LShiftL (LoadL dst) shift))); ++ ++ format %{ "salq #@salL_mem_CL" %} ++ ++ ins_encode %{ ++ Register shamt = $shift$$Register; ++ Address dst = $dst$$Address; ++ __ ldl(rscratch2_AT, dst); ++ __ and_ins(shamt, 0x3f, shamt); ++ __ slll(rscratch2_AT, shamt, rscratch2_AT); ++ __ stl(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_reg); ++%} ++// No need for SW64 ++// Arithmetic shift right by one ++instruct sarL_rReg_1(rRegL dst, rRegL src, immI1 shift) ++%{ ++ match(Set dst (RShiftL src shift)); ++ ++ format %{ "sral $src, #1, $dst\t# long\t@sarL_rReg_1" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ sral(src, 1, dst); ++ %} ++ ins_pipe(ialu_regL_imm); ++%} ++ ++// memory operands no need for SW64 ++// Arithmetic shift right by one ++instruct sarL_mem_1(memory dst, 
immI1 shift) ++%{ ++ match(Set dst (StoreL dst (RShiftL (LoadL dst) shift))); ++ ++ ++ format %{ ++ "ldl rscratch2_AT, $dst\t# long\t@sarL_mem_1\n\t" ++ "sral rscratch2_AT, #1, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst, rscratch1_GP" ++ %} ++ ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ ++ __ ldl(rscratch2_AT, dst); ++ __ sral(rscratch2_AT, 1, rscratch2_AT); ++ __ stl(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_imm); ++%}*/ ++ ++// Arithmetic Shift Right by 8-bit immediate ++instruct sarL_rReg_imm(rRegL dst, rRegL src, immU8 shift) ++%{ ++ match(Set dst (RShiftL src shift)); ++ ins_cost(80); ++ format %{ "sral $src, $shift, $dst\t# long\t@sarL_rReg_imm" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int value = $shift$$constant; ++ ++ __ sral(src, value, dst); ++ %} ++ ins_pipe(ialu_regL_imm); ++%} ++ ++/* memory operands no need for SW64 ++// Arithmetic Shift Right by 8-bit immediate ++instruct sarL_mem_imm(memory dst, immI8 shift) ++%{ ++ match(Set dst (StoreL dst (RShiftL (LoadL dst) shift))); ++ ++ ++ format %{ ++ "ldl rscratch2_AT, $dst\t# long\t@sarL_mem_imm\n\t" ++ "sral rscratch2_AT, $shift, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst, rscratch1_GP" ++ %} ++ ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ int value = $shift$$constant; ++ ++ __ ldl(rscratch2_AT, dst); ++ __ sral(rscratch2_AT, value, rscratch2_AT); ++ __ stl(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_imm); ++%}*/ ++ ++// Arithmetic Shift Right by variable ++instruct sarL_rReg_CL(rRegL dst, rRegL src, rRegI shift) ++%{ ++ match(Set dst (RShiftL src shift)); ++ ++ format %{ "sral $src, $shift, $dst\t# long\t@sarL_rReg_CL" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ Register shift = $shift$$Register; ++ ++ __ sral(src, shift, dst); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++/* memory operands no need for SW64 ++// Arithmetic Shift Right by variable ++instruct sarL_mem_CL(memory dst, rRegI shift) ++%{ ++ match(Set dst (StoreL dst (RShiftL (LoadL dst) shift))); ++ ++ ++ format %{ ++ "ldl rscratch2_AT, $dst\t# long\t@sarL_mem_CL\n\t" ++ "sral rscratch2_AT, $shift, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst, rscratch1_GP" ++ %} ++ ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ Register shift = $shift$$Register; ++ ++ __ ldl(rscratch2_AT, dst); ++ __ sral(rscratch2_AT, shift, rscratch2_AT); ++ __ stl(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_reg); ++%} ++ ++// No need for SW64 ++// Logical shift right by one ++instruct shrL_rReg_1(rRegL dst, rRegL src, immI1 shift) ++%{ ++ match(Set dst (URShiftL src shift)); ++ ++ format %{ "srll $src, #1, $dst\t# long\t@shrL_rReg_1\n\t" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ srll(src, 1, dst); ++ %} ++ ins_pipe(ialu_regL_imm); ++%} ++ ++// memory operands no need for SW64 ++// Logical shift right by one ++instruct shrL_mem_1(memory dst, immI1 shift) ++%{ ++ match(Set dst (StoreL dst (URShiftL (LoadL dst) shift))); ++ ++ ++ format %{ ++ "ldl rscratch2_AT, $dst\t# long\t@shrL_mem_1\n\t" ++ "srll rscratch2_AT, #1, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst, rscratch1_GP" ++ %} ++ ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ ++ __ ldl(rscratch2_AT, dst); ++ __ srll(rscratch2_AT, 1, rscratch2_AT); ++ __ stl(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_imm); ++%}*/ ++ ++// Logical Shift Right by 8-bit immediate ++instruct 
shrL_rReg_imm(rRegL dst, rRegL src, immU8 shift) ++%{ ++ match(Set dst (URShiftL src shift)); ++ ins_cost(80); ++ format %{ "srll $src, $shift�x3f, $dst\t# long\t@shrL_rReg_imm" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int shamt = $shift$$constant; ++ __ srll(src, shamt&0x3f, dst); ++ %} ++ ins_pipe(ialu_regL_imm); ++%} ++ ++/* memory operands no need for SW64 ++// Logical Shift Right by 8-bit immediate ++instruct shrL_mem_imm(memory dst, immI8 shift) ++%{ ++ match(Set dst (StoreL dst (URShiftL (LoadL dst) shift))); ++ ++ format %{ ++ "ldl rscratch2_AT, $dst\t# long\t@shrL_mem_imm\n\t" ++ "srll rscratch2_AT, $shift�x3f, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst, rscratch1_GP" ++ %} ++ ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ int shamt = $shift$$constant; ++ __ ldl(rscratch2_AT, dst); ++ __ srll(rscratch2_AT, shamt&0x3f, rscratch2_AT); ++ __ stl(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_imm); ++%}*/ ++ ++// Logical Shift Right by variable ++instruct shrL_rReg_CL(rRegL dst, rRegL src, rRegI shift) ++%{ ++ match(Set dst (URShiftL src shift)); ++ ++ format %{ ++ "srll $src, $shift, $dst\t# long\t@shrL_rReg_CL" ++ %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ Register shift = $shift$$Register; ++ ++ //__ and_ins(shift, 0x3f, shift);TODO: ++ __ srll(src, shift, dst); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++/* memory operands no need for SW64 ++// Logical Shift Right by variable ++instruct shrL_mem_CL(memory dst, rRegI shift) ++%{ ++ match(Set dst (StoreL dst (URShiftL (LoadL dst) shift))); ++ ++ ++ format %{ ++ "ldl rscratch2_AT, $dst\t# long\t@shrL_mem_CL\n\t" ++ "and_ins $shift, #0x3f, $shift\n\t" ++ "srll rscratch2_AT, $shift, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst, rscratch1_GP" ++ %} ++ ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ Register shift = $shift$$Register; ++ ++ __ ldl(rscratch2_AT, dst); ++ __ and_ins(shift, 0x3f, shift); ++ __ srll(rscratch2_AT, shift, rscratch2_AT); ++ __ stl(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ //ins_pipe(ialu_mem_reg); ++%}*/ ++ ++ ++// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24. ++// This idiom is used by the compiler for the i2b bytecode. ++instruct i2b(rRegI dst, rRegI src, immI_24 twentyfour) ++%{ ++ match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour)); ++ ++ format %{ "sextb $src, $dst\t#@i2b" %} ++ ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sextb(src, dst); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16. ++// This idiom is used by the compiler the i2s bytecode. ++instruct i2s(rRegI dst, rRegI src, immI_16 sixteen) ++%{ ++ match(Set dst (RShiftI (LShiftI src sixteen) sixteen)); ++ ++ format %{ "sexth $src, $dst\t#@i2s" %} ++ ++ ins_encode %{ ++ Register src = $src$$Register; ++ Register dst = $dst$$Register; ++ ++ __ sexth(src, dst); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++/* Rotate shift No need for SW64 ?? 
++// ROL/ROR instructions ++ ++// ROL expand ++instruct rolI_rReg_imm1(rRegI dst, immI1 lshift, immI_M1 rshift) %{ ++ match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); ++ format %{ "roll #@rolI_rReg_imm1" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++// int lshift = $lshift$$constant; ++// int rshift = $rshift$$constant; ++ __ slll(dst, 0x1, rscratch2_AT); ++ __ srll(dst, 0x1F, rscratch1_GP); ++ __ or_ins(rscratch2_AT, rscratch1_GP, dst); ++ ++ %} ++ //ins_pipe(ialu_reg); ++%} ++ ++/*--x86 does not provide any match rule, compiling error---*/ ++// end of ROL expand ++ ++// Rotate Left by one ++//instruct rolI_rReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift) ++//%{ ++// match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); ++// ++// expand %{ ++// //rolI_rReg_imm1(dst, cr); ++// %} ++//%} ++ ++// Rotate Left by 8-bit immediate ++instruct rolI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift) ++%{ ++ predicate((0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)) && UseSW8A); ++ match(Set dst (OrI (LShiftI dst lshift) (URShiftI dst rshift))); ++ format %{ "rolw $dst, $dst, $lshift #@rolI_rReg_i8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int lshift = $lshift$$constant; ++ int rshift = $rshift$$constant; ++ if(UseSW8A) { ++ __ rolw(dst, lshift, dst); ++ } else { ++ Unimplemented(); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Rotate Left by variable ++instruct rolI_rReg_Var_C0(rRegI dst, rRegI shift, immI0 zero) ++%{ ++ predicate(UseSW8A); ++ match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift)))); ++ format %{ "rolw $dst, $dst, $shift #@rolI_rReg_Var_C0" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ if(UseSW8A) { ++ __ rolw(dst, shift, dst); ++ } else { ++ Unimplemented(); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Rotate Left by variable ++instruct rolI_rReg_Var_C32(rRegI dst, rRegI shift, immI_32 c32) ++%{ ++ predicate(UseSW8A); ++ match(Set dst (OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift)))); ++ format %{ "rolw $dst, $dst, $shift #@rolI_rReg_Var_C32" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ if(UseSW8A) { ++ __ rolw(dst, shift, dst); ++ } else { ++ Unimplemented(); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct rolAddI_rReg_i8(rRegI dst, immI8 lshift, immI8 rshift) ++%{ ++ predicate((0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)) && UseSW8A); ++ match(Set dst (AddI (LShiftI dst lshift) (URShiftI dst rshift))); ++ format %{ "rolw $dst, $dst, $lshift #@rolAddI_rReg_i8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int lshift = $lshift$$constant; ++ int rshift = $rshift$$constant; ++ if(UseSW8A) { ++ __ rolw(dst, lshift, dst); ++ } else { ++ Unimplemented(); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// ROR expand ++/*---x86 does not provide any match rule, compiling error---*/ ++/* ++instruct rorI_rReg_imm1(rRegI dst) ++%{ ++ ++ format %{ "rorl $dst" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ __ srll(dst, 0x1, rscratch2_AT); ++ __ slll(dst, 0x1F, rscratch1_GP); ++ __ or_ins(rscratch2_AT, rscratch1_GP, dst); ++ %} ++ //ins_pipe(ialu_reg); ++%} ++ ++// Rotate Right by 8-bit immediate ++instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++ match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift))); ++ 
format %{ "rorl #@rorI_rReg_i8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int rshift = $rshift$$constant; ++ int lshift = $lshift$$constant; ++ __ srll(dst, rshift, rscratch2_AT); ++ __ slll(dst, lshift, rscratch1_GP); ++ __ or_ins(rscratch2_AT, rscratch1_GP, dst); ++ %} ++ //ins_pipe(ialu_reg); ++%} ++// end of ROR expand ++ ++// Rotate Right by one ++//instruct rorI_rReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift) ++//%{ ++// match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift))); ++// ++// expand %{ ++// //rorI_rReg_imm1(dst, cr); ++// %} ++//%} ++ ++// Rotate Right by 8-bit immediate ++//instruct rorI_rReg_i8(rRegI dst, immI8 rshift, immI8 lshift) ++//%{ ++// predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f)); ++// match(Set dst (OrI (URShiftI dst rshift) (LShiftI dst lshift))); ++// ++// expand %{ ++// //rorI_rReg_imm8(dst, rshift, cr); ++// %} ++//%} ++*/ ++// Rotate Right by variable ++instruct rorI_rReg_Var_C0(rRegI dst, rRegI shift, immI0 zero) ++%{ ++ predicate(UseSW8A); ++ match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift)))); ++ format %{ "rorw $dst, $dst, 0 - $shift #@rorI_rReg_Var_C0" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ if(UseSW8A) { ++ __ and_ins(shift, 0x1f, rscratch1_GP); ++ __ subw(R0, rscratch1_GP, rscratch1_GP); ++ __ rolw(dst, rscratch1_GP, dst); ++ } else { ++ Unimplemented(); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// Rotate Right by variable ++instruct rorI_rReg_Var_C32(rRegI dst, rRegI shift, immI_32 c32) ++%{ ++ predicate(UseSW8A); ++ match(Set dst (OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift)))); ++ format %{ "rorw $dst, $dst, 32 - $shift #@rorI_rReg_Var_C32" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ if(UseSW8A) { ++ __ and_ins(shift, 0x1f, rscratch1_GP); ++ __ subw(R0, rscratch1_GP, rscratch1_GP); ++ __ rolw(dst, rscratch1_GP, dst); ++ } else { ++ Unimplemented(); ++ } ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++/* ++// for long rotate ++// ROL expand ++instruct rolL_rReg_imm1(rRegL dst) %{ ++ ++ format %{ ++ "slll $dst, 1, rscratch2_AT\t#long\t@rolL_rReg_imm1\n\t" ++ "srll $dst, 63, rscratch1_GP\n\t" ++ "or_ins rscratch2_AT, rscratch1_GP, $dst" ++ %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ slll(dst, 1, rscratch2_AT); ++ __ srll(dst, 63, rscratch1_GP); ++ __ or_ins(rscratch2_AT, rscratch1_GP, dst); ++ ++ %} ++ //ins_pipe(ialu_reg); ++%} ++ ++instruct rolL_rReg_CL(rRegL dst, rRegI shift) %{ ++// format %{ ++// "andw $shift, #0x1f, $shift\t#long\t@rolL_rReg_CL\n\t" ++// "mov_immediate32 rscratch2_AT, #64\n\t" ++// "subw rscratch2_AT, $shift, rscratch1_GP\n\t" ++// "mov_immediate64 rscratch2_AT, 0xffffffffffffffff\n\t" ++// "slll rscratch2_AT, rscratch1_GP, rscratch2_AT\n\t" ++// "and_ins $src, rscratch2_AT, rscratch2_AT\n\t" ++// "srll rscratch2_AT, rscratch1_GP, rscratch2_AT\n\t" ++// "slll $src, $shift, $dst\n\t" ++// "or_ins $dst, rscratch2_AT, $dst" ++// %} ++ format %{ "rolL_rReg_CL\t#@rolL_rReg_CL\n\t" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ ++ __ andw(shift, 0x1f, shift); ++ __ slll(dst, shift, rscratch2_AT); ++ __ mov_immediate32(rscratch1_GP, 64); ++ __ subw(rscratch1_GP, shift, rscratch1_GP); ++ __ srll(dst, rscratch1_GP, rscratch1_GP); ++ ++ __ or_ins(rscratch2_AT, rscratch1_GP, dst); ++ %} ++ //ins_pipe(ialu_reg_reg); ++%} ++// end of ROL expand ++ ++// Rotate Left by 
one ++instruct rolL_rReg_i1(rRegL dst, immI1 lshift, immI_M1 rshift) ++%{ ++ match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift))); ++ ++ expand %{ ++ rolL_rReg_imm1(dst); ++ %} ++%} ++*/ ++// Rotate Left by 8-bit immediate ++instruct rolL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift) ++%{ ++ predicate((0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)) && UseSW8A); ++ match(Set dst (OrL (LShiftL dst lshift) (URShiftL dst rshift))); ++ format %{ "roll $dst, $dst, $lshift #@rolL_rReg_i8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int lshift = $lshift$$constant; ++ int rshift = $rshift$$constant; ++ if(UseSW8A) { ++ __ roll(dst, lshift, dst); ++ } else { ++ Unimplemented(); ++ } ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++// Rotate Left by variable ++instruct rolL_rReg_Var_C0(rRegL dst, rRegI shift, immI0 zero) ++%{ ++ predicate(UseSW8A); ++ match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI zero shift)))); ++ format %{ "roll $dst, $dst, $shift #@rolL_rReg_Var_C0" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ if(UseSW8A) { ++ __ roll(dst, shift, dst); ++ } else { ++ Unimplemented(); ++ } ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++// Rotate Left by variable ++instruct rolL_rReg_Var_C64(rRegL dst, rRegI shift, immI_64 c64) ++%{ ++ predicate(UseSW8A); ++ match(Set dst (OrL (LShiftL dst shift) (URShiftL dst (SubI c64 shift)))); ++ format %{ "roll $dst, $dst, $shift #@rolL_rReg_Var_C64" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ if(UseSW8A) { ++ __ roll(dst, shift, dst); ++ } else { ++ Unimplemented(); ++ } ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct rolAddL_rReg_i8(rRegL dst, immI8 lshift, immI8 rshift) ++%{ ++ predicate((0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)) && UseSW8A); ++ match(Set dst (AddL (LShiftL dst lshift) (URShiftL dst rshift))); ++ format %{ "roll $dst, $dst, $lshift #@rolAddL_rReg_i8" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ int lshift = $lshift$$constant; ++ int rshift = $rshift$$constant; ++ if(UseSW8A) { ++ __ roll(dst, lshift, dst); ++ } else { ++ Unimplemented(); ++ } ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++/* ++// ROR expand ++instruct rorL_rReg_imm1(rRegL dst) ++%{ ++ format %{ ++ "srll $dst, #1, rscratch2_AT\t#@rorL_rReg_imm1\n\t" ++ "slll $dst, #63, rscratch1_GP\n\t" ++ "or_ins rscratch2_AT, rscratch1_GP, $dst" ++ %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ srll(dst, 1, rscratch2_AT); ++ __ slll(dst, 63, rscratch1_GP); ++ __ or_ins(rscratch2_AT, rscratch1_GP, dst); ++ %} ++ //ins_pipe(ialu_reg); ++%} ++*/ ++/* The following two methods cannot be implemented since there are no match rules*/ ++/* ++instruct rorL_rReg_imm8(rRegL dst, immI8 shift) ++%{ ++ ++ format %{ "rorq $dst, $shift" %} ++ ++ ins_encode%{ ++ Register dst = $dst$$Register; ++ int shift = $rshift$$constant; ++ ++ __ srll(dst, shift, rscratch2_AT); ++ __ slll(dst, 64-shift, rscratch1_GP); ++ __ or_ins(rscratch2_AT, rscratch1_GP, dst); ++ %} ++ //ins_pipe(ialu_reg); ++%} ++ ++instruct rorL_rReg_CL(rRegL dst, rRegI shift) ++%{ ++ ++ format %{ "rorq $dst, $shift" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ ++ __ srll(dst, shift, rscratch2_AT); ++ __ mov_immediate32(rscratch1_GP, 64); ++ __ subw(rscratch1_GP, shift, rscratch1_GP); ++ __ slll(dst, rscratch1_GP, rscratch1_GP); ++ __ or_ins(rscratch2_AT, rscratch1_GP, dst); ++ 
%} ++ //ins_pipe(ialu_reg_reg); ++%} ++*/ ++/* ++// end of ROR expand ++ ++// Rotate Right by one ++instruct rorL_rReg_i1(rRegL dst, immI1 rshift, immI_M1 lshift) ++%{ ++ match(Set dst (OrL (URShiftL dst rshift) (LShiftL dst lshift))); ++ ++ expand %{ ++ rorL_rReg_imm1(dst); ++ %} ++%} ++ ++// Rotate Right by 8-bit immediate ++instruct rorL_rReg_i8(rRegL dst, rRegL src, immI8 rshift, immI8 lshift) ++%{ ++ predicate(0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x3f)); ++ match(Set dst (OrL (URShiftL src rshift) (LShiftL src lshift))); ++//instruct rorL_rReg_imm8(rRegL dst, rRegL src, immI8 shift) ++//%{ ++ format %{ ++ "srll $dst, $rshift, rscratch2_AT\t#@rorL_rReg_i8\n\t" ++ "slll $dst, 64-rshift, rscratch1_GP\n\t" ++ "or_ins rscratch2_AT, rscratch1_GP, $dst" ++ %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ int shift = $rshift$$constant; ++ ++ __ srll(dst, shift, rscratch2_AT); ++ __ slll(dst, 64-shift, rscratch1_GP); ++ __ or_ins(rscratch2_AT, rscratch1_GP, dst); ++ %} ++ //ins_pipe(ialu_reg); ++%} ++*/ ++// Rotate Right by variable ++instruct rorL_rReg_Var_C0(rRegL dst, rRegI shift, immI0 zero) ++%{ ++ predicate(UseSW8A); ++ match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI zero shift)))); ++ format %{ "rorl $dst, $dst, 0 - $shift #@rorL_rReg_Var_C0" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ if(UseSW8A) { ++ __ and_ins(shift, 0x3f, rscratch1_GP); ++ __ subw(R0, rscratch1_GP, rscratch1_GP); ++ __ roll(dst, rscratch1_GP, dst); ++ } else { ++ Unimplemented(); ++ } ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++instruct rorL_rReg_Var_C64(rRegL dst, rRegI shift, immI_64 c64) ++%{ ++ predicate(UseSW8A); ++ match(Set dst (OrL (URShiftL dst shift) (LShiftL dst (SubI c64 shift)))); ++ format %{ "rorl $dst, $dst, 64 - $shift #@rorL_rReg_Var_C64" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register shift = $shift$$Register; ++ if(UseSW8A) { ++ __ and_ins(shift, 0x3f, rscratch1_GP); ++ __ subw(R0, rscratch1_GP, rscratch1_GP); ++ __ roll(dst, rscratch1_GP, dst); ++ } else { ++ Unimplemented(); ++ } ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++ ++// Logical Instructions ++ ++// Integer Logical Instructions ++ ++// And Instructions ++// And Register with Register ++instruct andI_rReg(rRegI dst, rRegI src1, rRegI src2) ++%{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ ++ "and_ins $src1, $src2, $dst\t# int @andI_rReg" ++ %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ and_ins(src1, src2, dst); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* sw8 ++instruct andI_Reg_Reg(rRegI dst, rRegI src1, rRegI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_Reg" %} ++ ins_encode %{ ++// Register dst = $dst$$Register; ++// Register src1 = $src1$$Register; ++// Register src2 = $src2$$Register; ++// __ andr(dst, src1, src2); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++/* TODO no in jdk8 ++// And Register with Immediate 255 ++instruct andI_rReg_imm255(rRegI dst, rRegI src1, immI_255 src2) ++%{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and_ins $src1, #255, $dst\t# int & 0xFF @andI_rReg_imm255"%} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ and_ins(src1, 255, dst); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Register with Immediate 255 and promote to long ++instruct 
andI2L_rReg_imm255(rRegI dst, rRegI src, immI_255 mask) ++%{ ++ match(Set dst (ConvI2L (AndI src mask))); ++ ++ format %{ "and_ins $src, #255, $dst\t# int & 0xFF -> long @andI2L_rReg_imm255"%} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ and_ins(src, 255, dst); ++// __ sextb(src, dst); //TODO CHECK LSP I2L signed extend ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++// And Register with Immediate 65535 ++instruct andI_rReg_imm65535(rRegI dst, rRegI src1, immI_65535 src2) ++%{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(40); ++ format %{ "zapnot $src1, #3, $dst\t# int & 0xFFFF @andI_rReg_imm65535"%} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ __ zapnot(src, 0x3, dst); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++/* TODO no in jdk8 ++// And Register with Immediate 65535 and promote to long ++instruct andI2L_rReg_imm65535(rRegI dst, rRegI src, immI_65535 mask) ++%{ ++ match(Set dst (ConvI2L (AndI src mask))); ++ ++ format %{ "zapnot $src, #3, $dst\t# int & 0xFFFF -> long @andI2L_rReg_imm65535"%} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ zapnot(src, 0x3, dst); ++// __ sexth(dst, dst); // TODO CHECK lsp I2L signed extend? ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++// And Register with Immediate ++instruct andI_rReg_imm(rRegI dst, rRegI src1, immU8 src2) ++%{ ++ match(Set dst (AndI src1 src2)); ++ ins_cost(60); ++ format %{ "andw $src1, $src2, $dst\t# int @andI_rReg_imm"%} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ __ and_ins(src, val, dst); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* ++instruct andI_Reg_immI(rRegI dst, rRegI src1, immI src2) %{ ++ match(Set dst (AndI src1 src2)); ++ ++ format %{ "and $dst, $src1, $src2 #@andI_Reg_immI" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ int val = $src2$$constant; ++ ++ __ mov_immediate32(AT, val); ++ __ and_ins(src, AT, dst); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++ ++/* memory operands no need for SW64 ++// And Register with Memory ++instruct andI_rReg_mem(rRegI dst, memory src1, rRegI src2) ++%{ ++ match(Set dst (AndI src2 (LoadI src1))); ++ ++// ins_cost(125);//todo ++ format %{ "ldw rscratch3, $src1\t# int @andI_rReg_mem\n\t" ++ "andw rscratch3, $src2, $dst" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Address src1 = $src1$$Address; ++ Register src2 = $src2$$Register; ++ __ ldw(rscratch3, src1); ++ __ andw(rscratch3, src2, dst); ++ %} ++// ins_pipe(ialu_reg_mem); ++%} ++ ++// And Memory with Register ++instruct andI_mem_rReg(memory dst, rRegI src) ++%{ ++ match(Set dst (StoreI dst (AndI (LoadI dst) src))); ++ ++ ins_cost(150);//todo ++ format %{ "ldw rscratch3, $dst\t# int @andI_mem_rReg\n\t" ++ "and_ins rscratch3, $src, rscratch2_AT\n\t" ++ "stw rscratch3, $dst" %} ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ Register src = $src$$Register; ++ __ ldw(rscratch3, dst); ++ __ and_ins(src, rscratch3, rscratch3); ++ __ stw(rscratch3, dst); ++ ++ %} ++// ins_pipe(ialu_mem_reg); ++%} ++ ++// And Memory with Immediate ++instruct andI_mem_imm(memory dst, immI src) ++%{ ++ match(Set dst (StoreI dst (AndI (LoadI dst) src))); ++ ++ ins_cost(125);//todo ++ format %{ "ldw rscratch2_AT, $dst\t# int @andI_mem_imm\n\t" ++ "movws rscratch2_AT, $src, rscratch2_AT\n\t" ++ "and_ins rscratch2_AT, rscratch1_GP, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst" %} ++ 
ins_encode %{ ++ Address dst = $dst$$Address; ++ int val = $src$$constant; ++ __ ldw(rscratch1_GP, dst); ++ __ movws(rscratch2_AT, val); ++ __ and_ins(rscratch2_AT, rscratch1_GP, rscratch2_AT); ++ __ stw(rscratch2_AT, dst, rscratch1_GP); ++ ++ %} ++// ins_pipe(ialu_mem_imm); ++%} ++ ++// BMI1 instructions ++instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1) %{ ++ match(Set dst (AndI (XorI src1 minus_1) (LoadI src2))); ++// predicate(UseBMI1Instructions); ++ ++ ins_cost(125);//todo ++ format %{ "ldw rscratch2_AT, $src2\t# @andnI_rReg_rReg_mem\n\t" ++ "ornot R0, $src1, rscratch1_GP\n\t" ++ "andw rscratch1_GP, rscratch2_AT, $dst" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Address src2 = $src2$$Address; ++ int val = $minus_1$$constant; ++ __ ldw(rscratch2_AT, src2); ++ __ ornot(R0, src1, rscratch1_GP); ++ __ andw(rscratch1_GP, rscratch2_AT, dst); ++ %} ++// ins_pipe(ialu_reg_mem); ++%}*/ ++/*no need in swjdk8 ++instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1) %{ ++ match(Set dst (AndI (XorI src1 minus_1) src2)); ++// predicate(UseBMI1Instructions); ++ ++ format %{ "ornot R0, $src1, rscratch3 \t# @andnI_rReg_rReg_rReg\n\t" ++ "andw rscratch3, $src2, $dst" ++ %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ ornot(R0, src1, rscratch3); ++ __ andw(rscratch3, src2, dst); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++// TODO CHECK lsp: instruct name blsiI needed to be changed?? ++instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero) %{ ++ match(Set dst (AndI (SubI imm_zero src) src)); ++// predicate(UseBMI1Instructions); ++ ++ format %{ "subw R0, $src, rscratch3\t# @blsiI_rReg_rReg\n\t" ++ "andw rscratch3, $src, $dst"%} ++ ++ ins_encode %{ ++// __ blsil($dst$$Register, $src$$Register); ++ __ subw(R0, $src$$Register, rscratch3); ++ __ andw(rscratch3, $src$$Register, $dst$$Register); ++ %} ++// ins_pipe(ialu_reg); ++%}*/ ++/* memory operands no need for SW64 ++instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero) %{ ++ match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) )); ++// predicate(UseBMI1Instructions); ++ ++ ins_cost(125);//todo ++ format %{ "ldw rscratch2_AT, $src\t# @blsiI_rReg_mem\n\t" ++ "subw R0, rscratch2_AT, rscratch1_GP\n\t" ++ "andw rscratch1_GP, rscratch2_AT, $dst"%} ++ ++ ins_encode %{ ++// __ blsil($dst$$Register, $src$$Address); ++ __ ldw(rscratch2_AT, $src$$Address); ++ __ subw(R0, rscratch2_AT, rscratch1_GP); ++ __ andw(rscratch1_GP, rscratch2_AT, $dst$$Register); ++ %} ++// ins_pipe(ialu_reg_mem); ++%} ++ ++instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1) ++%{ ++ match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ) ); ++// predicate(UseBMI1Instructions); ++ ++ ins_cost(125);//todo ++ format %{ "ldw rscratch2_AT, $src\t# @blsmskI_rReg_mem\n\t" ++ "subw rscratch2_AT, #1, rscratch1_GP\n\t" ++ "xorw rscratch1_GP, rscratch2_AT, $dst"%} ++ ++ ins_encode %{ ++// __ blsmskl($dst$$Register, $src$$Address); ++ __ ldw(rscratch2_AT, $src$$Address); ++ __ subw(rscratch2_AT, 1, rscratch1_GP); ++ __ xorw(rscratch1_GP, rscratch2_AT, $dst$$Register); ++ %} ++// ins_pipe(ialu_reg_mem); ++%}*/ ++ ++/* no need in swjdk8 ++instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1) ++%{ ++ match(Set dst (XorI (AddI src minus_1) src)); ++// predicate(UseBMI1Instructions); ++ ++ format %{ "subw $src, #1, rscratch3\t# @blsmskI_rReg_rReg\n\t" 
++ "xorw rscratch3, $src, $dst"%} ++ ++ ins_encode %{ ++// __ blsmskl($dst$$Register, $src$$Register); ++ __ subw($src$$Register, 1, rscratch3); ++ __ xorw(rscratch3, $src$$Register, $dst$$Register); ++ %} ++ ++// ins_pipe(ialu_reg); ++%} ++ ++instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1) ++%{ ++ match(Set dst (AndI (AddI src minus_1) src) ); ++// predicate(UseBMI1Instructions); ++ ++ format %{ "subw $src, #1, rscratch3\t# @blsrI_rReg_rReg\n\t" ++ "andw rscratch3, $src, $dst"%} ++ ++ ins_encode %{ ++// __ blsrl($dst$$Register, $src$$Register); ++ __ subw($src$$Register, 1, rscratch3); ++ __ andw(rscratch3, $src$$Register, $dst$$Register); ++ %} ++ ++// ins_pipe(ialu_reg_mem); ++%} ++*/ ++/* memory operands no need for SW64 ++instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1) ++%{ ++ match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ) ); ++// predicate(UseBMI1Instructions); ++ ++ ins_cost(125);//todo ++ format %{ "ldw rscratch2_AT, $src\t# @blsmskI_rReg_mem\n\t" ++ "subw rscratch2_AT, #1, rscratch1_GP\n\t" ++ "andw rscratch2_AT, rscratch1_GP, $dst"%} ++ ++ ins_encode %{ ++// __ blsrl($dst$$Register, $src$$Address); ++ __ ldw(rscratch2_AT, $src$$Address); ++ __ subw(rscratch2_AT, 1, rscratch1_GP); ++ __ andw(rscratch2_AT, rscratch1_GP, $dst$$Register); ++ %} ++ ++// ins_pipe(ialu_reg); ++%}*/ ++ ++// Or Instructions ++// Or Register with Register ++instruct orI_rReg(rRegI dst, rRegI src1, rRegI src2) ++%{ ++ match(Set dst (OrI src1 src2)); ++ ++ format %{ ++ "bis $src1, $src2, $dst\t# int @orI_rReg" ++ %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ //__ stop("orI_rReg");//TODO ++ __ bis(src1, src2, dst); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++ ++// Or Register with Immediate ++instruct orI_rReg_imm(rRegI dst, rRegI src1, immU8 src2) ++%{ ++ match(Set dst (OrI src1 src2)); ++ ins_cost(60); ++ format %{ "bis $src1, $src2, $dst\t# int @orI_rReg_imm" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int src2 = $src2$$constant; ++ //__ stop("orI_rReg_imm");//TODO ++ __ bis(src1, src2, dst); ++ %} ++ ins_pipe(ialu_regI_regI);//ins_pipe needs to be changed to a proper one ++%} ++ ++/* memory operands no need for SW64 ++// Or Register with Memory ++instruct orI_rReg_mem(rRegI dst, memory src1, rRegI src2) ++%{ ++ match(Set dst (OrI src2 (LoadI src1))); ++ ++ ins_cost(125); ++ format %{ ++ "ldw rscratch2_AT, $src1\t# int @orI_rReg_mem\n\t" ++ "orw $src2, rscratch2_AT, $dst" ++ %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Address src1 = $src1$$Address; ++ Register src2 = $src2$$Register; ++ __ ldw(rscratch2_AT, src1); ++ __ orw(src2, rscratch2_AT, dst); ++ %} ++ ins_pipe(ialu_regI_regI);//ins_pipe needs to be changed to a proper one ++%} ++ ++// Or Memory with Register ++instruct orI_mem_rReg(memory dst, rRegI src) ++%{ ++ match(Set dst (StoreI dst (OrI (LoadI dst) src))); ++ ++ ins_cost(150); ++ format %{ ++ "ldw rscratch2_AT, $dst\t# int @orI_mem_rReg\n\t" ++ "orw $src, rscratch2_AT, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst" ++ %} ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ Register src = $src$$Register; ++ __ ldw(rscratch2_AT, dst); ++ __ bis(rscratch2_AT, src, rscratch2_AT); ++ __ stw(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ ins_pipe(ialu_regI_regI);//ins_pipe needs to be changed to a proper one ++%} ++ ++// Or Memory with Immediate ++instruct orI_mem_imm(memory dst, immI src) ++%{ ++ match(Set dst 
(StoreI dst (OrI (LoadI dst) src))); ++ ++ ins_cost(125); ++ format %{ ++ "ldw rscratch2_AT, $dst\t# int @orI_mem_imm\n\t" ++ "movws rscratch1_GP, $src\n\t" ++ "bis rscratch2_AT, $src, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst" ++ %} ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ int src = $src$$constant; ++ __ ldw(rscratch2_AT, dst); ++ __ movws(rscratch1_GP, src); ++ __ bis(rscratch2_AT, rscratch1_GP, rscratch2_AT); ++ __ stw(rscratch2_AT, dst, rscratch1_GP); ++ %} ++ ins_pipe(ialu_regI_regI);//ins_pipe needs to be changed to a proper one ++%}*/ ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorI_rReg(rRegI dst, rRegI src1, rRegI src2) ++%{ ++ match(Set dst (XorI src1 src2)); ++// ins_cost(60); ++ ++ format %{ ++ "xor_ins $src1, $src2, $dst\t# int @xorI_rReg" ++ %} ++ ins_encode %{ ++ //__ stop("xorI_rReg"); TODO:jzy ++ __ xor_ins($src1$$Register, $src2$$Register, $dst$$Register); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++/* TODO no in jdk8 ++// Xor Register with Immediate -1 ++instruct xorI_rReg_im1(rRegI dst, rRegI src, immI_M1 imm) %{ ++ match(Set dst (XorI src imm)); ++ ins_cost(60); ++ ++ format %{ "ornot R0, $src, $dst\t# int @xorI_rReg_im1\n\t" ++ "zapnot $dst, 0xf, $dst"%} ++ ins_encode %{ ++ __ ornot(R0, $src$$Register, $dst$$Register); ++// __ zapnot($dst$$Register, 0xf, $dst$$Register);//?? ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++// Xor Register with Immediate ++instruct xorI_rReg_imm(rRegI dst, rRegI src1, immU8 src2) ++%{ ++ match(Set dst (XorI src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xor_ins $src1, $src2, $dst\t# int @xorI_rReg_imm\n\t" %} ++ ins_encode %{ ++ //__ stop("xorI_rReg_imm"); TODO:jzy ++ __ xor_ins($src1$$Register, $src2$$constant, $dst$$Register); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* memory operands no need for SW64 ++// Xor Register with Memory ++instruct xorI_rReg_mem(rRegI dst, rRegI src1, memory src2) ++%{ ++ match(Set dst (XorI src1 (LoadI src2))); ++ ins_cost(125); ++ ++ format %{ "ldw rscratch2_AT, $src2\t# int @xorI_rReg_mem\n\t" ++ "xorw $src1, rscratch2_AT, $dst"%} ++ ins_encode %{ ++ __ ldw(rscratch2_AT, $src2$$Address); ++ __ xorw($src1$$Register, rscratch2_AT, $dst$$Register); ++ %} ++// ins_pipe( ialu_reg_mem ); ++%} ++ ++// Xor Memory with Register ++instruct xorI_mem_rReg(memory dst, rRegI src) ++%{ ++ match(Set dst (StoreI dst (XorI (LoadI dst) src))); ++ ins_cost(150); ++ ++ format %{ "ldw rscratch2_AT, $dst\t# int @xorI_rReg_mem\n\t" ++ "xor_ins rscratch2_AT, $src, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst"%} ++ ins_encode %{ ++ __ ldw(rscratch2_AT, $dst$$Address); ++ __ xor_ins(rscratch2_AT, $src$$Register, rscratch2_AT); ++ __ stw(rscratch2_AT, $dst$$Address, rscratch1_GP); ++ %} ++// ins_pipe( ialu_reg_mem ); ++%} ++ ++// Xor Memory with Immediate ++instruct xorI_mem_imm(memory dst, immI src) ++%{ ++ match(Set dst (StoreI dst (XorI (LoadI dst) src))); ++ ins_cost(125); ++ ++ format %{ "ldw rscratch2_AT, $dst\t# int @xorI_mem_imm\n\t" ++ "movws rscratch1_GP, $src\n\t" ++ "xor_ins rscratch2_AT, $src, rscratch2_AT\n\t" ++ "stw rscratch2_AT, $dst"%} ++ ins_encode %{ ++ __ ldw(rscratch2_AT, $dst$$Address); ++ __ movws(rscratch1_GP, (int)$src$$constant); ++ __ xor_ins(rscratch2_AT, rscratch1_GP, rscratch2_AT); ++ __ stw(rscratch2_AT, $dst$$Address, rscratch1_GP); ++ %} ++// ins_pipe(ialu_mem_imm); ++%} ++ */ ++ ++ ++// Long Logical Instructions ++ ++// And Instructions ++// And Register with Register ++instruct andL_rReg(rRegL dst, rRegL src1, rRegL src2) ++%{ ++ match(Set dst (AndL 
src1 src2)); ++ ins_cost(100); ++ format %{ "and_ins $src1, $src2, $dst\t# long @andL_rReg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ ++ __ and_ins(src1, src2, dst); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++/* TODO no in jdk8 ++// And Register with Immediate 255 ++instruct andL_rReg_imm255(rRegL dst, rRegL src1, immL_255 src2) ++%{ ++ match(Set dst (AndL src1 src2)); ++ ++ format %{ "and_ins $src1, #255, $dst\t# long & 0xFF @andL_rReg_imm_255" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ and_ins(src1, 255, dst); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Register with Immediate 65535 ++instruct andL_rReg_imm65535(rRegL dst, rRegL src1, immL_65535 src2) ++%{ ++ match(Set dst (AndL src1 src2)); ++ ++ format %{ "zapnot $src1, 0x3, $dst\t# long & 0xFFFF @andL_rReg_imm65535"%} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ zapnot(src1, 0x3, dst);//TODO CHECK LSP ++ %} ++ ins_pipe(ialu_regI_imm16); ++%} ++ ++// And Register with Immediate ++instruct andL_rReg_imm(rRegL dst, rRegL src1, immL32 src2) ++%{ ++ match(Set dst (AndL src1 src2)); ++ ++ format %{ "movws $dst, $src2\t# long @andL_rReg_imm\n\t" ++ "and_ins $dst, $src1, $dst"%} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int value = $src2$$constant; ++ __ movws(dst, value); ++ __ and_ins(dst, src1, dst); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++*/ ++instruct andL_Reg_Reg_convI2L(rRegL dst, rRegL src1, rRegI src2) %{ ++ match(Set dst (AndL src1 (ConvI2L src2))); ++ format %{ "AND $dst, $src1, $src2 @ andL_Reg_Reg_convI2L\n\t" %} ++ ins_encode %{ ++ Register dst_reg = $dst$$Register; ++ Register src1_reg = $src1$$Register; ++ Register src2_reg = $src2$$Register; ++// __ stop("andL_Reg_Reg_convI2L copy from jdk8 why src2 do not signed extend lsp"); ++ ++ __ and_ins(src1_reg, src2_reg, dst_reg); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++instruct andL_Reg_imm_0_255(rRegL dst, rRegL src1, immUL8 src2) %{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_255" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ and_ins(src, val, dst); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++// And Register with Immediate 65535 ++instruct andL_rReg_imm65535(rRegL dst, rRegL src1, immL_65535 src2) ++%{ ++ match(Set dst (AndL src1 src2)); ++ ins_cost(60); ++ format %{ "zapnot $src1, 0x3, $dst\t# long & 0xFFFF @andL_rReg_imm65535"%} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ __ zapnot(src1, 0x3, dst);//TODO CHECK LSP ++ %} ++ ins_pipe(ialu_regI_imm16); ++%} ++instruct andL2I_Reg_imm_0_255(rRegI dst, rRegL src1, immUL8 src2) %{ ++ match(Set dst (ConvL2I (AndL src1 src2))); ++ ins_cost(60); ++ ++ format %{ "and $dst, $src1, $src2 #@andL2I_Reg_imm_0_255" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src1$$Register; ++ long val = $src2$$constant; ++ ++ __ and_ins(src, val, dst); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* memory operands no need for SW64 ++// And Register with Memory ++instruct andL_rReg_mem(rRegL dst, memory src1, rRegL src2) ++%{ ++ match(Set dst (AndL src2 (LoadL src1))); ++ ++ ins_cost(125);//todo ++ format %{ ++ "ldl rscratch2_AT, $src1\t# long @andL_rReg_mem\n\t" ++ "and_ins 
rscratch2_AT, $src2, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst" ++ %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Address src1 = $src1$$Address; ++ Register src2 = $src2$$Register; ++ __ ldl(rscratch2_AT, src1); ++ __ and_ins(rscratch2_AT, src2, rscratch2_AT); ++ __ stl(rscratch2_AT, dst, rscratch1_GP); ++ %} ++// ins_pipe(ialu_reg_mem); ++%} ++ ++// And Memory with Immediate ++instruct andL_mem_imm(memory dst, immL32 src) ++%{ ++ match(Set dst (StoreL dst (AndL (LoadL dst) src))); ++ ++ ins_cost(125);//todo ++ format %{ "ldl rscratch2_AT, $dst\t# long @andL_mem_imm\n\t" ++ "movws rscratch1_GP, $src\n\t" ++ "and_ins rscratch2_AT, $src, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst" %} ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ int val = $src$$constant; ++ __ ldl(rscratch2_AT, dst); ++ __ movws(rscratch1_GP, val); ++ __ and_ins(rscratch1_GP, rscratch2_AT, rscratch2_AT); ++ __ stl(rscratch2_AT, dst); ++ %} ++// ins_pipe(ialu_mem_imm); ++%} ++ ++ ++// BMI1 instructions ++instruct andnL_rReg_rReg_mem(rRegL dst, rRegL src1, memory src2, immL_M1 minus_1) %{ ++ match(Set dst (AndL (XorL src1 minus_1) (LoadL src2))); ++// predicate(UseBMI1Instructions); ++ ++ ins_cost(125);//todo ++ format %{ "ldl rscratch2_AT, $src2\t# @andL_mem_rReg\n\t" ++ "ornot R0, $src1, rscratch3\n\t" ++ "and_ins rscratch3, rscratch2_AT, $dst" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Address src2 = $src2$$Address; ++ __ ldl(rscratch2_AT, src2); ++ __ ornot(R0, src1, rscratch3); ++ __ and_ins(rscratch3, rscratch2_AT, dst); ++ %} ++// ins_pipe(ialu_reg_mem); ++%}*/ ++/* TODO no in jdk8 ++instruct andnL_rReg_rReg_rReg(rRegL dst, rRegL src1, rRegL src2, immL_M1 minus_1) %{ ++ match(Set dst (AndL (XorL src1 minus_1) src2)); ++// predicate(UseBMI1Instructions); ++ ++ format %{ ++ "ornot R0, $src1, rscratch3\t# @andnL_rReg_rReg_rReg\n\t" ++ "andptr rscratch3, $src2, $dst" ++ %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $dst$$Register; ++ Register src2 = $dst$$Register; ++ __ ornot(R0, src1, rscratch3); ++ __ andptr(rscratch3, src2, dst); ++ %} ++ ins_pipe(ialu_regL_regL); ++%} ++*/ ++/*no need in swjdk8 ++instruct blsiL_rReg_rReg(rRegL dst, rRegL src, immL0 imm_zero) %{ ++ match(Set dst (AndL (SubL imm_zero src) src)); ++// predicate(UseBMI1Instructions); ++ ++ format %{ ++ "subptr R0, $src, rscratch3\t# @blsiL_rReg_rReg\n\t" ++ "andptr rscratch3, $src, $dst" ++ %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subptr(R0, src, rscratch3); ++ __ andptr(rscratch3, src, dst); ++ %} ++ ins_pipe(ialu_regI_regI); ++%}*/ ++/* memory operands no need for SW64 ++instruct blsiL_rReg_mem(rRegL dst, memory src, immL0 imm_zero) %{ ++ match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) )); ++// predicate(UseBMI1Instructions); ++ ++ ins_cost(125);//todo ++ format %{ ++ "ldl rscratch2_AT, $src\t# @blsiL_rReg_rReg\n\t" ++ "subl R0, rscratch2_AT, rscratch3\n\t" ++ "and_ins rscratch3, rscratch2_AT, $dst" ++ %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ __ ldl(rscratch2_AT, src); ++ __ subl(R0, rscratch2_AT, rscratch3); ++ __ and_ins(rscratch3, rscratch2_AT, dst); ++ %} ++// ins_pipe(ialu_reg_mem); ++%} ++ ++instruct blsmskL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1) ++%{ ++ match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ) ); ++// predicate(UseBMI1Instructions); ++ ++ ins_cost(125);//todo ++ format %{ ++ "ldl rscratch2_AT, 
$src\t# @blsmskL_Reg_mem\n\t" ++ "subl rscratch2_AT, #1, rscratch1_GP\n\t" ++ "xor_ins rscratch2_AT, rscratch1_GP, $dst" ++ %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ __ ldl(rscratch2_AT, src); ++ __ subl(rscratch2_AT, 1, rscratch1_GP); ++ __ xor_ins(rscratch2_AT, rscratch1_GP, dst); ++ %} ++// ins_pipe(ialu_reg_mem); ++%}*/ ++/*no need in swjdk8 ++instruct blsmskL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1) ++%{ ++ match(Set dst (XorL (AddL src minus_1) src)); ++// predicate(UseBMI1Instructions); ++ ++ format %{ "subl $src, #1, rscratch3\t# @blsmskL_rReg_rReg\n\t" ++ "xor_ins rscratch3, $src, $dst" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ __ subl(src, 1, rscratch3); ++ __ xor_ins(rscratch3, src, dst); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct blsrL_rReg_rReg(rRegL dst, rRegL src, immL_M1 minus_1) ++%{ ++ match(Set dst (AndL (AddL src minus_1) src) ); ++// predicate(UseBMI1Instructions); ++ ++ format %{ "subl $src, #1, rscratch3\t# @blsrL_Reg_Reg\n\t" ++ "and_ins rscratch3, $src, $dst" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ subl(src, 1, rscratch3); ++ __ and_ins(rscratch3, src, dst); ++ %} ++ ++ ins_pipe(ialu_regI_regI); ++%}*/ ++/* memory operands no need for SW64 ++instruct blsrL_rReg_mem(rRegL dst, memory src, immL_M1 minus_1) ++%{ ++ match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src)) ); ++// predicate(UseBMI1Instructions); ++ ++ ins_cost(125);//todo ++ format %{ "ldl rscratch2_AT, $src\t# @blsrL_rReg_mem\n\t" ++ "subl rscratch2_AT, #1, rscratch1_GP\n\t" ++ "and_ins rscratch2_AT, rscratch1_GP, $dst" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Address src = $src$$Address; ++ __ ldl(rscratch2_AT, src); ++ __ subl(rscratch2_AT, 1, rscratch1_GP); ++ __ and_ins(rscratch2_AT, rscratch1_GP, dst); ++ %} ++ ++// ins_pipe(ialu_reg); ++%} ++*/ ++// Or Instructions ++// Or Register with Register ++instruct orL_rReg(rRegL dst, rRegL src1, rRegL src2) ++%{ ++ match(Set dst (OrL src1 src2)); ++ ++ format %{ "bis $src1, $src2, $dst\t# @orL_rReg\n\t" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ bis(src1, src2, dst); ++ %} ++ ins_pipe(ialu_regI_regI);//ins_pipe needs to be changed to a proper one ++%} ++ ++ ++// Use any_RegP to match R15 (TLS register) without spilling. 
++instruct orL_rReg_castP2X(rRegL dst, any_RegP src1, rRegL src2) %{ ++ match(Set dst (OrL src2 (CastP2X src1))); ++ ++ format %{ "bis $src1, $src2, $dst\t# @orL_rReg_castP2X\n\t" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ Register src2 = $src2$$Register; ++ __ bis(src1, src2, dst); ++ %} ++ ins_pipe(ialu_regL_regL);//ins_pipe needs to be changed to a proper one ++%} ++ ++ ++// Or Register with Immediate ++instruct orL_rReg_imm(rRegL dst, rRegL src1, immU8 src2) ++%{ ++ match(Set dst (OrL src1 src2)); ++ ins_cost(80); ++ format %{ ++"movws $dst, $src2, $dst\t# @orL_rReg_imm\n\t" ++"bis $src1, $dst, $dst" ++ %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src1 = $src1$$Register; ++ int src2 = $src2$$constant; ++ __ bis(src1, src2, dst); ++ %} ++ ins_pipe(ialu_regL_regL);//ins_pipe needs to be changed to a proper one ++%} ++ ++/* memory operands no need for SW64 ++// Or Register with Memory ++instruct orL_rReg_mem(rRegL dst, memory src1, rRegL src2) ++%{ ++ match(Set dst (OrL src2 (LoadL src1))); ++ ++ ins_cost(125); ++ format %{ ++ "ldptr rscratch2_AT, $src1\t# @orL_rReg_mem\n\t" ++ "bis $src2, rscratch2_AT, $dst" ++ %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Address src1 = $src1$$Address; ++ Register src2 = $src2$$Register; ++ __ ldptr(rscratch2_AT, src1); ++ __ bis(src2, rscratch2_AT, dst); ++ %} ++// ins_pipe(ialu_regI_regI);//ins_pipe needs to be changed to a proper one ++%} ++ ++ ++// Or Memory with Register ++instruct orL_mem_rReg(memory dst, rRegL src) ++%{ ++ match(Set dst (StoreL dst (OrL (LoadL dst) src))); ++ ins_cost(150); ++ format %{ ++ "ldl rscratch2_AT, $dst\t# @orL_mem_rReg\n\t" ++ "bis rscratch2_AT, $src, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst" ++ %} ++ ins_encode%{ ++ Address dst = $dst$$Address; ++ Register src = $src$$Register; ++ __ ldl(rscratch2_AT, dst); ++ __ bis(rscratch2_AT, src, rscratch2_AT); ++ __ stl(rscratch2_AT, dst, rscratch1_GP); ++ %} ++// ins_pipe(ialu_regI_regI);//ins_pipe needs to be changed to a proper one ++%} ++ ++// Or Memory with Immediate ++instruct orL_mem_imm(memory dst, immL32 src) ++%{ ++ match(Set dst (StoreL dst (OrL (LoadL dst) src))); ++ ++ ins_cost(125); ++ format %{ ++ "ldl rscratch2_AT, $dst\t# @orL_mem_imm\n\t" ++ "movws rscratch1_GP, $src\n\t" ++ "bis rscratch2_AT, rscratch1_GP, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst" ++ %} ++ ins_encode %{ ++ Address dst = $dst$$Address; ++ int src = $src$$constant; ++ __ ldl(rscratch2_AT, dst); ++ __ movws(rscratch1_GP, src); ++ __ bis(rscratch2_AT, rscratch1_GP, rscratch2_AT); ++ __ stl(rscratch2_AT, dst, rscratch1_GP); ++ %} ++// ins_pipe(ialu_regI_regI);//ins_pipe needs to be changed to a proper one ++%} ++ */ ++ ++ ++// Xor Instructions ++// Xor Register with Register ++instruct xorL_rReg(rRegL dst, rRegL src1, rRegL src2) ++%{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xorptr $src1, $src2, $dst\t# long @xorL_rReg" %} ++ ins_encode %{ ++ __ xorptr($src1$$Register, $src2$$Register, $dst$$Register); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++/* TODO no in jdk8 ++// Xor Register with Immediate -1 ++instruct xorL_rReg_im1(rRegL dst, rRegL src, immL_M1 imm) %{ ++ match(Set dst (XorL src imm)); ++ ins_cost(60); ++ ++ format %{ "ornot R0, $src, $dst\t# long @xorL_rReg_im1" %} ++ ins_encode %{ ++ __ ornot(R0, $src$$Register, $dst$$Register); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++*/ ++// Xor Register with Immediate ++instruct xorL_rReg_imm(rRegL dst, rRegL src1, immU8 src2) 
++%{ ++ match(Set dst (XorL src1 src2)); ++ ins_cost(60); ++ ++ format %{ "xor_ins $src1, $src2, $dst\t# long @xorL_rReg_imm\n" %} ++ ins_encode %{ ++ __ xor_ins($src1$$Register, $src2$$constant, $dst$$Register); ++ %} ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++/* memory operands no need for SW64 ++// Xor Register with Memory ++instruct xorL_rReg_mem(rRegL dst, rRegL src1, memory src2) ++%{ ++ match(Set dst (XorL src1 (LoadL src2))); ++ ins_cost(125); ++ ++ format %{ "ldl rscratch2_AT, $src2\t# long @xorL_rReg_mem\n\t" ++ "xor_ins $src1, rscratch2_AT, $dst" %} ++ ins_encode %{ ++ __ ldl(rscratch2_AT, $src2$$Address); ++ __ xor_ins($src1$$Register, rscratch2_AT, $dst$$Register); ++ %} ++// ins_pipe( ialu_reg_mem ); ++%} ++ ++// Xor Memory with Register ++instruct xorL_mem_rReg(memory dst, rRegL src) ++%{ ++ match(Set dst (StoreL dst (XorL (LoadL dst) src))); ++ ins_cost(150); ++ ++ format %{ "ldl rscratch2_AT, $src\t# long @xorL_mem_rReg\n\t" ++ "xor_ins $src, rscratch2_AT, $dst\n\t" ++ "stl rscratch2_AT, $dst"%} ++ ins_encode %{ ++ __ ldl(rscratch2_AT, $dst$$Address); ++ __ xor_ins(rscratch2_AT, $src$$Register, rscratch2_AT); ++ __ stl(rscratch2_AT, $dst$$Address, rscratch1_GP); ++ %} ++// ins_pipe( ialu_reg_mem ); ++%} ++ ++// Xor Memory with Immediate ++instruct xorL_mem_imm(memory dst, immL32 src) ++%{ ++ match(Set dst (StoreI dst (XorL (LoadI dst) src))); ++ ins_cost(125); ++ ++ format %{ "ldl rscratch2_AT, $dst\t# long @xorL_mem_imm\n\t" ++ "movws rscratch1_GP, $src\n\t" ++ "xo_ins rscratch2_AT, $src, rscratch2_AT\n\t" ++ "stl rscratch2_AT, $dst"%} ++ ins_encode %{ ++ __ ldl(rscratch2_AT, $dst$$Address); ++ __ movws(rscratch1_GP, (int)$src$$constant); ++ __ xor_ins(rscratch2_AT, rscratch1_GP, rscratch2_AT); ++ __ stl(rscratch2_AT, $dst$$Address, rscratch1_GP); ++ %} ++// ins_pipe( ialu_mem_imm ); ++%} ++ */ ++ ++// Convert Int to Boolean ++instruct convI2B(rRegI dst, rRegI src) ++%{ ++ match(Set dst (Conv2B src)); ++ ++ ins_cost(100); ++ format %{ "selne $src, #1, $src, $dst\t# @ convI2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ __ selne(src, 1, src, dst); ++ %} ++ ins_pipe(ialu_regL_regL ); ++%} ++ ++ ++// Convert Pointer to Boolean ++instruct convP2B(rRegI dst, rRegP src) ++%{ ++ match(Set dst (Conv2B src)); ++ ++ format %{ "selne $src, #1, $src, $dst\t# @convP2B" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ __ selne(src, 1, src, dst); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++//lsp if p= max_int(0x7fffffff),jump to Skip Label ++ ++ //Lable Convert ++ __ BIND(Convert); ++ __ fcvtsd(src, temp_float_reg); ++ __ fcvtdl_z(temp_float_reg, temp_float_reg1); ++ __ fcvtlw(temp_float_reg1, temp_float_reg); ++ __ fimovs(temp_float_reg, dst); ++ __ addw(dst, 0, dst); ++ __ beq_l(R0, Done); ++ ++ //Lable Skip ++ __ BIND(Overflow) ++ __ addw(rscratch3, 0, dst); ++ __ BIND(Done); ++ } ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convF2I_reg_slow( rRegI dst, regF src ) %{ ++ match(Set dst (ConvF2I src)); ++ ins_cost(250); ++ format %{ "convf2i $dst, $src @ convF2I_reg_slow" %} ++ ins_encode %{ ++ Register dreg = $dst$$Register; ++ FloatRegister fval = $src$$FloatRegister; ++ ++ __ pushad(rscratch3); ++ __ mov_s(F16, fval); ++ __ call(RuntimeAddress((CAST_FROM_FN_PTR(address, SharedRuntime::f2i)))); ++ __ movl(rscratch3, V0); ++ __ popad(rscratch3); ++ __ movl(dreg, rscratch3); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_fast( rRegL 
dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(150); ++ format %{ "convf2l $dst, $src @ convF2L_reg_fast" %} ++ ins_encode %{ ++ if(UseSW8A) { ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ __ cmovdl_z(dst, src); ++ }else { ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = $src$$FloatRegister; ++ Label Convert,Overflow,Done; ++ FloatRegister temp_float_reg = f30; ++ FloatRegister temp_float_reg1 = f28; ++ ++ __ fcmpun(src, src, temp_float_reg); ++ __ ffbne(temp_float_reg, Convert); ++ ++ __ bis (R0, 1, rscratch3); ++ __ slll(rscratch3, 63, rscratch3); ++ ++ __ ifmovd(rscratch3, temp_float_reg); ++ __ fcvtls(temp_float_reg, temp_float_reg1); ++ __ fcmple(src, temp_float_reg1, temp_float_reg); ++ __ ffbne(temp_float_reg, Overflow); //if less than min_long(0x8000000000000000),jump to Skip Lable ++ ++ __ subl(rscratch3, 1, rscratch3); ++ __ ifmovd(rscratch3, temp_float_reg); ++ __ fcvtls(temp_float_reg, temp_float_reg1); ++ __ fcmple(temp_float_reg1,src, temp_float_reg); ++ __ ffbne(temp_float_reg, Overflow); // if >=max_long(0x7fffffffffffffff),jump to Skip Lable ++ ++ //Lable Convert ++ __ BIND(Convert); ++ __ fcvtsd(src, temp_float_reg); ++ __ fcvtdl_z(temp_float_reg, temp_float_reg1); ++ __ fimovd(temp_float_reg1, dst); ++ __ beq_l(R0, Done); ++ ++ //Lable Skip ++ __ BIND(Overflow); ++ __ move(dst, rscratch3); ++ __ BIND(Done); ++ } ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convF2L_reg_slow( rRegL dst, regF src ) %{ ++ match(Set dst (ConvF2L src)); ++ ins_cost(250); ++ format %{ "convf2l $dst, $src @ convF2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister fval = $src$$FloatRegister; ++ //TODO:jzy check usage ++ __ pushad(rscratch3); ++ __ mov_s(F16, fval); ++// __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); ++ __ call(RuntimeAddress((CAST_FROM_FN_PTR(address, SharedRuntime::f2l)))); ++ __ movl(rscratch3, V0); ++ __ popad(rscratch3); ++ __ movl(dst, rscratch3); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convD2L_reg_fast( rRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(150); ++ format %{ "convD2L $dst, $src @ convD2L_reg_fast" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ FloatRegister temp_float_reg = f30; ++ FloatRegister temp_float_reg1 = f28; ++ assert((temp_float_reg1 != $src$$FloatRegister), "can not use F28"); ++ assert((temp_float_reg1 != $dst$$FloatRegister), "can not use F28"); ++ ++ Label Convert,Overflow,Done; ++ __ fcmpun (src, src, temp_float_reg); ++ __ ffbne (temp_float_reg, Convert); //If Unorder,Jump to Convert Label ++ ++ __ bis (R0, 1, rscratch3); ++ __ slll (rscratch3, 63, rscratch3); ++ ++ __ ifmovd (rscratch3, temp_float_reg); ++ __ fcvtld (temp_float_reg, temp_float_reg1); ++ __ fcmple (src, temp_float_reg1, temp_float_reg); ++ __ ffbne (temp_float_reg, Overflow); //If less than min_long(0x8000000000000000),jump to Skip Label ++ ++ __ subl (rscratch3, 0x1, rscratch3); ++ __ ifmovd (rscratch3, temp_float_reg); ++ __ fcvtld (temp_float_reg, temp_float_reg1); ++ __ fcmple (temp_float_reg1, src, temp_float_reg); ++ __ ffbne (temp_float_reg, Overflow); //If >= max_long(0x7fffffffffffffff),jump to Skip Label ++ ++ //Label Convert ++ __ BIND(Convert); ++ __ fcvtdl_z (src, temp_float_reg);//lx20121018,result is rounded toward zero ++ __ fimovd(temp_float_reg,dst); ++ __ beq_l (R0, Done); ++ //Labe Skip ++ __ 
BIND(Overflow); ++ __ move(dst,rscratch3); ++ __ BIND(Done); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2L_reg_slow( rRegL dst, regD src ) %{ ++ match(Set dst (ConvD2L src)); ++ ins_cost(250); ++ format %{ "convD2L $dst, $src @ convD2L_reg_slow" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ pushad(rscratch3); ++ __ mov_d(F16, src); ++// __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); ++ __ call(RuntimeAddress((CAST_FROM_FN_PTR(address, SharedRuntime::d2l)))); ++ __ movl(rscratch3, V0); ++ __ popad(rscratch3); ++ __ movl(dst, rscratch3); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// Convert a double to an int. If the double is a NAN, stuff a zero in instead. ++instruct convD2I_reg_reg_fast( rRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(150); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_fast" %} ++ ++ ins_encode %{ ++ if (UseSW8A) { ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ cmovdw_z(dst, src); ++ } else { ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ FloatRegister temp_float_reg = f30; ++ FloatRegister temp_float_reg1 = f28; ++ FloatRegister tmp = f27; ++ ++ assert( (temp_float_reg1 != src), "can not use F28"); ++ assert( (temp_float_reg != src), "can not use F28"); ++ Label Convert,Overflow,Done; ++ __ fcmpun (src, src, temp_float_reg); ++ __ ffbne (temp_float_reg, Convert); //If Unorder,Jump to Convert Label ++ ++ __ bis (R0, 1, rscratch3); ++ __ slll (rscratch3, 31, rscratch3); ++ __ ifmovs (rscratch3, temp_float_reg); ++ __ fcvtwl (temp_float_reg, temp_float_reg1); ++ __ fcvtld (temp_float_reg1, temp_float_reg); ++ __ fcmple (src, temp_float_reg, temp_float_reg1); ++ __ ffbne (temp_float_reg1, Overflow); //If less than min_int(0x80000000),jump to Skip Label ++ ++ __ subw (rscratch3, 0x1, rscratch3); ++ __ ifmovs (rscratch3, temp_float_reg); ++ __ fcvtwl (temp_float_reg, temp_float_reg1); ++ __ fcvtld (temp_float_reg1, temp_float_reg); ++ __ fcmple (temp_float_reg, src, temp_float_reg1); ++ __ ffbne (temp_float_reg1, Overflow); //If >= max_int(0x7fffffff),jump to Skip Label ++ ++ //Label Convert ++ __ BIND(Convert); ++ __ fcvtdl_z (src, temp_float_reg);//lx20121018,result is rounded toward zero ++ __ fcvtlw (temp_float_reg, tmp); ++ __ fimovs(tmp,dst); ++ __ addw(dst,0,dst); ++ __ beq_l (R0, Done); ++ //Labe Overflow ++ __ BIND(Overflow); ++ __ addw(rscratch3, 0, dst); ++ __ BIND(Done); ++ } ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct convD2I_reg_reg_slow( rRegI dst, regD src ) %{ ++ match(Set dst (ConvD2I src)); ++ ++ ins_cost(250); ++ format %{ "convD2I $dst, $src\t# @ convD2I_reg_reg_slow" %} ++ ++ ins_encode %{ ++ FloatRegister src = $src$$FloatRegister; ++ Register dst = $dst$$Register; ++ ++ __ pushad(rscratch3); ++ __ mov_d(F16, src); ++// __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); ++ __ call(RuntimeAddress((CAST_FROM_FN_PTR(address, SharedRuntime::d2i)))); ++ __ movl(rscratch3, V0); ++ __ popad(rscratch3); ++ __ movl(dst, rscratch3); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct convI2F_reg_reg( regF dst, rRegI src ) %{ ++ match(Set dst (ConvI2F src)); ++ format %{ "convi2f $dst, $src @ convI2F_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ if (UseSW8A) { ++ __ cmovws(dst, src); ++ } else { ++ __ ifmovs(src, f30); ++ __ fcvtwl(f30, f28); ++ 
__ fcvtls(f28, dst); ++ } ++ %} ++ ++ ins_pipe( fpu_regF_regF ); ++%} ++/* memory operands no need for SW64 ++instruct convI2F_reg_mem(regF dst, memory src) ++%{ ++ match(Set dst (ConvI2F (LoadI src))); ++ ++ format %{ "ldw rscratch1_GP, $src\t# i2f@convI2F_reg_mem\n\t" ++ "convi2f $dst, rscratch1_GP " %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ __ ldw(rscratch1_GP, $src$$Address); ++ __ ifmovs(rscratch1_GP, f30); ++ __ fcvtwl(f30, f28); ++ __ fcvtls(f28, dst); ++// __ cvtsi2ssl ($dst$$XMMRegister, $src$$Address); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++*/ ++ ++instruct convI2D_reg_reg(regD dst, rRegI src) %{ ++ match(Set dst (ConvI2D src)); ++ format %{ "conI2D $dst, $src @convI2D_reg" %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ if (UseSW8A){ ++ __ cmovwd(dst, src); ++ } else { ++ __ ifmovd(src, f30); ++ __ fcvtld(f30, dst); ++ } ++ %} ++ ins_pipe( fpu_regF_regF ); ++%} ++ ++/* memory operands no need for SW64 ++instruct convI2D_reg_mem(regD dst, memory src) ++%{ ++ match(Set dst (ConvI2D (LoadI src))); ++ format %{ "ldw rscratch1_GP, $src\t# i2d@convI2D_reg_mem\n\t" ++ "conI2D $dst, rscratch1_GP " %} ++ ins_encode %{ ++ Register src = $src$$Register; ++ FloatRegister dst = $dst$$FloatRegister; ++ __ ldw(rscratch1_GP, $src$$Address); ++ __ ifmovd(rscratch1_GP, f30); ++ __ fcvtld(f30, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%}*/ ++/* ++instruct convXI2F_reg(regF dst, rRegI src) ++%{ ++ predicate(UseXmmI2F); ++ match(Set dst (ConvI2F src)); ++ ++ format %{ "movdl $dst, $src\n\t" ++ "cvtdq2psl $dst, $dst\t# i2f" %} ++ ins_encode %{ ++ __ movdl($dst$$XMMRegister, $src$$Register); ++ __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++instruct convXI2D_reg(regD dst, rRegI src) ++%{ ++ predicate(UseXmmI2D); ++ match(Set dst (ConvI2D src)); ++ ++ format %{ "movdl $dst, $src\n\t" ++ "cvtdq2pdl $dst, $dst\t# i2d" %} ++ ins_encode %{ ++ __ movdl($dst$$XMMRegister, $src$$Register); ++ __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++*/ ++instruct convL2F_reg_reg(regF dst, rRegL src) ++%{ ++ match(Set dst (ConvL2F src)); ++ format %{ "convl2f $dst, $src @ convL2F_reg" %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ if (UseSW8A){ ++ __ cmovls(dst, src); ++ } else { ++ __ ifmovd(src, f30); ++ __ fcvtls(f30, dst); ++ } ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++/* memory operands no need for SW64 ++instruct convL2F_reg_mem(regF dst, memory src) ++%{ ++ match(Set dst (ConvL2F (LoadL src))); ++ format %{ "ldl rscratch1_GP, $src\t# l2f@convL2F_reg_mem\n\t" ++ "conI2D $dst, rscratch1_GP " %} ++ ins_encode %{ ++ FloatRegister dst = $dst$$FloatRegister; ++ Register src = as_Register($src$$reg); ++ __ ldl(rscratch1_GP, $src$$Address); ++ __ ifmovd(rscratch1_GP, f30); ++ __ fcvtls(f30, dst); ++// __ cvtsi2ssq ($dst$$XMMRegister, $src$$Address); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ */ ++ ++instruct convL2D_reg_reg(regD dst, rRegL src) ++%{ ++ match(Set dst (ConvL2D src)); ++ ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ if (UseSW8A) { ++ __ cmovld(dst, src); ++ } else { ++ __ ifmovd(src, f30); ++ __ fcvtld(f30, dst); ++ } ++ %} ++ ins_pipe(pipe_slow); // XXX ++%} ++ ++/* memory operands no need for SW64 ++instruct convL2D_reg_mem(regD dst, memory 
src) ++%{ ++ match(Set dst (ConvL2D (LoadL src))); ++ ++ format %{ "convL2D $dst, $src @ convL2D_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ __ ldl(rscratch1_GP, $src$$Address); ++ __ ifmovd(rscratch1_GP, f30); ++ __ fcvtld(f30, dst); ++ %} ++ ins_pipe(pipe_slow); // XXX ++%}*/ ++ ++instruct convI2L_reg_reg( rRegL dst, rRegI src) %{ ++ match(Set dst (ConvI2L src)); ++ ++ ins_cost(100); ++ format %{ "movws $dst, $src @ convI2L_reg\t" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ if(dst != src) __ movws(dst, src); ++ %} ++ ins_pipe( ialu_regL_regL ); ++%} ++ ++ ++// instruct convI2L_reg_reg_foo(rRegL dst, rRegI src) ++// %{ ++// match(Set dst (ConvI2L src)); ++// // predicate(_kids[0]->_leaf->as_Type()->type()->is_int()->_lo >= 0 && ++// // _kids[0]->_leaf->as_Type()->type()->is_int()->_hi >= 0); ++// predicate(((const TypeNode*) n)->type()->is_long()->_hi == ++// (unsigned int) ((const TypeNode*) n)->type()->is_long()->_hi && ++// ((const TypeNode*) n)->type()->is_long()->_lo == ++// (unsigned int) ((const TypeNode*) n)->type()->is_long()->_lo); ++ ++// format %{ "movl $dst, $src\t# unsigned i2l" %} ++// ins_encode(enc_copy(dst, src)); ++// // opcode(0x63); // needs REX.W ++// // ins_encode(REX_reg_reg_wide(dst, src), OpcP, reg_reg(dst,src)); ++// ins_pipe(ialu_reg_reg); ++// %} ++ ++ ++// Zero-extend convert int to long ++instruct convI2L_reg_reg_zex(rRegL dst, rRegI src, immL_32bits mask) ++%{ ++ match(Set dst (AndL (ConvI2L src) mask)); ++ ++ format %{ "movwu $dst, $src \t# i2l zero-extend @convI2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ __ movwu(dst, src); ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++/* memory operands no need for SW64 ++// Zero-extend convert int to long ++instruct convI2L_reg_mem_zex(rRegL dst, memory src, immL_32bits mask) ++%{ ++ match(Set dst (AndL (ConvI2L (LoadI src)) mask)); ++ ++ format %{ "ldwu $dst, $src\t# i2l zero-extend @convI2L_reg_mem_zex" %} ++ ins_encode %{ ++ __ ldwu($dst$$Register, $src$$Address); ++ %} ++ ins_pipe(pipe_slow); ++%} ++*/ ++ ++instruct zerox_long_reg_reg(rRegL dst, rRegL src, immL_32bits mask) ++%{ ++ match(Set dst (AndL src mask)); ++ format %{ "zapnot $src, 0xf, $dst\t# i2l zero-extend @zerox_long_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ __ zapnot(src, 0xf, dst); ++ ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg_zex(rRegL dst, rRegL src, immL_32bits mask) ++%{ ++ match(Set dst (AndL (ConvI2L (ConvL2I src)) mask)); ++ ++ format %{ "zapnot $dst, $src\t# L2I2L zero-extend @ convL2I2L_reg_reg_zex" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ zapnot(src, 0xf, dst); ++ ++ %} ++ ins_pipe(ialu_regI_regI); ++%} ++ ++instruct convL2I2L_reg_reg( rRegL dst, rRegL src ) %{ ++ match(Set dst (ConvI2L (ConvL2I src))); ++ ++ format %{ "addw $dst, $src, 0 @ convL2I2L_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ addw(src, 0, dst); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++instruct convL2I_reg_reg( rRegI dst, rRegL src ) %{ ++ match(Set dst (ConvL2I src)); ++ ++ format %{ "addw $src, 0, $dst@ convL2I_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ Register src = as_Register($src$$reg); ++ 
++ __ addw(src, 0, dst); ++ %} ++ ++ ins_pipe( ialu_regI_regI ); ++%} ++ ++ ++instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ++ ins_cost(125); ++ format %{ "ldw $dst, $src\t# @MoveF2I_stack_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ __ ldw(dst, Address(esp, $src$$disp));//LSP CHECK sign extend? ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ++ ins_cost(125); ++ format %{ "store_float $dst, $src\t# @MoveI2F_stack_reg " %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ __ store_float(dst, Address(esp, $src$$disp)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct MoveD2L_stack_reg(rRegL dst, stackSlotD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ++ ins_cost(125); ++ format %{ "movq $dst, $src\t# MoveD2L_stack_reg" %} ++ ins_encode %{ ++ __ ldl($dst$$Register, Address(esp, $src$$disp)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct MoveL2D_stack_reg_partial(regD dst, stackSlotL src) %{ ++// predicate(!UseXmmLoadAndClearUpper); ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ++ ins_cost(125); ++ format %{ "store_double $dst, $src\t# @MoveI2F_stack_reg"%} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ __ store_double(dst, Address(esp, $src$$disp)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++/* ++instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{ ++ predicate(UseXmmLoadAndClearUpper); ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ++ ins_cost(125); ++ format %{ "movsd $dst, $src\t# MoveL2D_stack_reg" %} ++ ins_encode %{ ++ __ movdbl($dst$$XMMRegister, Address(esp, $src$$disp)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++*/ ++ ++instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ++ ins_cost(95); // XXX ++ format %{ "store_float $dst, $src\t# MoveF2I_reg_stack" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ __ store_float(src, Address(esp, $dst$$disp)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ++ ins_cost(100); ++ format %{ "stw $src, $dst\t# @MoveI2F_reg_stack" %} ++ ins_encode %{ ++ __ stw( $src$$Register, Address(esp, $dst$$disp)); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ++ ins_cost(95); // XXX ++ format %{ "store_double $src, $dst\t# @MoveL2D_reg_stack" %} ++ ins_encode %{ ++ FloatRegister src = as_FloatRegister($src$$reg); ++ __ store_double(src, Address(esp, $dst$$disp)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct MoveL2D_reg_stack(stackSlotD dst, rRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ++ ins_cost(100); ++ format %{ "stl $src, $dst\t# @MoveL2D_reg_stack" %} ++ ins_encode %{ ++ __ stl($src$$Register, Address(esp, $dst$$disp)); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct MoveF2I_reg_reg(rRegI dst, regF src) %{ ++ match(Set dst (MoveF2I src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveF2I $dst, $src @ MoveF2I_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ fimovs(src, dst); ++ __ 
addw(dst, 0, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++instruct MoveD2L_reg_reg(rRegL dst, regD src) %{ ++ match(Set dst (MoveD2L src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveD2L $dst, $src @ MoveD2L_reg_reg" %} ++ ins_encode %{ ++ Register dst = as_Register($dst$$reg); ++ FloatRegister src = as_FloatRegister($src$$reg); ++ ++ __ fimovd(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ instruct MoveI2F_reg_reg(regF dst, rRegI src) %{ ++ match(Set dst (MoveI2F src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveI2F $dst, $src @ MoveI2F_reg_reg" %} ++ ins_encode %{ ++ Register src = as_Register($src$$reg); ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ ++ __ ifmovs(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct MoveL2D_reg_reg(regD dst, rRegL src) %{ ++ match(Set dst (MoveL2D src)); ++ effect(DEF dst, USE src); ++ ins_cost(85); ++ format %{ "MoveL2D $dst, $src @ MoveL2D_reg_reg" %} ++ ins_encode %{ ++ FloatRegister dst = as_FloatRegister($dst$$reg); ++ Register src = as_Register($src$$reg); ++ ++ __ ifmovd(src, dst); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++ ++// ======================================================================= ++// fast clearing of an array ++instruct clear_array(rRegL cnt, rRegP base, Universe dummy) %{ ++ match(Set dummy (ClearArray cnt base)); ++ format %{ "CLEAR_ARRAY base = $base, cnt = $cnt # Clear doublewords" %} ++ ins_encode %{ ++ //Assume cnt is the number of bytes in an array to be cleared, ++ //and base points to the starting address of the array. ++ Register base = $base$$Register; ++ Register num = $cnt$$Register; ++ Label Loop, done; ++ ++ __ move(rscratch3, num); /* rscratch3 = words */ ++ __ beq_l(rscratch3, done); ++ __ move(rscratch2_AT, base); ++ ++ __ BIND(Loop); ++ __ stl(R0, Address(rscratch2_AT, 0)); ++ __ addl(rscratch2_AT, wordSize, rscratch2_AT); ++ __ subl(rscratch3, 1, rscratch3); ++ __ bne_l(rscratch3, Loop); ++ __ BIND(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++/*TODO:need implement jzy ++instruct string_compareU(a1_RegP str1, a2_RegI cnt1, a3_RegP str2, a4_RegI cnt2, ++ a0_RegI result, t8_RegP tmp1, t9_RegL tmp2, rFlagsReg cr) ++%{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ fnoreg, fnoreg, fnoreg, StrIntrinsicNode::UU); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_compareL(a1_RegP str1, a2_RegI cnt1, a3_RegP str2, a4_RegI cnt2, ++ a0_RegI result, t8_RegP tmp1, t9_RegL tmp2, rFlagsReg cr) ++%{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ fnoreg, fnoreg, fnoreg, StrIntrinsicNode::LL); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_compareUL(a1_RegP str1, a2_RegI cnt1, a3_RegP str2, a4_RegI cnt2, ++ a0_RegI result, t8_RegP tmp1, t9_RegL tmp2, ++ f28_RegD vtmp1, f29_RegD vtmp2, f30_RegD vtmp3, rFlagsReg cr) ++%{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3, ++ USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, ++ $vtmp3$$FloatRegister, StrIntrinsicNode::UL); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_compareLU(a1_RegP str1, a2_RegI cnt1, a3_RegP str2, a4_RegI cnt2, ++ a0_RegI result, t8_RegP tmp1, t9_RegL tmp2, ++ f28_RegD vtmp1, f29_RegD vtmp2, f30_RegD vtmp3, rFlagsReg cr) ++%{ ++ predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU); ++ match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(KILL tmp1, KILL tmp2, KILL vtmp1, KILL vtmp2, KILL vtmp3, ++ USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr); ++ ++ format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result # KILL $tmp1, $tmp2, $vtmp1, $vtmp2, $vtmp3" %} ++ ins_encode %{ ++ __ string_compare($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, $result$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $vtmp1$$FloatRegister, $vtmp2$$FloatRegister, ++ $vtmp3$$FloatRegister,StrIntrinsicNode::LU); ++ %} ++ ins_pipe(pipe_slow); ++%}*/ ++/*TODO:need implement jzy ++instruct string_indexofUU(a1_RegP str1, a2_RegI cnt1, a3_RegP str2, a4_RegI cnt2, ++ a0_RegI result, rRegI tmp1, rRegI tmp2, rRegI tmp3, ++ rRegI tmp4, rRegI tmp5, rRegI tmp6, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UU)" %} ++ ++ ins_encode %{ ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, 
$tmp6$$Register, ++ -1, $result$$Register, StrIntrinsicNode::UU); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexofLL(a1_RegP str1, a2_RegI cnt1, a3_RegP str2, a4_RegI cnt2, ++ a0_RegI result, rRegI tmp1, rRegI tmp2, rRegI tmp3, ++ rRegI tmp4, rRegI tmp5, rRegI tmp6, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (LL)" %} ++ ++ ins_encode %{ ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ -1, $result$$Register, StrIntrinsicNode::LL); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexofUL(a1_RegP str1, a2_RegI cnt1, a3_RegP str2, a4_RegI cnt2, ++ a0_RegI result, rRegI tmp1, rRegI tmp2, rRegI tmp3, ++ rRegI tmp4, rRegI tmp5, rRegI tmp6, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, TEMP tmp5, TEMP tmp6, KILL cr); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result (UL)" %} ++ ++ ins_encode %{ ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, $cnt2$$Register, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, ++ $tmp5$$Register, $tmp6$$Register, ++ -1, $result$$Register, StrIntrinsicNode::UL); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexof_conUU(a1_RegP str1, a4_RegI cnt1, a3_RegP str2, ++ immI_le_4 int_cnt2, a0_RegI result, rRegI tmp1, rRegI tmp2, rRegI tmp3, ++ rRegI tmp4, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UU)" %} ++ ++ ins_encode %{ ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, zr, zr, ++ icnt2, $result$$Register, StrIntrinsicNode::UU); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexof_conLL(a1_RegP str1, a4_RegI cnt1, a3_RegP str2, ++ immI_le_4 int_cnt2, a0_RegI result, rRegI tmp1, rRegI tmp2, rRegI tmp3, ++ rRegI tmp4, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (LL)" %} ++ ++ ins_encode %{ ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, zr, zr, ++ icnt2, $result$$Register, StrIntrinsicNode::LL); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct 
string_indexof_conUL(a1_RegP str1, a4_RegI cnt1, a3_RegP str2, ++ immI_le_4 int_cnt2, a0_RegI result, rRegI tmp1, rRegI tmp2, rRegI tmp3, ++ rRegI tmp4, rFlagsReg cr) ++%{ ++ predicate(((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL); ++ match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2))); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt1, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, KILL cr); ++ format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result (UL)" %} ++ ++ ins_encode %{ ++ int icnt2 = (int)$int_cnt2$$constant; ++ __ string_indexof($str1$$Register, $str2$$Register, ++ $cnt1$$Register, zr, ++ $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register, $tmp4$$Register, zr, zr, ++ icnt2, $result$$Register, StrIntrinsicNode::UL); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_indexofU_char(a1_RegP str1, a2_RegI cnt1, a3_RegI ch, ++ a0_RegI result, rRegI tmp1, rRegI tmp2, ++ rRegI tmp3, rFlagsReg cr) ++%{ ++ match(Set result (StrIndexOfChar (Binary str1 cnt1) ch)); ++ effect(USE_KILL str1, USE_KILL cnt1, USE_KILL ch, ++ TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); ++ ++ format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result" %} ++ ++ ins_encode %{ ++ __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, ++ $result$$Register, $tmp1$$Register, $tmp2$$Register, ++ $tmp3$$Register); ++ %} ++ ins_pipe(pipe_slow); ++%}*/ ++//TODO:need implement jzy ++/* ++instruct string_equalsL(a1_RegP str1, a3_RegP str2, a4_RegI cnt, ++ a0_RegI result, rFlagsReg cr) ++%{ ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); ++ ++ format %{ "String Equals $str1,$str2,$cnt -> $result" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. ++ __ string_equals($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 1); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct string_equalsU(a1_RegP str1, a3_RegP str2, a4_RegI cnt, ++ a0_RegI result, rFlagsReg cr) ++%{ ++ predicate(((StrEqualsNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (StrEquals (Binary str1 str2) cnt)); ++ effect(USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL cr); ++ ++ format %{ "String Equals $str1,$str2,$cnt -> $result" %} ++ ins_encode %{ ++ // Count is in 8-bit bytes; non-Compact chars are 16 bits. 
++ __ string_equals($str1$$Register, $str2$$Register, ++ $result$$Register, $cnt$$Register, 2); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct array_equalsB(a1_RegP ary1, a2_RegP ary2, a0_RegI result, ++ a3_RegP tmp1, a4_RegP tmp2, a5_RegP tmp3, ++ t10_RegP tmp, rFlagsReg cr) ++%{ ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL); ++ match(Set result (AryEq ary1 ary2)); ++ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); ++ ++ format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %} ++ ins_encode %{ ++ __ arrays_equals($ary1$$Register, $ary2$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ $result$$Register, $tmp$$Register, 1); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++instruct array_equalsC(a1_RegP ary1, a2_RegP ary2, a0_RegI result, ++ a3_RegP tmp1, a4_RegP tmp2, a5_RegP tmp3, ++ t10_RegP tmp, rFlagsReg cr) ++%{ ++ predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU); ++ match(Set result (AryEq ary1 ary2)); ++ effect(KILL tmp, USE_KILL ary1, USE_KILL ary2, TEMP tmp1, TEMP tmp2, TEMP tmp3, KILL cr); ++ ++ format %{ "Array Equals $ary1,ary2 -> $result // KILL $tmp" %} ++ ins_encode %{ ++ __ arrays_equals($ary1$$Register, $ary2$$Register, ++ $tmp1$$Register, $tmp2$$Register, $tmp3$$Register, ++ $result$$Register, $tmp$$Register, 2); ++ %} ++ ins_pipe(pipe_slow); ++%} ++*/ ++instruct has_negatives(a1_RegP ary1, a2_RegI len, v0_RegI result, rFlagsReg cr) ++%{ ++ match(Set result (HasNegatives ary1 len)); ++ effect(USE_KILL ary1, USE_KILL len, KILL cr); ++ format %{ "has negatives byte[] $ary1,$len -> $result" %} ++ ins_encode %{ ++ __ has_negatives($ary1$$Register, $len$$Register, $result$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// fast char[] to byte[] compression TODO:jzy ++/*instruct string_compress(a2_RegP src, a1_RegP dst, a3_RegI len, ++ f27_RegD tmp1, f28_RegD tmp2, f29_RegD tmp3, f30_RegD tmp4, ++ a0_RegI result, rFlagsReg cr) ++%{ ++ match(Set result (StrCompressedCopy src (Binary dst len))); ++ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); ++ ++ format %{ "String Compress $src,$dst -> $result // KILL R1, R2, R3, R4" %} ++ ins_encode %{ ++ __ char_array_compress($src$$Register, $dst$$Register, $len$$Register, ++ $tmp1$$FloatRegister, $tmp2$$FloatRegister, ++ $tmp3$$FloatRegister, $tmp4$$FloatRegister, ++ $result$$Register); ++ %} ++ ins_pipe( pipe_slow ); ++%}*/ ++ ++// fast byte[] to char[] inflation TODO:should implement jzy ++/*instruct string_inflate(Universe dummy, a0_RegP src, a1_RegP dst, a2_RegI len, ++ f27_RegD tmp1, f28_RegD tmp2, f29_RegD tmp3, f30_RegD tmp4, rFlagsReg cr) ++%{ ++ match(Set dummy (StrInflatedCopy src (Binary dst len))); ++ effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr); ++ ++ format %{ "String Inflate $src,$dst // KILL $tmp1, $tmp2" %} ++ ins_encode %{ ++ __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register, ++ $tmp1$$FloatRegister, $tmp2$$FloatRegister, $tmp3$$FloatRegister, $tmp4$$Register); ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// encode char[] to byte[] in ISO_8859_1 ++instruct encode_iso_array(a2_RegP src, a1_RegP dst, a3_RegI len, ++ f27_RegD Vtmp1, f28_RegD Vtmp2, f29_RegD Vtmp3, f30_RegD Vtmp4, ++ a0_RegI result, rFlagsReg cr) ++%{ ++ match(Set result (EncodeISOArray src (Binary dst len))); ++ effect(USE_KILL src, USE_KILL dst, USE_KILL len, ++ KILL Vtmp1, KILL Vtmp2, KILL Vtmp3, KILL Vtmp4, KILL cr); ++ ++ format %{ 
"Encode array $src,$dst,$len -> $result" %} ++ ins_encode %{ ++ __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register, ++ $result$$Register, $Vtmp1$$FloatRegister, $Vtmp2$$FloatRegister, ++ $Vtmp3$$FloatRegister, $Vtmp4$$FloatRegister); ++ %} ++ ins_pipe( pipe_slow ); ++%}*/ ++ ++/* ++ ++//----------Overflow Math Instructions----------------------------------------- ++ ++instruct overflowAddI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) ++%{ ++ match(Set cr (OverflowAddI op1 op2)); ++ effect(DEF cr, USE_KILL op1, USE op2); ++ ++ format %{ "addl $op1, $op2\t# overflow check int" %} ++ ++ ins_encode %{ ++ __ addl($op1$$Register, $op2$$Register); ++ %} ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct overflowAddI_rReg_imm(rFlagsReg cr, rax_RegI op1, immI op2) ++%{ ++ match(Set cr (OverflowAddI op1 op2)); ++ effect(DEF cr, USE_KILL op1, USE op2); ++ ++ format %{ "addl $op1, $op2\t# overflow check int" %} ++ ++ ins_encode %{ ++ __ addl($op1$$Register, $op2$$constant); ++ %} ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct overflowAddL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) ++%{ ++ match(Set cr (OverflowAddL op1 op2)); ++ effect(DEF cr, USE_KILL op1, USE op2); ++ ++ format %{ "addq $op1, $op2\t# overflow check long" %} ++ ins_encode %{ ++ __ addq($op1$$Register, $op2$$Register); ++ %} ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct overflowAddL_rReg_imm(rFlagsReg cr, rax_RegL op1, immL32 op2) ++%{ ++ match(Set cr (OverflowAddL op1 op2)); ++ effect(DEF cr, USE_KILL op1, USE op2); ++ ++ format %{ "addq $op1, $op2\t# overflow check long" %} ++ ins_encode %{ ++ __ addq($op1$$Register, $op2$$constant); ++ %} ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct overflowSubI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) ++%{ ++ match(Set cr (OverflowSubI op1 op2)); ++ ++ format %{ "cmpl $op1, $op2\t# overflow check int" %} ++ ins_encode %{ ++ __ cmpl($op1$$Register, $op2$$Register); ++ %} ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct overflowSubI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) ++%{ ++ match(Set cr (OverflowSubI op1 op2)); ++ ++ format %{ "cmpl $op1, $op2\t# overflow check int" %} ++ ins_encode %{ ++ __ cmpl($op1$$Register, $op2$$constant); ++ %} ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct overflowSubL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) ++%{ ++ match(Set cr (OverflowSubL op1 op2)); ++ ++ format %{ "cmpq $op1, $op2\t# overflow check long" %} ++ ins_encode %{ ++ __ cmpq($op1$$Register, $op2$$Register); ++ %} ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct overflowSubL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) ++%{ ++ match(Set cr (OverflowSubL op1 op2)); ++ ++ format %{ "cmpq $op1, $op2\t# overflow check long" %} ++ ins_encode %{ ++ __ cmpq($op1$$Register, $op2$$constant); ++ %} ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct overflowNegI_rReg(rFlagsReg cr, immI0 zero, rax_RegI op2) ++%{ ++ match(Set cr (OverflowSubI zero op2)); ++ effect(DEF cr, USE_KILL op2); ++ ++ format %{ "negl $op2\t# overflow check int" %} ++ ins_encode %{ ++ __ negl($op2$$Register); ++ %} ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct overflowNegL_rReg(rFlagsReg cr, immL0 zero, rax_RegL op2) ++%{ ++ match(Set cr (OverflowSubL zero op2)); ++ effect(DEF cr, USE_KILL op2); ++ ++ format %{ "negq $op2\t# overflow check long" %} ++ ins_encode %{ ++ __ negq($op2$$Register); ++ %} ++ ins_pipe(ialu_reg_reg); ++%} ++ ++instruct overflowMulI_rReg(rFlagsReg cr, rax_RegI op1, rRegI op2) ++%{ ++ match(Set cr (OverflowMulI op1 op2)); ++ effect(DEF cr, USE_KILL op1, USE op2); ++ ++ format %{ "imull $op1, $op2\t# overflow check 
int" %} ++ ins_encode %{ ++ __ imull($op1$$Register, $op2$$Register); ++ %} ++ ins_pipe(ialu_reg_reg_alu0); ++%} ++ ++instruct overflowMulI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2, rRegI tmp) ++%{ ++ match(Set cr (OverflowMulI op1 op2)); ++ effect(DEF cr, TEMP tmp, USE op1, USE op2); ++ ++ format %{ "imull $tmp, $op1, $op2\t# overflow check int" %} ++ ins_encode %{ ++ __ imull($tmp$$Register, $op1$$Register, $op2$$constant); ++ %} ++ ins_pipe(ialu_reg_reg_alu0); ++%} ++ ++instruct overflowMulL_rReg(rFlagsReg cr, rax_RegL op1, rRegL op2) ++%{ ++ match(Set cr (OverflowMulL op1 op2)); ++ effect(DEF cr, USE_KILL op1, USE op2); ++ ++ format %{ "imulq $op1, $op2\t# overflow check long" %} ++ ins_encode %{ ++ __ imulq($op1$$Register, $op2$$Register); ++ %} ++ ins_pipe(ialu_reg_reg_alu0); ++%} ++ ++instruct overflowMulL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2, rRegL tmp) ++%{ ++ match(Set cr (OverflowMulL op1 op2)); ++ effect(DEF cr, TEMP tmp, USE op1, USE op2); ++ ++ format %{ "imulq $tmp, $op1, $op2\t# overflow check long" %} ++ ins_encode %{ ++ __ imulq($tmp$$Register, $op1$$Register, $op2$$constant); ++ %} ++ ins_pipe(ialu_reg_reg_alu0); ++%} ++*/ ++ ++/* Cmpxxx useless in SW64 ++//----------Control Flow Instructions------------------------------------------ ++// Signed compare Instructions ++ ++// XXX more variants!! ++instruct compI_rReg(rFlagsReg cr, rRegI op1, rRegI op2) ++%{ ++ match(Set cr (CmpI op1 op2)); ++ effect(DEF cr, USE op1, USE op2); ++ ++ format %{ "cmpw $op1, $op2\t@compI_rReg" %} ++ ++ ins_encode %{ ++ __ cmpw($op1$$Register, $op2$$Register, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_reg); ++%} ++ ++instruct compI_rReg_imm(rFlagsReg cr, rRegI op1, immI op2) ++%{ ++ match(Set cr (CmpI op1 op2)); ++ ++ format %{ "cmpw $op1, $op2\t@compI_rReg_imm" %} ++ ++ ins_encode %{ ++ __ mov_immediate32(rscratch3, $op2$$constant); ++ __ cmpw($op1$$Register, rscratch3, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_imm); ++%} ++ ++instruct compI_rReg_mem(rFlagsReg cr, rRegI op1, memory op2) ++%{ ++ match(Set cr (CmpI op1 (LoadI op2))); ++ ++ ins_cost(500); // XXX ++ format %{ "cmpw $op1, $op2\t@compI_rReg_mem" %} ++ ++ ins_encode %{ ++ __ ldws(rscratch2_AT, $op2$$Address); ++ __ cmpw($op1$$Register, rscratch2_AT, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_mem); ++%} ++ ++instruct testI_reg(rFlagsReg cr, rRegI src, immI0 zero) ++%{ ++ match(Set cr (CmpI src zero)); ++ ++ format %{ "testw $src, $src\t@testI_reg" %} ++ ins_encode %{ ++ __ testw($src$$Register, $src$$Register, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_imm); ++%} ++ ++instruct testI_reg_imm(rFlagsReg cr, rRegI src, immI con, immI0 zero) ++%{ ++ match(Set cr (CmpI (AndI src con) zero)); ++ ++ format %{ "testl $src, $con\t@testI_reg_imm" %} ++ ins_encode %{ ++ __ testw($src$$Register, $con$$constant, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_imm); ++%} ++ ++instruct testI_reg_mem(rFlagsReg cr, rRegI src, memory mem, immI0 zero) ++%{ ++ match(Set cr (CmpI (AndI src (LoadI mem)) zero)); ++ ++ format %{ "testl $src, $mem\t@testI_reg_mem" %} ++ ins_encode %{ ++ __ ldws(rscratch2_AT, $mem$$Address); ++ __ testw($src$$Register, rscratch2_AT, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_mem); ++%} ++ ++// Unsigned compare Instructions; really, same as signed except they ++// produce an rFlagsRegU instead of rFlagsReg. 
++instruct compU_rReg(rFlagsRegU cr, rRegI op1, rRegI op2) ++%{ ++ match(Set cr (CmpU op1 op2)); ++ ++ format %{ "cmpw $op1, $op2\t# unsigned\t@compU_rReg" %} ++ ins_encode %{ ++ __ cmpw($op1$$Register, $op2$$Register, $cr$$Register); ++ __ stop("need add unsigned instruct: jzy?"); ++ %} ++ //ins_pipe(ialu_cr_reg_reg); ++%} ++ ++instruct compU_rReg_imm(rFlagsRegU cr, rRegI op1, immI op2) ++%{ ++ match(Set cr (CmpU op1 op2)); ++ ++ format %{ "cmpl $op1, $op2\t# unsigned\t@compU_rReg_imm" %} ++ ins_encode %{ ++ __ mov_immediate32(rscratch2_AT, $op2$$constant); ++ __ cmpw($op1$$Register, rscratch2_AT, $cr$$Register); ++ __ stop("need add unsigned instruct: jzy?"); ++ %} ++ //ins_pipe(ialu_cr_reg_imm); ++%} ++ ++instruct compU_rReg_mem(rFlagsRegU cr, rRegI op1, memory op2) ++%{ ++ match(Set cr (CmpU op1 (LoadI op2))); ++ ++ ins_cost(500); // XXX ++ format %{ "cmpl $op1, $op2\t# unsigned\t@compU_rReg_mem" %} ++ ins_encode %{ ++ __ ldws(rscratch2_AT, $op2$$Address); ++ __ cmpw($op1$$Register, rscratch2_AT, $cr$$Register); ++ __ stop("need add unsigned instruct: jzy?"); ++ %} ++ //ins_pipe(ialu_cr_reg_mem); ++%} ++ ++// // // Cisc-spilled version of cmpU_rReg ++// //instruct compU_mem_rReg(rFlagsRegU cr, memory op1, rRegI op2) ++// //%{ ++// // match(Set cr (CmpU (LoadI op1) op2)); ++// // ++// // format %{ "CMPu $op1,$op2" %} ++// // ins_cost(500); ++// // opcode(0x39); ++// // ins_encode( OpcP, reg_mem( op1, op2) ); ++// //%} ++ ++instruct testU_reg(rFlagsRegU cr, rRegI src, immI0 zero) ++%{ ++ match(Set cr (CmpU src zero)); ++ ++ format %{ "testw $src, $src\t# unsigned\t@testU_reg" %} ++ ins_encode %{ ++ __ testw($src$$Register, $src$$Register, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_imm); ++%} ++ ++instruct compP_rReg(rFlagsRegU cr, rRegP op1, rRegP op2) ++%{ ++ match(Set cr (CmpP op1 op2)); ++ ++ format %{ "cmpptr $op1, $op2\t# ptr\t@compP_rReg" %} ++ ins_encode %{ ++ __ cmpptr($op1$$Register, $op2$$Register, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_reg); ++%} ++ ++instruct compP_rReg_mem(rFlagsRegU cr, rRegP op1, memory op2) ++%{ ++ match(Set cr (CmpP op1 (LoadP op2))); ++ ++ ins_cost(500); // XXX ++ format %{ "cmpptr $op1, $op2\t# ptr\t@compP_rReg_mem" %} ++ ins_encode %{ ++ __ ldws(rscratch2_AT, $op2$$Address); ++ __ cmpptr($op1$$Register, rscratch2_AT, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_mem); ++%} ++ ++// // // Cisc-spilled version of cmpP_rReg ++// //instruct compP_mem_rReg(rFlagsRegU cr, memory op1, rRegP op2) ++// //%{ ++// // match(Set cr (CmpP (LoadP op1) op2)); ++// // ++// // format %{ "CMPu $op1,$op2" %} ++// // ins_cost(500); ++// // opcode(0x39); ++// // ins_encode( OpcP, reg_mem( op1, op2) ); ++// //%} ++ ++// XXX this is generalized by compP_rReg_mem??? ++// Compare raw pointer (used in out-of-heap check). ++// Only works because non-oop pointers must be raw pointers ++// and raw pointers have no anti-dependencies. ++instruct compP_mem_rReg(rFlagsRegU cr, rRegP op1, memory op2) ++%{ ++ predicate(n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none);//TODO:jzy? ++ match(Set cr (CmpP op1 (LoadP op2))); ++ ++ format %{ "cmpptr $op1, $op2\t# raw ptr\t@compP_mem_rReg" %} ++ ins_encode %{ ++ __ ldws(rscratch2_AT, $op2$$Address); ++ __ cmpptr($op1$$Register, rscratch2_AT, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_mem); ++%} ++ ++// This will generate a signed flags result. This should be OK since ++// any compare to a zero should be eq/neq. 
++instruct testP_reg(rFlagsReg cr, rRegP src, immP0 zero) ++%{ ++ match(Set cr (CmpP src zero)); ++ ++ format %{ "testptr $src, $src\t# ptr\t@testP_reg" %} ++ ins_encode %{ ++ __ testptr($src$$Register, $src$$Register, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_imm); ++%} ++ ++// This will generate a signed flags result. This should be OK since ++// any compare to a zero should be eq/neq. ++instruct testP_mem(rFlagsReg cr, memory op, immP0 zero) ++%{ ++ predicate(!UseCompressedOops || (Universe::narrow_oop_base() != NULL)); ++ match(Set cr (CmpP (LoadP op) zero)); ++ ++ ins_cost(500); // XXX ++ format %{ "testq $op, 0xffffffffffffffff\t# ptr\t@testP_mem" %} ++ ins_encode %{ ++ __ ldptr(rscratch2_AT, $op$$Address); ++ __ testptr(rscratch2_AT, 0xFFFFFFFF, $cr$$Register); ++ __ stop("0xFFFFFFFF or 0xffffffffffffffff jzy?"); ++ %} ++ //ins_pipe(ialu_cr_reg_imm); ++%} ++ ++instruct testP_mem_reg0(rFlagsReg cr, memory mem, immP0 zero) ++%{ ++ predicate(UseCompressedOops && (Universe::narrow_oop_base() == NULL) && (Universe::narrow_klass_base() == NULL)); ++ match(Set cr (CmpP (LoadP mem) zero)); ++ ++ format %{ "cmpq R0, $mem\t# ptr (r12_heapbase==0)\t@testP_mem_reg0" %} ++ ins_encode %{ ++ __ ldptr(rscratch2_AT, $mem$$Address); ++ __ cmpptr(rscratch2_AT, 0, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_mem); ++%} ++ ++instruct compN_rReg(rFlagsRegU cr, rRegN op1, rRegN op2) ++%{ ++ match(Set cr (CmpN op1 op2)); ++ ++ format %{ "cmpw $op1, $op2\t# compressed ptr\t@compN_rReg" %} ++ ins_encode %{ ++ __ cmpw($op1$$Register, $op2$$Register, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_reg); ++%} ++ ++instruct compN_rReg_mem(rFlagsRegU cr, rRegN src, memory mem) ++%{ ++ match(Set cr (CmpN src (LoadN mem))); ++ ++ format %{ "cmpw $src, $mem\t# compressed ptr\t@compN_rReg_mem" %} ++ ins_encode %{ ++ __ ldwu(rscratch2_AT, $mem$$Address); ++ __ cmpw($src$$Register, rscratch2_AT, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_mem); ++%} ++ ++instruct compN_rReg_imm(rFlagsRegU cr, rRegN op1, immN op2) %{ ++ match(Set cr (CmpN op1 op2)); ++ ++ format %{ "cmpw $op1, $op2\t# compressed ptr\t@compN_rReg_imm" %} ++ ins_encode %{ ++ __ cmp_narrow_oop($op1$$Register, (jobject)$op2$$constant, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_imm); ++%} ++ ++instruct compN_mem_imm(rFlagsRegU cr, memory mem, immN src) ++%{ ++ match(Set cr (CmpN src (LoadN mem))); ++ ++ format %{ "cmpw $mem, $src\t# compressed ptr\t@compN_mem_imm" %} ++ ins_encode %{ ++ __ cmp_narrow_oop($mem$$Address, (jobject)$src$$constant, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_mem); ++%} ++ ++instruct compN_rReg_imm_klass(rFlagsRegU cr, rRegN op1, immNKlass op2) %{ ++ match(Set cr (CmpN op1 op2)); ++ ++ format %{ "cmpw $op1, $op2\t# compressed klass ptr\t@compN_rReg_imm_klass" %} ++ ins_encode %{ ++ __ cmp_narrow_klass($op1$$Register, (Klass*)$op2$$constant, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_imm); ++%} ++ ++instruct compN_mem_imm_klass(rFlagsRegU cr, memory mem, immNKlass src) ++%{ ++ match(Set cr (CmpN src (LoadNKlass mem))); ++ ++ format %{ "cmpw $mem, $src\t# compressed klass ptr\t@compN_mem_imm_klass" %} ++ ins_encode %{ ++ __ cmp_narrow_klass($mem$$Address, (Klass*)$src$$constant, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_mem); ++%} ++ ++instruct testN_reg(rFlagsReg cr, rRegN src, immN0 zero) %{ ++ match(Set cr (CmpN src zero)); ++ ++ format %{ "testw $src, $src\t# compressed ptr\t@testN_reg" %} ++ ins_encode %{ __ testw($src$$Register, $src$$Register, $cr$$Register); %} ++ //ins_pipe(ialu_cr_reg_imm); 
++%} ++ ++instruct testN_mem(rFlagsReg cr, memory mem, immN0 zero) ++%{ ++ predicate(Universe::narrow_oop_base() != NULL); ++ match(Set cr (CmpN (LoadN mem) zero)); ++ ++ ins_cost(500); // XXX ++ format %{ "testw $mem, 0xffffffff\t# compressed ptr\t@testN_mem" %} ++ ins_encode %{ ++ __ ldwu(rscratch2_AT, $mem$$Address); ++ __ cmpw(rscratch2_AT, (int)0xFFFFFFFF, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_mem); ++%} ++ ++instruct testN_mem_reg0(rFlagsReg cr, memory mem, immN0 zero) ++%{ ++ predicate(Universe::narrow_oop_base() == NULL && (Universe::narrow_klass_base() == NULL)); ++ match(Set cr (CmpN (LoadN mem) zero)); ++ ++ format %{ "cmpl R12, $mem\t# compressed ptr (R12_heapbase==0)\t@testN_mem_reg0" %} ++ ins_encode %{ ++ __ ldwu(rscratch2_AT, $mem$$Address); ++ __ cmpw(R0, rscratch2_AT, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_mem); ++%} ++ ++// Yanked all unsigned pointer compare operations. ++// Pointer compares are done with CmpP which is already unsigned. ++ ++instruct compL_rReg(rFlagsReg cr, rRegL op1, rRegL op2) ++%{ ++ match(Set cr (CmpL op1 op2)); ++ ++ format %{ "cmpl $op1, $op2\t@compL_rReg" %} ++ ins_encode %{ ++ __ cmpl($op1$$Register, $op2$$Register, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_reg); ++%} ++ ++instruct compL_rReg_imm(rFlagsReg cr, rRegL op1, immL32 op2) ++%{ ++ match(Set cr (CmpL op1 op2)); ++ ++ format %{ "cmpl $op1, $op2\t@compL_rReg_imm" %} ++ ins_encode %{ ++ __ cmpl($op1$$Register, (int)$op2$$constant, $cr$$Register); ++ __ stop("immL32's length is OK? jzy"); ++ %} ++ //ins_pipe(ialu_cr_reg_imm); ++%} ++ ++instruct compL_rReg_mem(rFlagsReg cr, rRegL op1, memory op2) ++%{ ++ match(Set cr (CmpL op1 (LoadL op2))); ++ ++ format %{ "cmpl $op1, $op2\t@compL_rReg_mem" %} ++ ins_encode %{ ++ __ ldl(rscratch2_AT, $op2$$Address); ++ __ cmpl($op1$$Register, rscratch2_AT, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_mem); ++%} ++ ++instruct testL_reg(rFlagsReg cr, rRegL src, immL0 zero) ++%{ ++ match(Set cr (CmpL src zero)); ++ ++ format %{ "testl $src, $src\t@testL_reg" %} ++ ins_encode %{ ++ __ testl($src$$Register, $src$$Register, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_imm); ++%} ++ ++instruct testL_reg_imm(rFlagsReg cr, rRegL src, immL32 con, immL0 zero) ++%{ ++ match(Set cr (CmpL (AndL src con) zero)); ++ ++ format %{ "testl $src, $con\t# long\t@testL_reg_imm" %} ++ ins_encode %{ ++ __ testl($src$$Register, (int)$con$$constant, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_imm); ++%} ++ ++instruct testL_reg_mem(rFlagsReg cr, rRegL src, memory mem, immL0 zero) ++%{ ++ match(Set cr (CmpL (AndL src (LoadL mem)) zero)); ++ ++ format %{ "testl $src, $mem\t@testL_reg_mem" %} ++ ins_encode %{ ++ __ ldl(rscratch2_AT, $mem$$Address); ++ __ testl($src$$Register, rscratch2_AT, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_mem); ++%} ++ ++instruct testL_reg_mem2(rFlagsReg cr, rRegP src, memory mem, immL0 zero) ++%{ ++ match(Set cr (CmpL (AndL (CastP2X src) (LoadL mem)) zero)); ++ ++ format %{ "testl $src, $mem\t@testL_reg_mem2" %} ++ ins_encode %{ ++ __ ldl(rscratch2_AT, $mem$$Address); ++ __ testl($src$$Register, rscratch2_AT, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_mem); ++%} ++*/ ++// Manifest a CmpL result in an integer register. ++// (src1 < src2) ? -1 : ((src1 > src2) ? 
1 : 0) ++instruct cmpL3_reg_reg(rRegI dst, rRegL src1, rRegL src2) %{ ++ match(Set dst (CmpL3 src1 src2)); ++ ins_cost(1000); ++ format %{ "cmpL3 $dst, $src1, $src2 @ cmpL3_reg_reg" %} ++ ins_encode %{ ++ Register opr1 = $src1$$Register; ++ Register opr2 = $src2$$Register; ++ Register dst = $dst$$Register; ++ ++ Label done; ++ __ subl(opr1, opr2, rscratch3); ++ __ subl(R0, 1, dst); ++ __ blt_l(rscratch3, done); ++ ++ __ selgt(rscratch3, 1, R0, dst); ++ __ BIND(done); ++ ++// __ cmpl(opr1, opr2, rcc); ++// __ ldi(rscratch3, -1, R0); ++// __ sellt(rcc, rscratch3, R0, dst); ++// __ selgt(rcc, 1, dst, dst); ++ ++ ++// Label done; ++// __ cmplt(opr2, opr1, dst); ++// __ jcc(Assembler::neq, done); ++// __ cmpeq(opr1, opr2, rcc); ++// __ ldi(rscratch3, -1); ++// __ seleq(rcc, rscratch3, R0, dst); ++// __ bind(done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++// ++// less_rsult = -1 ++// greater_result = 1 ++// equal_result = 0 ++// nan_result = -1 ++// ++instruct cmpF3_reg_reg(rRegI dst, regF src1, regF src2, rFlagsReg cr) %{ ++ match(Set dst (CmpF3 src1 src2)); ++ effect(KILL cr); ++ ++ //ins_cost(1000); ++ format %{ "cmpF3 $dst, $src1, $src2 @ cmpF3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ Label Done; ++ ++ __ ldi(dst, -1, R0); ++ __ c_ole_s(src2, src1); ++ __ fbeq(fcc, 4); ++ ++ __ movl(dst, R0); ++ __ c_eq_s(src1, src2); ++ __ fbne(fcc, 1); ++ __ ldi(dst, 1, R0); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct cmpD3_reg_reg(rRegI dst, regD src1, regD src2, rFlagsReg cr) %{ ++ match(Set dst (CmpD3 src1 src2)); ++ effect(KILL cr); ++ ++ //ins_cost(1000); ++ format %{ "cmpD3 $dst, $src1, $src2 @ cmpD3_reg_reg" %} ++ ins_encode %{ ++ FloatRegister src1 = as_FloatRegister($src1$$reg); ++ FloatRegister src2 = as_FloatRegister($src2$$reg); ++ Register dst = as_Register($dst$$reg); ++ ++ Label Done; ++ ++ __ ldi(dst, -1, R0); ++ __ c_ole_d(src2, src1); ++ __ fbeq(fcc, 4); ++ ++ __ movl(dst, R0); ++ __ c_eq_d(src1, src2); ++ __ fbne(fcc, 1); ++ __ ldi(dst, 1, R0); ++ ++ __ bind(Done); ++ %} ++ ins_pipe( pipe_slow ); ++%} ++ ++// Unsigned long compare Instructions; really, same as signed long except they ++// produce an rFlagsRegU instead of rFlagsReg. ++/*instruct compUL_rReg(rFlagsRegU cr, rRegL op1, rRegL op2) ++%{ ++ match(Set cr (CmpUL op1 op2)); ++ ++ format %{ "cmpq $op1, $op2\t# unsigned\t@compUL_rReg" %} ++ ins_encode %{ ++ __ cmpUL($op1$$Register, $op2$$Register, $cr$$Register); ++ %} ++ ins_pipe(ialu_regL_regL); ++%}*/ ++ ++/*Cmpxxx useless in SW64 ++instruct compUL_rReg_imm(rFlagsRegU cr, rRegL op1, immL32 op2) ++%{ ++ match(Set cr (CmpUL op1 op2)); ++ ++ format %{ "cmpl $op1, $op2\t# unsigned\t@compUL_rReg_imm" %} ++ ins_encode %{ ++ __ mov_immediate32(rscratch2_AT, (int)$op2$$constant); ++ __ cmpl($op1$$Register, rscratch2_AT, $cr$$Register); ++ __ stop("need unsigned edition of cmpw/cmpl? jzy"); ++ %} ++ //ins_pipe(ialu_cr_reg_imm); ++%} ++ ++instruct compUL_rReg_mem(rFlagsRegU cr, rRegL op1, memory op2) ++%{ ++ match(Set cr (CmpUL op1 (LoadL op2))); ++ ++ format %{ "cmpq $op1, $op2\t# unsigned\t@compUL_rReg_mem" %} ++ ins_encode %{ ++ __ ldl(rscratch2_AT, $op2$$Address); ++ __ cmpl($op1$$Register, rscratch2_AT, $cr$$Register); ++ __ stop("need unsigned edition of cmpw/cmpl? 
jzy"); ++ %} ++ //ins_pipe(ialu_cr_reg_mem); ++%} ++ ++instruct testUL_reg(rFlagsRegU cr, rRegL src, immL0 zero) ++%{ ++ match(Set cr (CmpUL src zero)); ++ ++ format %{ "testq $src, $src\t# unsigned\t@testUL_reg" %} ++ ins_encode %{ ++ __ testl($src$$Register, $src$$Register, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_imm); ++%} ++ ++instruct compB_mem_imm(rFlagsReg cr, memory mem, immI8 imm) ++%{ ++ match(Set cr (CmpI (LoadB mem) imm)); ++ ++ ins_cost(125); ++ format %{ "cmpb $mem, $imm\t@compB_mem_imm" %} ++ ins_encode %{ ++ __ cmpb($mem$$Address, $imm$$constant, $cr$$Register); ++ %} ++ //ins_pipe(ialu_cr_reg_mem); ++%} ++ ++instruct testB_mem_imm(rFlagsReg cr, memory mem, immI8 imm, immI0 zero) ++%{ ++ match(Set cr (CmpI (AndI (LoadB mem) imm) zero)); ++ ++ ins_cost(125); ++ format %{ "testb $mem, $imm\t@testB_mem_imm" %} ++ ins_encode %{ __ testb($mem$$Address, $imm$$constant, $cr$$Register); %} ++ //ins_pipe(ialu_cr_reg_mem); ++%}*/ ++ ++//----------Max and Min-------------------------------------------------------- ++// Min Instructions ++ ++instruct minI_Reg_Reg(rRegI dst, rRegI src) %{ ++ match(Set dst (MinI dst src)); ++ //effect(KILL flags); ++ ins_cost(200); ++ ++ format %{ "MIN $dst, $src @minI_Reg_Reg" %} ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ cmplt(src, dst, rscratch3); ++ __ selne(rscratch3, src, dst, dst); ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++ // Max Register with Register (generic version) ++instruct maxI_Reg_Reg(rRegI dst, rRegI src) %{ ++ match(Set dst (MaxI dst src)); ++ ins_cost(80); ++ ++ format %{ "MAX $dst, $src @maxI_Reg_Reg" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ Register src = $src$$Register; ++ ++ __ cmplt(dst, src, rscratch3); ++ __ selne(rscratch3, src, dst, dst); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++instruct maxI_Reg_zero(rRegI dst, immI0 zero) %{ ++ match(Set dst (MaxI dst zero)); ++ ins_cost(50); ++ ++ format %{ "MAX $dst, 0 @maxI_Reg_zero" %} ++ ++ ins_encode %{ ++ Register dst = $dst$$Register; ++ ++ __ cmplt(dst, R0, rscratch3); ++ __ selne(rscratch3, R0, dst, dst); ++ ++ %} ++ ++ ins_pipe( pipe_slow ); ++%} ++ ++// ============================================================================ ++// Branch Instructions ++ ++// Jump Direct - Label defines a relative address from JMP+1 ++instruct jmpDir(label labl) ++%{ ++ match(Goto); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "jmp $labl\t@jmpDir" %} ++ //size(5); ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jmp(*L); // Always long jump ++ %} ++ ins_pipe(pipe_jmp); //CHECK TODO djx ++ ins_pc_relative(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd(cmpOp cop, rRegI src1, rRegI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label *L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++// __ cmpl(op1, op2); ++// __ jcc((Assembler::Condition)flag, *L); ++ __ cmpls(flag, op1, op2); ++ __ bne_l(rcc, *L); ++ %} ++ ins_pipe( pipe_jmp ); ++ ins_pc_relative(1); ++%} ++ ++ instruct jmpLoopEnd_reg_imm16(cmpOp cop, rRegI src1, immI16_sub src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(150); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_imm_16_sub" %} 
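++  // src2 is restricted to immI16_sub constants; unlike the generic
++  // jmpLoopEnd_reg_immI rule, the constant is passed straight to cmpw instead
++  // of being materialized in rscratch3 first (hence the lower ins_cost).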
++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label *L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ //__ ldi(rscratch1_GP, -1 * val, op1); ++ //__ jcc((Assembler::Condition)flag, *L, rscratch1_GP); ++ ++ __ cmpw(op1, val); ++ __ jcc((Assembler::Condition)flag, *L); ++ %} ++ ins_pipe( pipe_jmp ); ++ ins_pc_relative(1); ++%} ++ ++ instruct jmpLoopEnd_reg_immI(cmpOp cop, rRegI src1, immI src2, label labl) %{ ++ match(CountedLoopEnd cop (CmpI src1 src2)); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_immI" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = rscratch3; ++ Label *L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ mov_immediate32s(op2, $src2$$constant); ++ __ cmpls(flag, op1, op2); ++ __ bne_l(rcc, *L); ++ %} ++ ins_pipe( pipe_jmp ); ++ ins_pc_relative(1); ++%} ++ ++ instruct jmpLoopEnd_reg_immI0(cmpOp cop, rRegI src1, immI0 src2, label labl) %{ ++ match( CountedLoopEnd cop (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(170); ++ format %{ "J$cop $src1, $src2, $labl\t# Loop end @ jmpLoopEnd_reg_imm0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label *L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ ++ __ jcc((Assembler::Condition)flag, *L, op1); ++ %} ++ ++ ins_pipe( pipe_jmp ); ++ ins_pc_relative(1); ++//ZLONG ins_short_branch(1); ++%} ++ ++// This match pattern is created for StoreIConditional since I cannot match IfNode without a RegFlags! ++instruct jmpCon_flags(cmpOp cop, rFlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "J$cop $labl #sw64 uses GP as eflag @jmpCon_flags" %} ++ ++ ins_encode %{ ++ //__ stop("jmpCon_flags is for StoreIConditional?? lsp"); ++ Label *L = $labl$$label; ++ int flag = $cop$$cmpcode; ++ __ jcc((Assembler::Condition)flag, *L); ++ /*switch((Assembler::Condition)flag) ++ { ++ case Assembler::equal: //equal ++ __ bne_l($cr$$Register, *L); ++ break; ++ case Assembler::notEqual: //not equal ++ __ beq_l($cr$$Register, *L); ++ break; ++ default: ++ Unimplemented(); ++ }*/ ++ %} ++ ++ ins_pipe( pipe_jmp ); ++ ins_pc_relative(1); ++%} ++ ++//SW64:OKOK: ++instruct branchConP_zero(cmpOpU cmp, rRegP op1, immP0 zero, label labl) %{ ++ match(If cmp (CmpP op1 zero)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConP_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ jcc((Assembler::Condition)flag, *L, op1); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++//ZLONG ins_short_branch(1); ++%} ++ ++instruct branchConN2P_zero_short(cmpOpU cmp, rRegN op1, immP0 zero, label labl) %{ ++ match(If cmp (CmpP (DecodeN op1) zero)); ++ predicate(Universe::narrow_oop_base() == NULL && Universe::narrow_oop_shift() == 0); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "b$cmp $op1, R0, $labl #@branchConN2P_zero_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ //__ stop("why only use beq&bne? 
sny"); ++ switch((Assembler::Condition)flag) ++ { ++ case Assembler::equal: //equal ++ __ beq_l(op1, *L); ++ break; ++ case Assembler::notEqual: //not_equal ++ __ bne_l(op1, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++//ZLONG ins_short_branch(1); ++%} ++ ++ ++instruct branchConP_short(cmpOpU cmp, rRegP op1, rRegP op2, label labl) %{ ++ match(If cmp (CmpP op1 op2)); ++// predicate(can_branch_register(_kids[0]->_leaf, _kids[1]->_leaf)); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ "b$cmp $op1, $op2, $labl #@branchConP_short" %} ++ ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmplu(flag, op1, op2); ++ __ bne_l(rcc, *L);//TODO: add default rcc jzy ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++//ZLONG ins_short_branch(1); ++%} ++ ++instruct cmpN_null_branch_short(cmpOp cmp, rRegN op1, immN0 null, label labl) %{ ++ match(If cmp (CmpN op1 null)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,0\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_null_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = R0; ++ Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ jcc((Assembler::Condition)flag, *L, op1); ++ %} ++ ++//TODO: pipe_branchP or create pipe_branchN LEE ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++//ZLONG ins_short_branch(1); ++%} ++ ++instruct cmpN_reg_branch_short(cmpOpU cmp, rRegN op1, rRegN op2, label labl) %{ ++ match(If cmp (CmpN op1 op2)); ++ effect(USE labl); ++ ++ ins_cost(180); ++ format %{ "CMP $op1,$op2\t! compressed ptr\n\t" ++ "BP$cmp $labl @ cmpN_reg_branch_short" %} ++ ins_encode %{ ++ Register op1 = $op1$$Register; ++ Register op2 = $op2$$Register; ++ Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmplu(flag, op1, op2); ++ __ bne_l(rcc, *L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++//ZLONG ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_reg_short(cmpOpU cmp, rRegI src1, rRegI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++// __ stop("check if op1 & op2 are unsigned //sny"); ++ __ cmplu((Assembler::Condition)flag, op1, op2); ++ __ bne_l(rcc, *L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++//ZLONG ins_short_branch(1); ++%} ++ ++ ++instruct branchConIU_reg_imm_short(cmpOpU cmp, rRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpU src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_imm_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ mov_immediate32s(rscratch3, val); ++ __ cmplu((Assembler::Condition)flag, op1, rscratch3); ++ __ bne_l(rcc, *L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_reg_short(cmpOp cmp, rRegI src1, rRegI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_reg_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Register op2 = $src2$$Register; ++ 
Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ cmpls(flag, op1, op2); ++ __ bne_l(rcc, *L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConI_reg_imm0_short(cmpOp cmp, rRegI src1, immI0 src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++ ins_cost(20); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ jcc((Assembler::Condition)flag, *L, op1); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++//ZLONG ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_imm_short(cmpOp cmp, rRegI src1, immI src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++// ins_cost(200); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm_short" %} ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ mov_immediate32s(rscratch3, val); ++// __ cmpl(op1, rscratch3); ++// __ jcc((Assembler::Condition)flag, *L); ++ __ cmpls(flag, op1, rscratch3); ++ __ bne_l(rcc, *L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++//ZLONG ins_short_branch(1); ++%} ++ ++instruct branchConIU_reg_imm0_short(cmpOpU cmp, rRegI src1, immI0 zero, label labl) %{ ++ match( If cmp (CmpU src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConIU_reg_imm0_short" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++// __ stop("is below necessary? sny"); ++ switch((Assembler::Condition)flag) { ++ case Assembler::equal: //equal ++ __ beq_l(op1, *L); ++ break; ++ case Assembler::notEqual: //not_equal ++ __ bne_l(op1, *L); ++ break; ++ case Assembler::above: //above ++ __ bne_l(op1, *L); ++ break; ++ case Assembler::aboveEqual: //above_equal ++ __ beq_l(R0, *L); ++ break; ++ case Assembler::below: //below ++ Unimplemented(); ++ return; ++ break; ++ case Assembler::belowEqual: //below_equal ++ __ beq_l(op1, *L); ++ break; ++ default: ++ Unimplemented(); ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++//ZLONG ins_short_branch(1); ++%} ++ ++//instruct branchConIU_reg_immI16_short(cmpOpU cmp, rRegI src1, immI16_sub src2, label labl) %{ ++// match( If cmp (CmpU src1 src2) ); ++// effect(USE labl); ++// ins_cost(180); ++// format %{ "BR$cmp $src1, $src2, $labl #@branchConIU_reg_immI16_short" %} ++// ++// ins_encode %{ ++// Register op1 = $src1$$Register; ++// int val = $src2$$constant; ++// Label &L = *($labl$$label); ++// int flag = $cmp$$cmpcode; ++// ++// __ ldi(AT, -1 * val, op1); ++// switch(flag) { ++// case 0x04: //equal ++// if (&L) ++// __ beq_l(AT, L); ++// else ++// __ beq(AT, (int)0); ++// break; ++// case 0x05: //not_equal ++// if (&L) ++// __ bne_l(AT, L); ++// else ++// __ bne(AT, (int)0); ++// break; ++// case 0x0F: //above ++// if(&L) ++// __ bgt_l(AT, L); ++// else ++// __ bgt(AT, (int)0); ++// break; ++// case 0x0D: //above_equal ++// if(&L) ++// __ bge_l(AT, L); ++// else ++// __ bge(AT, (int)0); ++// break; ++// case 0x0C: //below ++// if(&L) ++// __ blt_l(AT, L); ++// else ++// __ blt(AT, (int)0); ++// break; ++// case 0x0E: //below_equal ++// if(&L) ++// __ ble_l(AT, L); ++// else ++// __ ble(AT, (int)0); ++// break; ++// default: ++// Unimplemented(); ++// } ++// %} ++// ++// ins_pc_relative(1); ++// ins_pipe( pipe_alu_branch 
); ++////ZLONG ins_short_branch(1); ++//%} ++ ++instruct branchConL_regL_regL_short(cmpOp cmp, rRegL src1, rRegL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_regL_short" %} ++// ins_cost(250); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = as_Register($src2$$reg); ++ ++ Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++// __ cmpl(op1, op2); ++// __ jcc((Assembler::Condition)flag, *L); ++ __ cmpls(flag, op1, op2); ++ __ bne_l(rcc, *L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++//ZLONG ins_short_branch(1); ++%} ++ ++instruct branchConI_reg_imm16_sub(cmpOp cmp, rRegI src1, immI16_sub src2, label labl) %{ ++ match( If cmp (CmpI src1 src2) ); ++ effect(USE labl); ++// ins_cost(180); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConI_reg_imm16_sub" %} ++ ++ ins_encode %{ ++ Register op1 = $src1$$Register; ++ int val = $src2$$constant; ++ Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ //__ ldi(rscratch1_GP, -1 * val, op1); ++ //__ jcc((Assembler::Condition)flag, *L, rscratch1_GP); ++ ++ __ cmpw(op1, val); ++ __ jcc((Assembler::Condition)flag, *L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++%} ++ ++instruct branchConL_regL_immL0_short(cmpOp cmp, rRegL src1, immL0 zero, label labl) %{ ++ match( If cmp (CmpL src1 zero) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, zero, $labl #@branchConL_regL_immL0_short" %} ++ ins_cost(80); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ jcc((Assembler::Condition)flag, *L, op1); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++//ZLONG ins_short_branch(1); ++%} ++ ++instruct branchConL_regL_immL_short(cmpOp cmp, rRegL src1, immL src2, label labl) %{ ++ match( If cmp (CmpL src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConL_regL_immL_short" %} ++// ins_cost(100); ++ ++ ins_encode %{ ++ Register op1 = as_Register($src1$$reg); ++ Register op2 = rscratch2_AT; ++ ++ Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ __ mov_immediate64(op2, $src2$$constant); ++// __ cmpl(op1, op2); ++// __ jcc((Assembler::Condition)flag, *L); ++ __ cmpls(flag, op1, op2); ++ __ bne_l(rcc, *L); ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe( pipe_alu_branch ); ++//ZLONG ins_short_branch(1); ++%} ++ ++instruct branchConF_reg_reg_short(cmpOp cmp, regF src1, regF src2, label labl) %{ ++ match( If cmp (CmpF src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConF_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister op1 = $src1$$FloatRegister; ++ FloatRegister op2 = $src2$$FloatRegister; ++ Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch((Assembler::Condition)flag) { ++ case Assembler::equal: ++ __ fcmpeq(op1, op2, fcc); ++ __ ffbne(FcmpRES, *L); ++ break; ++ case Assembler::notEqual: ++ __ fcmpeq(op1, op2, fcc); ++ __ ffbeq(FcmpRES, *L); ++ break; ++ case Assembler::greater: ++ __ c_olt_s(op2, op1); ++ __ ffbne(FcmpRES, *L); ++ break; ++ case Assembler::greaterEqual: ++ __ c_ole_s(op2, op1); ++ __ ffbne(FcmpRES, *L); ++ break; ++ case Assembler::less: ++ __ block_comment("less;;"); ++ __ c_ole_s(op2, op1); ++ __ ffbeq(FcmpRES, *L); ++ break; ++ case Assembler::lessEqual: ++ __ block_comment("lessEqual;;"); ++ __ c_olt_s(op2, op1); ++ __ ffbeq(FcmpRES, *L); ++ break; ++ } ++ %} ++ ++ 
ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++//ZLONG ins_short_branch(1); ++%} ++ ++instruct branchConD_reg_reg_short(cmpOp cmp, regD src1, regD src2, label labl) %{ ++ match( If cmp (CmpD src1 src2) ); ++ effect(USE labl); ++ format %{ "BR$cmp $src1, $src2, $labl #@branchConD_reg_reg_short" %} ++ ++ ins_encode %{ ++ FloatRegister op1 = $src1$$FloatRegister; ++ FloatRegister op2 = $src2$$FloatRegister; ++ Label * L = $labl$$label; ++ int flag = $cmp$$cmpcode; ++ ++ switch((Assembler::Condition)flag) { ++ case Assembler::equal: ++ __ fcmpeq(op1, op2, fcc); ++ __ ffbne(FcmpRES, *L); ++ break; ++ case Assembler::notEqual: ++ __ fcmpeq(op1, op2, fcc); ++ __ ffbeq(FcmpRES, *L); ++ break; ++ case Assembler::greater: ++ __ c_olt_d(op2, op1); ++ __ ffbne(FcmpRES, *L); ++ break; ++ case Assembler::greaterEqual: ++ __ c_ole_d(op2, op1); ++ __ ffbne(FcmpRES, *L); ++ break; ++ case Assembler::less: ++ __ block_comment("less;;"); ++ __ c_ole_d(op2, op1); ++ __ ffbeq(FcmpRES, *L); ++ break; ++ case Assembler::lessEqual: ++ __ block_comment("lessEqual;;"); ++ __ c_olt_d(op2, op1); ++ __ ffbeq(FcmpRES, *L); ++ break; ++ } ++ %} ++ ++ ins_pc_relative(1); ++ ins_pipe(pipe_slow); ++//ZLONG ins_short_branch(1); ++%} ++ ++// mask version ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++/* TODO:jzy what's n->has_vector_mask_set()? ++instruct jmpLoopEnd_and_restoreMask(cmpOp cop, rFlagsReg cr, label labl) ++%{ ++ predicate(n->has_vector_mask_set()); ++ match(CountedLoopEnd cop cr); ++ effect(USE labl); ++ ++ ins_cost(400); ++ format %{ "j$cop $labl\t# loop end\n\t" ++ "restorevectmask \t# vector mask restore for loops" %} ++ size(10); ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jcc((Assembler::Condition)($cop$$cmpcode), *L, cr); // Always long jump ++ __ stop("jmpLoopEnd_and_restoreMask stop"); ++ //__ restorevectmask(); ++ %} ++ //ins_pipe(pipe_jcc); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, rFlagsRegU cmp, label labl) %{ ++ predicate(n->has_vector_mask_set()); ++ match(CountedLoopEnd cop cmp); ++ effect(USE labl); ++ ++ ins_cost(400); ++ format %{ "j$cop,u $labl\t# loop end\n\t" ++ "restorevectmask \t# vector mask restore for loops" %} ++ size(10); ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump ++ __ restorevectmask(); ++ %} ++ ins_pipe(pipe_jcc); ++%} ++ ++instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{ ++ predicate(n->has_vector_mask_set()); ++ match(CountedLoopEnd cop cmp); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "j$cop,u $labl\t# loop end\n\t" ++ "restorevectmask \t# vector mask restore for loops" %} ++ size(10); ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump ++ __ restorevectmask(); ++ %} ++ ins_pipe(pipe_jcc); ++%}*/ ++ ++// Jump Direct Conditional - using unsigned comparison ++instruct jmpConU(cmpOpU cop, rFlagsRegU cmp, label labl) %{ ++ match(If cop cmp); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "j$cop,us $labl\t@jmpConU" %} ++// size(20); ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jccb((Assembler::Condition)($cop$$cmpcode), *L); // Always long jump ++ %} ++ ins_pipe( pipe_jmp ); ++// ins_pc_relative(1); ++%} ++/* ++instruct jmpConUCF(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{ ++ match(If cop cmp); ++ effect(USE labl); ++ ++ ins_cost(200); ++ 
format %{ "j$cop,u $labl" %} ++ size(6); ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump ++ %} ++ ins_pipe(pipe_jcc); ++%} ++ ++instruct jmpConUCF2(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{ ++ match(If cop cmp); ++ effect(USE labl); ++ ++ ins_cost(200); ++ format %{ $$template ++ if ($cop$$cmpcode == Assembler::notEqual) { ++ $$emit$$"jp,u $labl\n\t" ++ $$emit$$"j$cop,u $labl" ++ } else { ++ $$emit$$"jp,u done\n\t" ++ $$emit$$"j$cop,u $labl\n\t" ++ $$emit$$"done:" ++ } ++ %} ++ ins_encode %{ ++ Label* l = $labl$$label; ++ if ($cop$$cmpcode == Assembler::notEqual) { ++ __ jcc(Assembler::parity, *l, false); ++ __ jcc(Assembler::notEqual, *l, false); ++ } else if ($cop$$cmpcode == Assembler::equal) { ++ Label done; ++ __ jccb(Assembler::parity, done); ++ __ jcc(Assembler::equal, *l, false); ++ __ bind(done); ++ } else { ++ ShouldNotReachHere(); ++ } ++ %} ++ ins_pipe(pipe_jcc); ++%} ++*/ ++/* ++// ============================================================================ ++// The 2nd slow-half of a subtype check. Scan the subklass's 2ndary ++// superklass array for an instance of the superklass. Set a hidden ++// internal cache on a hit (cache is checked with exposed code in ++// gen_subtype_check()). Return NZ for a miss or zero for a hit. The ++// encoding ALSO sets flags. ++ ++instruct partialSubtypeCheck(rdi_RegP result, ++ rsi_RegP sub, rax_RegP super, rcx_RegI rcx, ++ rFlagsReg cr) ++%{ ++ match(Set result (PartialSubtypeCheck sub super)); ++ effect(KILL rcx, KILL cr); ++ ++ ins_cost(1100); // slightly larger than the next version ++ format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t" ++ "movl rcx, [rdi + Array::length_offset_in_bytes()]\t# length to scan\n\t" ++ "addq rdi, Array::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t" ++ "repne scasq\t# Scan *rdi++ for a match with rax while rcx--\n\t" ++ "jne,s miss\t\t# Missed: rdi not-zero\n\t" ++ "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t" ++ "xorq $result, $result\t\t Hit: rdi zero\n\t" ++ "miss:\t" %} ++ ++ opcode(0x1); // Force a XOR of RDI ++ ins_encode(enc_PartialSubtypeCheck()); ++ ins_pipe(pipe_slow); ++%} ++ ++instruct partialSubtypeCheck_vs_Zero(rFlagsReg cr, ++ rsi_RegP sub, rax_RegP super, rcx_RegI rcx, ++ immP0 zero, ++ rdi_RegP result) ++%{ ++ match(Set cr (CmpP (PartialSubtypeCheck sub super) zero)); ++ effect(KILL rcx, KILL result); ++ ++ ins_cost(1000); ++ format %{ "movq rdi, [$sub + in_bytes(Klass::secondary_supers_offset())]\n\t" ++ "movl rcx, [rdi + Array::length_offset_in_bytes()]\t# length to scan\n\t" ++ "addq rdi, Array::base_offset_in_bytes()\t# Skip to start of data; set NZ in case count is zero\n\t" ++ "repne scasq\t# Scan *rdi++ for a match with rax while cx-- != 0\n\t" ++ "jne,s miss\t\t# Missed: flags nz\n\t" ++ "movq [$sub + in_bytes(Klass::secondary_super_cache_offset())], $super\t# Hit: update cache\n\t" ++ "miss:\t" %} ++ ++ opcode(0x0); // No need to XOR RDI ++ ins_encode(enc_PartialSubtypeCheck()); ++ ins_pipe(pipe_slow); ++%} ++*/ ++// ============================================================================ ++// Branch Instructions -- short offset versions ++// ++// These instructions are used to replace jumps of a long offset (the default ++// match) with jumps of a shorter offset. 
These instructions are all tagged ++// with the ins_short_branch attribute, which causes the ADLC to suppress the ++// match rules in general matching. Instead, the ADLC generates a conversion ++// method in the MachNode which can be used to do in-place replacement of the ++// long variant with the shorter variant. The compiler will determine if a ++// branch can be taken by the is_short_branch_offset() predicate in the machine ++// specific code section of the file. ++ ++// Jump Direct - Label defines a relative address from JMP+1 ++instruct jmpDir_short(label labl) %{ ++ match(Goto); ++ effect(USE labl); ++ ins_cost(300); ++ format %{ "JMP $labl #@jmpDir" %} ++ ins_encode %{ ++ Label &L = *($labl$$label); ++ if(&L) ++ __ beq_l(R0, L); ++ else ++ ShouldNotReachHere(); ++ %} ++ ins_pipe( pipe_jmp ); ++ ins_pc_relative(1); ++%} ++ ++/* ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpCon_short(cmpOp cop, rFlagsReg cr, label labl) %{ ++ match(If cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "j$cop,s $labl" %} ++ size(2); ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jccb((Assembler::Condition)($cop$$cmpcode), *L); ++ %} ++ ins_pipe(pipe_jcc); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEnd_short(cmpOp cop, rFlagsReg cr, label labl) %{ ++ match(CountedLoopEnd cop cr); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "j$cop,s $labl\t# loop end" %} ++ size(2); ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jccb((Assembler::Condition)($cop$$cmpcode), *L); ++ %} ++ ins_pipe(pipe_jcc); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - Label defines a relative address from Jcc+1 ++instruct jmpLoopEndU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{ ++ match(CountedLoopEnd cop cmp); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "j$cop,us $labl\t# loop end" %} ++ size(2); ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jccb((Assembler::Condition)($cop$$cmpcode), *L); ++ %} ++ ins_pipe(pipe_jcc); ++ ins_short_branch(1); ++%} ++ ++instruct jmpLoopEndUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{ ++ match(CountedLoopEnd cop cmp); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "j$cop,us $labl\t# loop end" %} ++ size(2); ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jccb((Assembler::Condition)($cop$$cmpcode), *L); ++ %} ++ ins_pipe(pipe_jcc); ++ ins_short_branch(1); ++%} ++ ++// Jump Direct Conditional - using unsigned comparison ++instruct jmpConU_short(cmpOpU cop, rFlagsRegU cmp, label labl) %{ ++ match(If cop cmp); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "j$cop,us $labl" %} ++ size(2); ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jccb((Assembler::Condition)($cop$$cmpcode), *L); ++ %} ++ ins_pipe(pipe_jcc); ++ ins_short_branch(1); ++%} ++ ++instruct jmpConUCF_short(cmpOpUCF cop, rFlagsRegUCF cmp, label labl) %{ ++ match(If cop cmp); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ "j$cop,us $labl" %} ++ size(2); ++ ins_encode %{ ++ Label* L = $labl$$label; ++ __ jccb((Assembler::Condition)($cop$$cmpcode), *L); ++ %} ++ ins_pipe(pipe_jcc); ++ ins_short_branch(1); ++%} ++ ++instruct jmpConUCF2_short(cmpOpUCF2 cop, rFlagsRegUCF cmp, label labl) %{ ++ match(If cop cmp); ++ effect(USE labl); ++ ++ ins_cost(300); ++ format %{ $$template ++ if ($cop$$cmpcode == Assembler::notEqual) { ++ $$emit$$"jp,u,s $labl\n\t" ++ $$emit$$"j$cop,u,s $labl" ++ } else { ++ $$emit$$"jp,u,s done\n\t" ++ 
$$emit$$"j$cop,u,s $labl\n\t" ++ $$emit$$"done:" ++ } ++ %} ++ size(4); ++ ins_encode %{ ++ Label* l = $labl$$label; ++ if ($cop$$cmpcode == Assembler::notEqual) { ++ __ jccb(Assembler::parity, *l); ++ __ jccb(Assembler::notEqual, *l); ++ } else if ($cop$$cmpcode == Assembler::equal) { ++ Label done; ++ __ jccb(Assembler::parity, done); ++ __ jccb(Assembler::equal, *l); ++ __ bind(done); ++ } else { ++ ShouldNotReachHere(); ++ } ++ %} ++ ins_pipe(pipe_jcc); ++ ins_short_branch(1); ++%} ++*/ ++ ++// ============================================================================ ++// inlined locking and unlocking ++/* ++instruct cmpFastLockRTM(rFlagsReg cr, rRegP object, rbx_RegP box, rax_RegI tmp, rdx_RegI scr, rRegI cx1, rRegI cx2) %{ ++ predicate(Compile::current()->use_rtm()); ++ match(Set cr (FastLock object box)); ++ effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box); ++ ins_cost(300); ++ format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %} ++ ins_encode %{ ++ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, ++ $scr$$Register, $cx1$$Register, $cx2$$Register, ++ _counters, _rtm_counters, _stack_rtm_counters, ++ ((Method*)(ra_->C->method()->constant_encoding()))->method_data(), ++ true, ra_->C->profile_rtm()); ++ %} ++ ins_pipe(pipe_slow); ++%}*/ ++instruct cmpFastLock(rFlagsReg cr, rRegP object, s3_RegP box, v0_RegI tmp, rRegP scr) %{ ++ //predicate(!Compile::current()->use_rtm()); ++ match(Set cr (FastLock object box));//TODO:check where use cr? jzy ++ effect(TEMP tmp, TEMP scr, USE_KILL box); ++ ins_cost(300); ++ format %{ "fastlock $object,$box\t! kills $box,$tmp,$scr\t@cmpFastLock" %} ++ ins_encode %{ ++ //__ stop("need check parameters and implements. jzy?"); ++ __ fast_lock($object$$Register, $box$$Register, $tmp$$Register, ++ $scr$$Register, noreg, noreg, _counters, NULL, false, false); ++ %} ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++instruct cmpFastUnlock(rFlagsReg cr, rRegP object, v0_RegP box, rRegP tmp) %{ ++ match(Set cr (FastUnlock object box)); ++ effect(TEMP tmp, USE_KILL box); ++ ins_cost(300); ++ format %{ "fastunlock $object,$box\t! 
kills $box,$tmp\t@cmpFastUnlock" %} ++ ins_encode %{ ++ __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, false); ++ %} ++ ins_pipe( pipe_slow ); ++ ins_pc_relative(1); ++%} ++ ++ ++// ============================================================================ ++// Safepoint Instructions ++/* no need in sw8 ++instruct safePoint_poll(rFlagsReg cr) ++%{ ++ predicate(!Assembler::is_polling_page_far() && SafepointMechanism::uses_global_page_poll()); ++ match(SafePoint); ++ effect(KILL cr); ++ ++ format %{ "testl rax, [rip + #offset_to_poll_page]\t" ++ "# Safepoint: poll for GC" %} ++ ins_cost(125); ++ ins_encode %{ ++ //AddressLiteral addr(os::get_polling_page(), relocInfo::poll_type); ++ //__ testl(rax, addr); ++ __ stop("safePoint_poll unimplement jzy?"); ++ %} ++ ins_pipe(ialu_reg_mem); ++%}*/ ++ ++instruct safePoint_poll_far(rFlagsReg cr, rRegP poll) ++%{ ++// predicate(Assembler::is_polling_page_far() && SafepointMechanism::uses_global_page_poll()); ++ match(SafePoint poll); ++ effect(USE poll); ++ ++ ins_cost(125); ++ format %{ "Safepoint @ [$poll] : poll for GC @ safePoint_poll_reg" %} ++ ++ ins_encode %{ ++ Register poll_reg = $poll$$Register; ++ ++ __ block_comment("Safepoint:"); ++ __ relocate(relocInfo::poll_type); ++ __ ldw(rscratch3, 0, poll_reg); ++ %} ++ ins_pipe(ialu_reg_mem); ++%} ++/* ++instruct safePoint_poll_tls(rFlagsReg cr, t0_RegP poll) ++%{ ++ predicate(SafepointMechanism::uses_thread_local_poll()); ++ match(SafePoint poll); ++ effect(KILL cr, USE poll); ++ ++ format %{ "testl rax, [$poll]\t" ++ "# Safepoint: poll for GC\t@safePoint_poll_tls why rax? jzy" %} ++ ins_cost(125); ++// size(3); ++ ins_encode %{ ++ //__ relocate(relocInfo::poll_type); ++ //address pre_pc = __ pc(); ++ //__ testl(rax, Address($poll$$Register, 0)); ++ //address post_pc = __ pc(); ++ //guarantee(pre_pc[0] == 0x41 && pre_pc[1] == 0x85, "must emit #rex test-ax [reg]"); ++ __ stop("safePoint_poll_tls unimplement jzy?"); ++ %} ++ //ins_pipe(ialu_reg_mem); ++%} ++*/ ++// ============================================================================ ++// Procedure Call/Return Instructions ++// Call Java Static Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. ++instruct CallStaticJavaDirect(method meth) %{ ++ match(CallStaticJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "call,static\t@CallStaticJavaDirect" %} ++ ++ ins_encode(Java_Static_Call(meth), call_epilog); ++ ins_pipe(pipe_slow); ++ ins_alignment(4); ++%} ++ ++// Call Java Dynamic Instruction ++// Note: If this code changes, the corresponding ret_addr_offset() and ++// compute_padding() functions will have to be adjusted. 
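Stepping back to safePoint_poll_far above: the GC poll is nothing more than a relocated load (ldw) from the polling page, because arming a safepoint is done by revoking access to that page so that the next poll traps into the VM's fault handler. The toy POSIX/Linux program below is only a sketch of that mechanism (it ignores details such as the async-signal-safety of mprotect and error handling); it is not HotSpot code.

#include <csignal>
#include <cstdio>
#include <sys/mman.h>
#include <unistd.h>

static void* poll_page = nullptr;
static long  page_size = 0;
static volatile sig_atomic_t polls_trapped = 0;

static void on_fault(int, siginfo_t* info, void*) {
  if (info->si_addr == poll_page) {
    polls_trapped = polls_trapped + 1;            // a real VM would park the thread here
    mprotect(poll_page, page_size, PROT_READ);    // "disarm" so the faulting load can retry
  }
}

int main() {
  page_size = sysconf(_SC_PAGESIZE);
  poll_page = mmap(nullptr, page_size, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (poll_page == MAP_FAILED) return 1;

  struct sigaction sa = {};
  sa.sa_sigaction = on_fault;
  sa.sa_flags = SA_SIGINFO;
  sigaction(SIGSEGV, &sa, nullptr);

  volatile int sink = *(volatile int*)poll_page;  // disarmed poll: just a cheap load
  mprotect(poll_page, page_size, PROT_NONE);      // arm: the next poll faults
  sink = *(volatile int*)poll_page;               // armed poll: SIGSEGV -> handler -> retry
  std::printf("polls trapped: %d (sink=%d)\n", (int)polls_trapped, sink);
  return 0;
}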
++instruct CallDynamicJavaDirect(method meth) ++%{ ++ match(CallDynamicJava); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "movq v0, #Universe::non_oop_word()\t@CallDynamicJavaDirect\n\t" ++ "call,dynamic " %} ++ ins_encode(Java_Dynamic_Call(meth), call_epilog); ++ ins_pipe(pipe_slow); ++ ins_alignment(4); ++%} ++ ++// Call Runtime Instruction ++instruct CallRuntimeDirect(method meth) ++%{ ++ match(CallRuntime); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "call,runtime\t@CallRuntimeDirect" %} ++ ins_encode(sw64_Java_To_Runtime(meth)); ++ ins_pipe(pipe_slow); ++// ins_alignment(16);//lsp todo check ++%} ++ ++// Call runtime without safepoint ++instruct CallLeafDirect(method meth) ++%{ ++ match(CallLeaf); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "call_leaf,runtime\t@CallLeafDirect" %} ++ ins_encode(sw64_Java_To_Runtime(meth)); ++ ins_pipe(pipe_slow); ++ ins_pc_relative(1);//lsp todo check ++// ins_alignment(16);//lsp todo check ++%} ++ ++// Call runtime without safepoint ++instruct CallLeafNoFPDirect(method meth) ++%{ ++ match(CallLeafNoFP); ++ effect(USE meth); ++ ++ ins_cost(300); ++ format %{ "call_leaf_nofp,runtime\t@CallLeafNoFPDirect" %} ++ ins_encode(sw64_Java_To_Runtime(meth)); ++ ins_pipe(pipe_slow); ++ ins_pc_relative(1);//lsp todo check ++// ins_alignment(16); ++%} ++ ++// Return Instruction ++// Remove the return address & jump to it. ++// Notice: We always emit a nop after a ret to make sure there is room ++// for safepoint patching ++instruct Ret() ++%{ ++ match(Return); ++ ++ format %{ "ret\t@Ret" %} ++ ++ ins_encode %{ ++ __ ret_sw(); ++ %} ++ ins_pipe(pipe_jmp); ++%} ++ ++// Tail Call; Jump from runtime stub to Java code. ++// Also known as an 'interprocedural jump'. ++// Target of jump will eventually return to caller. ++// TailJump below removes the return address. ++instruct TailCalljmpInd(rRegP jump_target, rRegP method_oop) ++%{ ++ match(TailCall jump_target method_oop); ++ ++ ins_cost(300); ++ format %{ "jmp $jump_target\t# rmethod: holds method oop\t@TailCalljmpInd" %} ++ ++ ins_encode %{ ++ Register target = $jump_target$$Register; ++ Register oop = $method_oop$$Register; ++ //__ stop("check parameters jzy?"); ++ // RA will be used in generate_forward_exception() ++// __ push(RA); ++ ++ __ movl(rmethod, oop); ++ __ jmp(target); ++ //__ stop("check parameters jzy?"); ++ %} ++ ins_pipe(pipe_jmp); ++%} ++ ++// Tail Jump; remove the return address; jump to target. ++// TailCall above leaves the return address around. ++instruct tailjmpInd(rRegP jump_target, v0_RegP ex_oop) ++%{ ++ match(TailJump jump_target ex_oop); ++ ++ ins_cost(300); ++ format %{ "Jmp $jump_target ; ex_oop = $ex_oop\t@tailjmpInd" %} ++ ins_encode %{ ++ // V0, c_rarg2 are indicated in: ++ // [stubGenerator_sw64.cpp] generate_forward_exception() ++ // [runtime_sw64.cpp] OptoRuntime::generate_exception_blob() ++ // ++ Register target = $jump_target$$Register; ++ Register oop = $ex_oop$$Register; ++ //Register exception_oop = V0; ++ Register exception_pc = c_rarg2; ++ //__ stop("check tailjmpInd lsp"); ++ __ block_comment(";;tailjmpInd start"); ++ __ movl(exception_pc, RA); ++ //__ movl(rax, oop); // oop is same as exception_oop,both are v0 ++ __ jmp(target); ++ %} ++ ins_pipe(pipe_jmp); ++%} ++ ++// Create exception oop: created by stack-crawling runtime code. ++// Created exception is now available to this handler, and is setup ++// just prior to jumping to this handler. No code emitted. 
++instruct CreateException(v0_RegP ex_oop) ++%{ ++ match(Set ex_oop (CreateEx)); ++ ++ size(0); ++ // use the following format syntax ++ format %{ "# exception oop is in v0; no code emitted" %} ++ ins_encode(); ++ ins_pipe(empty); ++%} ++ ++// Rethrow exception: ++// The exception oop will come in the first argument position. ++// Then JUMP (not call) to the rethrow stub code. ++instruct RethrowException() ++%{ ++ match(Rethrow); ++ ++ // use the following format syntax ++ format %{ "jmp rethrow_stub\t@RethrowException" %} ++ ++ ins_encode %{ ++ //__ stop("check RethrowException lsp"); ++ __ block_comment("@ RethrowException"); ++ cbuf.set_insts_mark(); ++ cbuf.relocate(cbuf.insts_mark(), runtime_call_Relocation::spec()); ++ ++ // call OptoRuntime::rethrow_stub to get the exception handler in parent method ++ __ patchable_jump((address)OptoRuntime::rethrow_stub()); ++ %} ++ ins_pipe(pipe_jmp); ++%} ++ ++instruct ShouldNotReachHere( ) ++%{ ++ match(Halt); ++ ins_cost(300); ++ ++ // Use the following format syntax ++ format %{ "ILLTRAP ;#@ShouldNotReachHere" %} ++ ins_encode %{ ++ // Here we should emit illtrap ! ++ ++ __ stop("in ShoudNotReachHere"); ++ ++ %} ++ ins_pipe( pipe_jmp ); ++%} ++/* ++// ++// Execute ZGC load barrier (strong) slow path ++// ++ ++// When running without XMM regs ++instruct loadBarrierSlowRegNoVec(rRegP dst, memory mem, rFlagsReg cr) %{ ++ ++ match(Set dst (LoadBarrierSlowReg mem)); ++ predicate(MaxVectorSize < 16); ++ ++ effect(DEF dst, KILL cr); ++ ++ format %{"LoadBarrierSlowRegNoVec $dst, $mem" %} ++ ins_encode %{ ++#if INCLUDE_ZGC ++ Register d = $dst$$Register; ++ ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); ++ ++ assert(d != r12, "Can't be R12!"); ++ assert(d != r15, "Can't be R15!"); ++ assert(d != rsp, "Can't be RSP!"); ++ ++ __ lea(d, $mem$$Address); ++ __ call(RuntimeAddress(bs->load_barrier_slow_stub(d))); ++#else ++ ShouldNotReachHere(); ++#endif ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// For XMM and YMM enabled processors ++instruct loadBarrierSlowRegXmmAndYmm(rRegP dst, memory mem, rFlagsReg cr, ++ rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3, ++ rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7, ++ rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11, ++ rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{ ++ ++ match(Set dst (LoadBarrierSlowReg mem)); ++ predicate((UseSSE > 0) && (UseAVX <= 2) && (MaxVectorSize >= 16)); ++ ++ effect(DEF dst, KILL cr, ++ KILL x0, KILL x1, KILL x2, KILL x3, ++ KILL x4, KILL x5, KILL x6, KILL x7, ++ KILL x8, KILL x9, KILL x10, KILL x11, ++ KILL x12, KILL x13, KILL x14, KILL x15); ++ ++ format %{"LoadBarrierSlowRegXmm $dst, $mem" %} ++ ins_encode %{ ++#if INCLUDE_ZGC ++ Register d = $dst$$Register; ++ ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); ++ ++ assert(d != r12, "Can't be R12!"); ++ assert(d != r15, "Can't be R15!"); ++ assert(d != rsp, "Can't be RSP!"); ++ ++ __ lea(d, $mem$$Address); ++ __ call(RuntimeAddress(bs->load_barrier_slow_stub(d))); ++#else ++ ShouldNotReachHere(); ++#endif ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// For ZMM enabled processors ++instruct loadBarrierSlowRegZmm(rRegP dst, memory mem, rFlagsReg cr, ++ rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3, ++ rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7, ++ rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11, ++ rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15, ++ rxmm16 x16, rxmm17 x17, rxmm18 x18, rxmm19 x19, ++ rxmm20 x20, rxmm21 x21, rxmm22 x22, rxmm23 x23, ++ rxmm24 x24, rxmm25 x25, 
rxmm26 x26, rxmm27 x27, ++ rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{ ++ ++ match(Set dst (LoadBarrierSlowReg mem)); ++ predicate((UseAVX == 3) && (MaxVectorSize >= 16)); ++ ++ effect(DEF dst, KILL cr, ++ KILL x0, KILL x1, KILL x2, KILL x3, ++ KILL x4, KILL x5, KILL x6, KILL x7, ++ KILL x8, KILL x9, KILL x10, KILL x11, ++ KILL x12, KILL x13, KILL x14, KILL x15, ++ KILL x16, KILL x17, KILL x18, KILL x19, ++ KILL x20, KILL x21, KILL x22, KILL x23, ++ KILL x24, KILL x25, KILL x26, KILL x27, ++ KILL x28, KILL x29, KILL x30, KILL x31); ++ ++ format %{"LoadBarrierSlowRegZmm $dst, $mem" %} ++ ins_encode %{ ++#if INCLUDE_ZGC ++ Register d = $dst$$Register; ++ ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); ++ ++ assert(d != r12, "Can't be R12!"); ++ assert(d != r15, "Can't be R15!"); ++ assert(d != rsp, "Can't be RSP!"); ++ ++ __ lea(d, $mem$$Address); ++ __ call(RuntimeAddress(bs->load_barrier_slow_stub(d))); ++#else ++ ShouldNotReachHere(); ++#endif ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// ++// Execute ZGC load barrier (weak) slow path ++// ++ ++// When running without XMM regs ++instruct loadBarrierWeakSlowRegNoVec(rRegP dst, memory mem, rFlagsReg cr) %{ ++ ++ match(Set dst (LoadBarrierSlowReg mem)); ++ predicate(MaxVectorSize < 16); ++ ++ effect(DEF dst, KILL cr); ++ ++ format %{"LoadBarrierSlowRegNoVec $dst, $mem" %} ++ ins_encode %{ ++#if INCLUDE_ZGC ++ Register d = $dst$$Register; ++ ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); ++ ++ assert(d != r12, "Can't be R12!"); ++ assert(d != r15, "Can't be R15!"); ++ assert(d != rsp, "Can't be RSP!"); ++ ++ __ lea(d, $mem$$Address); ++ __ call(RuntimeAddress(bs->load_barrier_weak_slow_stub(d))); ++#else ++ ShouldNotReachHere(); ++#endif ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// For XMM and YMM enabled processors ++instruct loadBarrierWeakSlowRegXmmAndYmm(rRegP dst, memory mem, rFlagsReg cr, ++ rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3, ++ rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7, ++ rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11, ++ rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15) %{ ++ ++ match(Set dst (LoadBarrierWeakSlowReg mem)); ++ predicate((UseSSE > 0) && (UseAVX <= 2) && (MaxVectorSize >= 16)); ++ ++ effect(DEF dst, KILL cr, ++ KILL x0, KILL x1, KILL x2, KILL x3, ++ KILL x4, KILL x5, KILL x6, KILL x7, ++ KILL x8, KILL x9, KILL x10, KILL x11, ++ KILL x12, KILL x13, KILL x14, KILL x15); ++ ++ format %{"LoadBarrierWeakSlowRegXmm $dst, $mem" %} ++ ins_encode %{ ++#if INCLUDE_ZGC ++ Register d = $dst$$Register; ++ ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); ++ ++ assert(d != r12, "Can't be R12!"); ++ assert(d != r15, "Can't be R15!"); ++ assert(d != rsp, "Can't be RSP!"); ++ ++ __ lea(d,$mem$$Address); ++ __ call(RuntimeAddress(bs->load_barrier_weak_slow_stub(d))); ++#else ++ ShouldNotReachHere(); ++#endif ++ %} ++ ins_pipe(pipe_slow); ++%} ++ ++// For ZMM enabled processors ++instruct loadBarrierWeakSlowRegZmm(rRegP dst, memory mem, rFlagsReg cr, ++ rxmm0 x0, rxmm1 x1, rxmm2 x2,rxmm3 x3, ++ rxmm4 x4, rxmm5 x5, rxmm6 x6, rxmm7 x7, ++ rxmm8 x8, rxmm9 x9, rxmm10 x10, rxmm11 x11, ++ rxmm12 x12, rxmm13 x13, rxmm14 x14, rxmm15 x15, ++ rxmm16 x16, rxmm17 x17, rxmm18 x18, rxmm19 x19, ++ rxmm20 x20, rxmm21 x21, rxmm22 x22, rxmm23 x23, ++ rxmm24 x24, rxmm25 x25, rxmm26 x26, rxmm27 x27, ++ rxmm28 x28, rxmm29 x29, rxmm30 x30, rxmm31 x31) %{ ++ ++ match(Set dst (LoadBarrierWeakSlowReg mem)); 
++ predicate((UseAVX == 3) && (MaxVectorSize >= 16)); ++ ++ effect(DEF dst, KILL cr, ++ KILL x0, KILL x1, KILL x2, KILL x3, ++ KILL x4, KILL x5, KILL x6, KILL x7, ++ KILL x8, KILL x9, KILL x10, KILL x11, ++ KILL x12, KILL x13, KILL x14, KILL x15, ++ KILL x16, KILL x17, KILL x18, KILL x19, ++ KILL x20, KILL x21, KILL x22, KILL x23, ++ KILL x24, KILL x25, KILL x26, KILL x27, ++ KILL x28, KILL x29, KILL x30, KILL x31); ++ ++ format %{"LoadBarrierWeakSlowRegZmm $dst, $mem" %} ++ ins_encode %{ ++#if INCLUDE_ZGC ++ Register d = $dst$$Register; ++ ZBarrierSetAssembler* bs = (ZBarrierSetAssembler*)BarrierSet::barrier_set()->barrier_set_assembler(); ++ ++ assert(d != r12, "Can't be R12!"); ++ assert(d != r15, "Can't be R15!"); ++ assert(d != rsp, "Can't be RSP!"); ++ ++ __ lea(d,$mem$$Address); ++ __ call(RuntimeAddress(bs->load_barrier_weak_slow_stub(d))); ++#else ++ ShouldNotReachHere(); ++#endif ++ %} ++ ins_pipe(pipe_slow); ++%} ++*/ ++// ============================================================================ ++// This name is KNOWN by the ADLC and cannot be changed. ++// The ADLC forces a 'TypeRawPtr::BOTTOM' output type ++// for this guy. ++instruct tlsLoadP(s2_RegP dst) %{ ++ match(Set dst (ThreadLocal)); ++ effect(DEF dst); ++ ++ size(0); ++ format %{ "# TLS is in S2" %} ++ ins_encode( /*empty encoding*/ ); ++ ins_pipe(empty); ++%} ++/* ++ ++//----------PEEPHOLE RULES----------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++// ++// peepmatch ( root_instr_name [preceding_instruction]* ); ++// ++// peepconstraint %{ ++// (instruction_number.operand_name relational_op instruction_number.operand_name ++// [, ...] ); ++// // instruction numbers are zero-based using left to right order in peepmatch ++// ++// peepreplace ( instr_name ( [instruction_number.operand_name]* ) ); ++// // provide an instruction_number.operand_name for each operand that appears ++// // in the replacement instruction's match rule ++// ++// ---------VM FLAGS--------------------------------------------------------- ++// ++// All peephole optimizations can be turned off using -XX:-OptoPeephole ++// ++// Each peephole rule is given an identifying number starting with zero and ++// increasing by one in the order seen by the parser. An individual peephole ++// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=# ++// on the command-line. 
++// ++// ---------CURRENT LIMITATIONS---------------------------------------------- ++// ++// Only match adjacent instructions in same basic block ++// Only equality constraints ++// Only constraints between operands, not (0.dest_reg == RAX_enc) ++// Only one replacement instruction ++// ++// ---------EXAMPLE---------------------------------------------------------- ++// ++// // pertinent parts of existing instructions in architecture description ++// instruct movI(rRegI dst, rRegI src) ++// %{ ++// match(Set dst (CopyI src)); ++// %} ++// ++// instruct incI_rReg(rRegI dst, immI1 src, rFlagsReg cr) ++// %{ ++// match(Set dst (AddI dst src)); ++// effect(KILL cr); ++// %} ++// ++// // Change (inc mov) to lea ++// peephole %{ ++// // increment preceeded by register-register move ++// peepmatch ( incI_rReg movI ); ++// // require that the destination register of the increment ++// // match the destination register of the move ++// peepconstraint ( 0.dst == 1.dst ); ++// // construct a replacement instruction that sets ++// // the destination to ( move's source register + one ) ++// peepreplace ( leaI_rReg_immI( 0.dst 1.src 0.src ) ); ++// %} ++// ++ ++// Implementation no longer uses movX instructions since ++// machine-independent system no longer uses CopyX nodes. ++// ++// peephole ++// %{ ++// peepmatch (incI_rReg movI); ++// peepconstraint (0.dst == 1.dst); ++// peepreplace (leaI_rReg_immI(0.dst 1.src 0.src)); ++// %} ++ ++// peephole ++// %{ ++// peepmatch (decI_rReg movI); ++// peepconstraint (0.dst == 1.dst); ++// peepreplace (leaI_rReg_immI(0.dst 1.src 0.src)); ++// %} ++ ++// peephole ++// %{ ++// peepmatch (addI_rReg_imm movI); ++// peepconstraint (0.dst == 1.dst); ++// peepreplace (leaI_rReg_immI(0.dst 1.src 0.src)); ++// %} ++ ++// peephole ++// %{ ++// peepmatch (incL_rReg movL); ++// peepconstraint (0.dst == 1.dst); ++// peepreplace (leaL_rReg_immL(0.dst 1.src 0.src)); ++// %} ++ ++// peephole ++// %{ ++// peepmatch (decL_rReg movL); ++// peepconstraint (0.dst == 1.dst); ++// peepreplace (leaL_rReg_immL(0.dst 1.src 0.src)); ++// %} ++ ++// peephole ++// %{ ++// peepmatch (addL_rReg_imm movL); ++// peepconstraint (0.dst == 1.dst); ++// peepreplace (leaL_rReg_immL(0.dst 1.src 0.src)); ++// %} ++ ++// peephole ++// %{ ++// peepmatch (addP_rReg_imm movP); ++// peepconstraint (0.dst == 1.dst); ++// peepreplace (leaP_rReg_imm(0.dst 1.src 0.src)); ++// %} ++ ++// // Change load of spilled value to only a spill ++// instruct storeI(memory mem, rRegI src) ++// %{ ++// match(Set mem (StoreI mem src)); ++// %} ++// ++// instruct loadI(rRegI dst, memory mem) ++// %{ ++// match(Set dst (LoadI mem)); ++// %} ++// ++ ++peephole ++%{ ++ peepmatch (loadI storeI); ++ peepconstraint (1.src == 0.dst, 1.mem == 0.mem); ++ peepreplace (storeI(1.mem 1.mem 1.src)); ++%} ++ ++peephole ++%{ ++ peepmatch (loadL storeL); ++ peepconstraint (1.src == 0.dst, 1.mem == 0.mem); ++ peepreplace (storeL(1.mem 1.mem 1.src)); ++%} ++ ++//----------SMARTSPILL RULES--------------------------------------------------- ++// These must follow all instruction definitions as they use the names ++// defined in the instructions definitions. ++*/ +diff --git a/src/hotspot/cpu/sw64/sw64Test.cpp b/src/hotspot/cpu/sw64/sw64Test.cpp +new file mode 100644 +index 0000000000..9dc042b5bb +--- /dev/null ++++ b/src/hotspot/cpu/sw64/sw64Test.cpp +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2014, 2020, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include ++ ++#include "precompiled.hpp" ++#include "code/codeBlob.hpp" ++#include "asm/macroAssembler.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++ ++// hook routine called during JVM bootstrap to test AArch64 assembler ++ ++extern "C" void entry(CodeBuffer*); ++ ++ ++class X_Generator: public StubCodeGenerator { ++ public: ++ X_Generator(CodeBuffer *c, bool print_code = true) : StubCodeGenerator(c, print_code) {} ++ ++ ++ address generate_getGenerateInfo() { ++ StubCodeMark mark(this, "VM_Version", "getGenerateInfo"); ++ ++# define __ _masm-> ++ address start = __ pc(); ++ ++ __ movl(V0, c_rarg0); // Copy to eax we need a return value anyhow ++ __ xchgptr(V0, Address(c_rarg1, 0)); // automatic LOCK ++ __ ret(); ++ ++# undef __ ++ return start; ++ } ++}; ++ ++void directTestCode() ++{ ++ BufferBlob* b = BufferBlob::create("sw64Test", 500000); ++ CodeBuffer code(b); ++ MacroAssembler _masm(&code); ++ //entry(&code); ++#define _masm __ ++ ++#undef __ ++} ++ ++ ++extern "C" { ++ typedef void (*getGenerateStub_t)(void*); ++} ++static getGenerateStub_t getGenerateStub = NULL; ++ ++void sw64TestHook() ++{ ++#ifdef ASSERT ++ //direct test generatecode ++ { ++ directTestCode(); ++ } ++ ++ //test generation code by StubGenerator ++ { ++ { ++ ResourceMark rm; ++ ++ BufferBlob* stub_blob = BufferBlob::create("sw64TestHook_stub", 500000); ++ if (stub_blob == NULL) { ++ vm_exit_during_initialization("Unable to allocate sw64TestHook_stub"); ++ } ++ ++ CodeBuffer c(stub_blob); ++ X_Generator g(&c, false); ++ getGenerateStub = CAST_TO_FN_PTR(getGenerateStub_t, ++ g.generate_getGenerateInfo()); ++ } ++ ++ address arg0; ++ getGenerateStub_t((void*)arg0); ++ } ++ ++#endif ++} +diff --git a/src/hotspot/cpu/sw64/sw64_ad.m4 b/src/hotspot/cpu/sw64/sw64_ad.m4 +new file mode 100644 +index 0000000000..e69de29bb2 +diff --git a/src/hotspot/cpu/sw64/templateInterpreterGenerator_sw64.cpp b/src/hotspot/cpu/sw64/templateInterpreterGenerator_sw64.cpp +new file mode 100755 +index 0000000000..115aaefb60 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/templateInterpreterGenerator_sw64.cpp +@@ -0,0 +1,2042 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "gc/shared/barrierSetAssembler.hpp" ++#include "interpreter/bytecodeHistogram.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/templateInterpreterGenerator.hpp" ++#include "interpreter/templateTable.hpp" ++#include "oops/arrayOop.hpp" ++#include "oops/methodData.hpp" ++#include "oops/method.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/jvmtiExport.hpp" ++#include "prims/jvmtiThreadState.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/deoptimization.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "runtime/timer.hpp" ++#include "runtime/vframeArray.hpp" ++#include "utilities/debug.hpp" ++#include "utilities/macros.hpp" ++ ++#define __ _masm-> ++ ++// Size of interpreter code. Increase if too small. Interpreter will ++// fail with a guarantee ("not enough space for interpreter generation"); ++// if too small. ++// Run with +PrintInterpreter to get the VM to print out the size. 
++// Max size with JVMTI ++int TemplateInterpreter::InterpreterCodeSize = JVMCI_ONLY(268) NOT_JVMCI(256) * 1024; ++ ++ ++const int method_offset = frame::interpreter_frame_method_offset * wordSize; ++const int bcp_offset = frame::interpreter_frame_bcp_offset * wordSize; ++const int locals_offset = frame::interpreter_frame_locals_offset * wordSize; ++ ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} ++#endif ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++ ++//----------------------------------------------------------------------------- ++ ++extern "C" void entry(CodeBuffer*); ++ ++//----------------------------------------------------------------------------- ++ ++address TemplateInterpreterGenerator::generate_StackOverflowError_handler() { ++ BLOCK_COMMENT("generate_StackOverflowError_handler enter"); //__ warn("TODO:check function right generate_StackOverflowError_handler jzy "); ++ address entry = __ pc(); ++ Register rax = V0; ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ lea(rax, Address(rfp, ++ frame::interpreter_frame_monitor_block_top_offset * ++ wordSize)); ++ __ cmpptr(rax, esp); // rax = maximal rsp for current rbp (stack ++ // grows negative) ++ __ jcc(Assembler::aboveEqual, L); // check if frame is complete ++ __ stop ("interpreter frame not set up"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ // Restore bcp under the assumption that the current frame is still ++ // interpreted ++ __ restore_bcp(); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ // throw exception ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_StackOverflowError)); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler() {SCOPEMARK_NAME(TemplateInterpreterGenerator::generate_ArrayIndexOutOfBounds_handler, _masm) ++ address entry = __ pc(); ++ // The expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ ++ Register rarg = c_rarg1; ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ throw_ArrayIndexOutOfBoundsException), ++ rarg, c_rarg2); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_ClassCastException_handler() {//__ warn("TODO:check function right generate_ClassCastException_handler jzy "); ++ address entry = __ pc(); ++ ++ // object is at TOS ++ Register rarg = c_rarg1; ++ __ pop(rarg); ++ ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ empty_expression_stack(); ++ ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ throw_ClassCastException), ++ rarg);BLOCK_COMMENT("generate_ClassCastException_handler leave"); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_exception_handler_common( ++ const char* name, const char* message, bool pass_oop) {SCOPEMARK_NAME(TemplateInterpreterGenerator::generate_exception_handler_common, _masm) ++ assert(!pass_oop || message == NULL, "either oop or message but not both"); ++ address entry = __ pc(); ++ ++ Register rarg = c_rarg1; ++ Register rarg2 = c_rarg2; ++ Register rax = V0; ++ ++ if (pass_oop) { ++ // object is at TOS ++ __ pop(rarg2); ++ } ++ // expression stack must be empty before entering the VM if an ++ // exception happened ++ __ 
empty_expression_stack(); ++ // setup parameters ++ __ lea(rarg, ExternalAddress((address)name)); ++ if (pass_oop) { ++ __ call_VM(rax, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime:: ++ create_klass_exception), ++ rarg, rarg2); ++ } else { ++ __ lea(rarg2, ExternalAddress((address)message)); ++ __ call_VM(rax, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::create_exception), ++ rarg, rarg2); ++ } ++ // throw exception ++ __ jump(ExternalAddress(Interpreter::throw_exception_entry())); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_return_entry_for(TosState state, int step, size_t index_size) {SCOPEMARK_NAME(TemplateInterpreterGenerator::generate_return_entry_for, _masm); ++ address entry = __ pc(); ++ Register rax = FSR; ++ ++ // Restore stack bottom in case i2c adjusted stack ++ __ ldptr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // and NULL it as marker that esp is now tos until next java call ++ __ stptr(R0, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ ++ if (state == atos) { ++ Register mdp = rscratch1; ++ Register tmp = rscratch2; ++ __ profile_return_type(mdp, rax, tmp); ++ } ++ ++ const Register cache = rscratch1; ++ const Register index = rscratch2; ++ __ get_cache_and_index_at_bcp(cache, index, 1, index_size); ++ ++ const Register flags = cache; ++ __ ldw(flags, Address(cache, index, Address::times_ptr, ConstantPoolCache::base_offset() + ConstantPoolCacheEntry::flags_offset())); ++ __ andw(flags, ConstantPoolCacheEntry::parameter_size_mask, flags); ++ __ lea(esp, Address(esp, flags, Interpreter::stackElementScale())); ++ ++ const Register java_thread = rthread; ++ if (JvmtiExport::can_pop_frame()) { ++ __ check_and_handle_popframe(java_thread); ++ } ++ if (JvmtiExport::can_force_early_return()) { ++ __ check_and_handle_earlyret(java_thread); ++ } ++ ++ __ dispatch_next(state, step); ++ ++ return entry; ++} ++ ++ ++address TemplateInterpreterGenerator::generate_deopt_entry_for(TosState state, int step, address continuation) {BLOCK_COMMENT("generate_deopt_entry_for enter");//__ warn("TODO:check function right generate_deopt_entry_for jzy "); ++ address entry = __ pc(); ++ // NULL last_sp until next java call ++ __ stptr(R0, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ restore_bcp(); ++ __ restore_locals(); ++ Register rbx = rmethod; ++#if INCLUDE_JVMCI ++ // Check if we need to take lock at entry of synchronized method. This can ++ // only occur on method entry so emit it only for vtos with step 0. ++ if ((EnableJVMCI || UseAOT) && state == vtos && step == 0) { ++ Label L; ++ __ ldbu(rcc, Address(rthread, JavaThread::pending_monitorenter_offset())); ++ __ jcc(Assembler::zero, L); ++ // Clear flag. ++ __ stb(R0, Address(rthread, JavaThread::pending_monitorenter_offset())); ++ // Satisfy calling convention for lock_method(). ++ __ get_method(rbx); ++ // Take lock. 
++ lock_method(); ++ __ bind(L); ++ } else { ++#ifdef ASSERT ++ if (EnableJVMCI) { ++ Label L; ++ __ ldbu(rscratch3, Address(rthread, JavaThread::pending_monitorenter_offset())); ++ __ cmpw(rscratch3, R0); ++ __ jcc(Assembler::zero, L); ++ __ stop("unexpected pending monitor in deopt entry"); ++ __ bind(L); ++ } ++#endif ++ } ++#endif ++ // handle exceptions ++ { ++ Label L; ++ __ cmpptr(Address(rthread, Thread::pending_exception_offset()), R0); ++ __ jcc(Assembler::zero, L); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here("260"); ++ __ bind(L); ++ } ++ if (continuation == NULL) { ++ __ dispatch_next(state, step); ++ } else { ++ __ jump_to_entry(continuation); ++ }BLOCK_COMMENT("generate_deopt_entry_for leave"); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_result_handler_for( ++ BasicType type) {BLOCK_COMMENT("generate_result_handler_for enter");//__ warn("TODO:check function right generate_result_handler_for jzy "); ++ address entry = __ pc(); ++ switch (type) { ++ case T_BOOLEAN: __ c2bool(V0); break; ++ case T_CHAR : __ zapnot(V0, 0x3, V0); break; ++ case T_BYTE : __ sign_extend_byte (V0); break; ++ case T_SHORT : __ sign_extend_short(V0); break; ++ case T_INT : /* nothing to do */ break; ++ case T_LONG : /* nothing to do */ break; ++ case T_VOID : /* nothing to do */ break; ++ case T_FLOAT : /* nothing to do */ break; ++ case T_DOUBLE : /* nothing to do */ break; ++ case T_OBJECT : ++ // retrieve result from frame ++ __ ldptr(V0, Address(rfp, frame::interpreter_frame_oop_temp_offset*wordSize)); ++ // and verify it ++ __ verify_oop(V0); ++ break; ++ default : ShouldNotReachHere(); ++ } ++ __ ret_sw(); // return from result handler ++ BLOCK_COMMENT("generate_result_handler_for leave"); ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_safept_entry_for( ++ TosState state, ++ address runtime_entry) {BLOCK_COMMENT("generate_safept_entry_for enter");//__ warn("TODO:check function right generate_safept_entry_for jzy "); ++ address entry = __ pc(); ++ __ push(state); ++ __ call_VM(noreg, runtime_entry); ++ __ dispatch_via(vtos, Interpreter::_normal_table.table_for(vtos));BLOCK_COMMENT("generate_safept_entry_for leave"); ++ return entry; ++} ++ ++ ++ ++// Helpers for commoning out cases in the various type of method entries. ++// ++ ++ ++// increment invocation count & check for overflow ++// ++// Note: checking for negative value instead of overflow ++// so we have a 'sticky' overflow test ++// ++// rmethod: method ++// ++void TemplateInterpreterGenerator::generate_counter_incr( ++ Label* overflow, ++ Label* profile_method, ++ Label* profile_method_continue) {SCOPEMARK_NAME(generate_counter_incr, _masm);//__ warn("TODO:check function right generate_counter_incr jzy "); ++ Label done; ++ Register rax = FSR; ++ Register rcx = rscratch1; ++ if (TieredCompilation) { ++ int increment = InvocationCounter::count_increment; ++ Label no_mdo; ++ if (ProfileInterpreter) { ++ // Are we profiling? 
++ __ ldptr(rax, Address(rmethod, Method::method_data_offset())); ++ __ jcc(Assembler::zero, no_mdo, rax); ++ // Increment counter in the MDO ++ const Address mdo_invocation_counter(rax, in_bytes(MethodData::invocation_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(rax, in_bytes(MethodData::invoke_mask_offset())); ++ __ increment_mask_and_jump(mdo_invocation_counter, increment, mask, rcx, false, Assembler::zero, overflow); ++ __ jmp(done); ++ } ++ __ bind(no_mdo); ++ // Increment counter in MethodCounters ++ const Address invocation_counter(rax, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ __ get_method_counters(rmethod, rax, done); ++ const Address mask(rax, in_bytes(MethodCounters::invoke_mask_offset())); ++ __ increment_mask_and_jump(invocation_counter, increment, mask, rcx, ++ false, Assembler::zero, overflow); ++ __ bind(done); ++ } else { // not TieredCompilation ++ const Address backedge_counter(rax, ++ MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset()); ++ const Address invocation_counter(rax, ++ MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset()); ++ ++ __ get_method_counters(rmethod, rax, done); ++ ++ if (ProfileInterpreter) { // %%% Merge this into methodDataOop ++ __ ldws(rscratch2, Address(rax, ++ MethodCounters::interpreter_invocation_counter_offset())); ++ __ incrementw(rscratch2, 1); ++ __ stw(rscratch2, Address(rax, ++ MethodCounters::interpreter_invocation_counter_offset())); ++ } ++ // Update standard invocation counters ++ __ ldws(rcx, invocation_counter); ++ __ incrementw(rcx, InvocationCounter::count_increment); ++ __ stw(rcx, invocation_counter); // save invocation count ++ ++ __ ldws(rax, backedge_counter); // load backedge counter ++ __ andw(rax, InvocationCounter::count_mask_value, rax); ++ ++ __ addw(rcx, rax, rcx); // add both counters ++ ++ if (ProfileInterpreter && profile_method != NULL) { ++ // Test to see if we should create a method data oop ++ __ ldptr(rax, Address(rmethod, Method::method_counters_offset())); ++ __ ldw(rax, Address(rax, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); ++ __ cmpw(rcx, rax); ++ __ jcc(Assembler::less, *profile_method_continue); ++ ++ // if no method data exists, go to profile_method ++ __ test_method_data_pointer(rax, *profile_method); //this rax is ldw value ++ } ++ ++ __ ldptr(rax, Address(rmethod, Method::method_counters_offset())); ++ // Test to see if we should create a method data oop ++ __ cmpw(rcx, Address(rax, in_bytes(MethodCounters::interpreter_invocation_limit_offset()))); ++ __ jcc(Assembler::aboveEqual, *overflow); ++ __ bind(done); ++ } ++} ++ ++void TemplateInterpreterGenerator::generate_counter_overflow(Label& do_continue) {BLOCK_COMMENT("generate_counter_overflow enter"); //__ warn("TODO:check function right generate_counter_overflow jzy"); ++ ++ // Asm interpreter on entry ++ // rlocals - locals ++ // rbcp - bcp ++ // rmethod - method ++ // rfp - interpreter frame ++ ++ // On return (i.e. jump to entry_point) ++ // rmethod - method ++ // RA - return address of interpreter caller ++ // tos - the last parameter to Java method ++ // SP - sender_sp ++ ++ // InterpreterRuntime::frequency_counter_overflow takes two ++ // arguments, the first (thread) is passed by call_VM, the second ++ // indicates if the counter overflow occurs at a backwards branch ++ // (NULL bcp). We pass zero for it. 
The call returns the address ++ // of the verified entry point for the method or NULL if the ++ // compilation did not complete (either went background or bailed ++ // out). ++ Register rarg = c_rarg1; ++ __ movw(rarg, (u_int32_t)0); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), ++ rarg); ++ ++ __ ldptr(rmethod, Address(rfp, method_offset)); // restore Method* ++ // Preserve invariant that r13/r14 contain bcp/locals of sender frame ++ // and jump to the interpreted entry. ++ __ jmp(do_continue);BLOCK_COMMENT("generate_counter_overflow leave"); ++} ++ ++// See if we've got enough room on the stack for locals plus overhead ++// below JavaThread::stack_overflow_limit(). If not, throw a StackOverflowError ++// without going through the signal handler, i.e., reserved and yellow zones ++// will not be made usable. The shadow zone must suffice to handle the ++// overflow. ++// The expression stack grows down incrementally, so the normal guard ++// page mechanism will work for that. ++// ++// NOTE: Since the additional locals are also always pushed (wasn't ++// obvious in generate_method_entry) so the guard should work for them ++// too. ++// ++// Args: ++// c_rarg5: number of additional locals this frame needs (what we must check) ++// rmethod: Method* ++// ++void TemplateInterpreterGenerator::generate_stack_overflow_check(void) {BLOCK_COMMENT("generate_stack_overflow_check enter");//__ warn("TODO:check function right generate_stack_overflow_check jzy"); ++ // see if we've got enough room on the stack for locals plus overhead. ++ // the expression stack grows down incrementally, so the normal guard ++ // page mechanism will work for that. ++ // ++ // Registers live on entry: ++ // ++ // rmethod: Method* ++ // rdx: number of additional locals this frame needs (what we must check) ++ // register is special, should be same register in generate_normal_entry ++ ++ // killed: V0, rscratch2 ++ // NOTE: since the additional locals are also always pushed (wasn't obvious in ++ // generate_method_entry) so the guard should work for them too. ++ // ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ // total overhead size: entry_size + (saved fp thru expr stack bottom). ++ // be sure to change this if you add/subtract anything to/from the overhead area ++ const int overhead_size = ++ -(frame::interpreter_frame_initial_sp_offset * wordSize) + entry_size; ++ ++ const int page_size = os::vm_page_size(); ++ ++ Label after_frame_check; ++ //Register rdx = c_rarg5;//TODO:why set this? jzy ++ //Register rax = V0; ++ ++ // see if the frame is greater than one page in size. If so, ++ // then we need to verify there is enough stack space remaining ++ // for the additional locals. ++ __ cmpw(rdx, (page_size - overhead_size) / Interpreter::stackElementSize); ++ __ jcc(Assembler::belowEqual, after_frame_check); ++ ++ // compute sp as if this were going to be the last frame on ++ // the stack before the red zone ++ ++ ++ Label after_frame_check_pop; ++ ++ const Address stack_limit(rthread, JavaThread::stack_overflow_limit_offset()); ++ ++ // locals + overhead, in bytes ++ __ movl(rax, rdx); ++ __ slll(rax, Interpreter::logStackElementSize, rax); // Convert parameter count to bytes. ++ __ addptr(rax, overhead_size, rax); ++ ++#ifdef ASSERT ++ Label limit_okay; ++ // Verify that thread stack overflow limit is non-zero. 
++ __ cmpptr(stack_limit, R0); ++ __ jcc(Assembler::notEqual, limit_okay); ++ __ stop("stack overflow limit is zero"); ++ __ bind(limit_okay); ++#endif ++ ++ // Add locals/frame size to stack limit. ++ __ ldptr(rscratch2, stack_limit); ++ __ addptr(rax, rscratch2, rax); ++ ++ // check against the current stack bottom ++ __ cmpptr(esp, rax); ++ __ jcc(Assembler::above, after_frame_check); ++ ++ // Restore sender's sp as SP. This is necessary if the sender's ++ // frame is an extended compiled frame (see gen_c2i_adapter()) ++ // and safer anyway in case of JSR292 adaptations. ++ ++ __ movl(esp, rsender); ++ ++ // Note: the restored frame is not necessarily interpreted. ++ // Use the shared runtime version of the StackOverflowError. ++ assert(StubRoutines::throw_StackOverflowError_entry() != NULL, "stub not yet generated"); ++ __ jump(ExternalAddress(StubRoutines::throw_StackOverflowError_entry())); ++ ++ // all done with frame size check ++ __ bind(after_frame_check);BLOCK_COMMENT("generate_stack_overflow_check leave"); ++} ++ ++// Allocate monitor and lock method (asm interpreter) ++// ++// Args: ++// rmethod: Method* ++// rlocals: locals ++// ++// Kills: ++// rax ++// c_rarg0, c_rarg1, c_rarg2, c_rarg3, ...(param regs) ++// rscratch1, rscratch2 (scratch regs) ++void TemplateInterpreterGenerator::lock_method() {BLOCK_COMMENT("lock_method enter"); ++ const Register rbx = rmethod; ++ const Register rax = FSR; ++ ++ // synchronize method ++ const Address access_flags(rbx, Method::access_flags_offset()); ++ const Address monitor_block_top( ++ rfp, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ ldw(rax, access_flags); ++ __ testw(rax, JVM_ACC_SYNCHRONIZED); ++ __ jcc(Assembler::notZero, L); ++ __ stop("method doesn't need synchronization"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ ++ // get synchronization object ++ { ++ Label done; ++ __ ldw(rax, access_flags); ++ __ testw(rax, JVM_ACC_STATIC); ++ // get receiver (assume this is frequent case) ++ __ ldptr(rax, Address(rlocals, Interpreter::local_offset_in_bytes(0))); ++ __ jcc(Assembler::zero, done); ++ __ load_mirror(rax, rbx); ++ ++#ifdef ASSERT ++ { ++ Label L; ++ __ jcc(Assembler::notZero, L, rax); ++ __ stop("synchronization object is NULL"); ++ __ bind(L); ++ } ++#endif // ASSERT ++ ++ __ bind(done); ++ } ++ ++ // add space for monitor & lock ++ __ subptr(esp, entry_size, esp); // add space for a monitor entry ++ __ stptr(esp, monitor_block_top); // set new monitor block top ++ // store object ++ __ stptr(rax, Address(esp, BasicObjectLock::obj_offset_in_bytes())); ++ const Register lockreg = c_rarg1; ++ __ movl(lockreg, esp); // object address ++ __ lock_object(lockreg);BLOCK_COMMENT("lock_method leave"); ++} ++ ++// Generate a fixed interpreter frame. This is identical setup for ++// interpreted methods and for native methods hence the shared code. 
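Backing up to generate_stack_overflow_check() just above: stripped of register details, the check is a single inequality. Frames small enough to fit in one page (minus the fixed overhead) skip it entirely; otherwise the frame is accepted only if SP stays above the thread's stack_overflow_limit() plus the additional locals (scaled to bytes) plus the fixed frame overhead, and on failure the sender SP is restored and the shared throw_StackOverflowError stub is entered. A plain-arithmetic restatement, with made-up addresses and sizes purely for illustration:

#include <cstdint>
#include <cstdio>

// Mirrors the comparison emitted by generate_stack_overflow_check():
//   ok  <=>  sp > stack_limit + (extra_locals << logStackElementSize) + overhead
bool interpreter_frame_fits(uintptr_t sp, uintptr_t stack_limit,
                            uintptr_t extra_locals, uintptr_t overhead_bytes,
                            unsigned log_elem_size = 3 /* 8-byte stack slots */) {
  uintptr_t needed = (extra_locals << log_elem_size) + overhead_bytes;
  return sp > stack_limit + needed;
}

int main() {
  uintptr_t sp = 0x7f0000100000, limit = 0x7f00000f8000;   // hypothetical values
  std::printf("%d %d\n",
              interpreter_frame_fits(sp, limit, 16,   20 * 8),   // fits
              interpreter_frame_fits(sp, limit, 8000, 20 * 8));  // would overflow
  return 0;
}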
++// ++// Args: ++// RA: return address ++// rbx: Method* ++// r14: pointer to locals ++// r13: sender sp ++// rdx: cp cache ++void TemplateInterpreterGenerator::generate_fixed_frame(bool native_call) {SCOPEMARK_NAME(generate_fixed_frame, _masm); ++ const Register rbx = rmethod; ++ const Register rdx = rscratch1; ++ ++ // initialize fixed part of activation frame ++ __ enter(); // save old & set new rbp ++ __ push(rsender); // set sender sp ++ __ push((int)NULL_WORD); // leave last_sp as null ++ __ ldptr(rbcp, Address(rbx, Method::const_offset())); // get ConstMethod* ++ __ lea(rbcp, Address(rbcp, ConstMethod::codes_offset())); // get codebase ++ __ push(rbx); // save Method* ++ // Get mirror and store it in the frame as GC root for this Method* ++ __ load_mirror(rdx, rbx, rscratch2); // ++ __ push(rdx); ++ if (ProfileInterpreter) { // ++ Label method_data_continue; ++ __ ldptr(rdx, Address(rbx, in_bytes(Method::method_data_offset()))); ++ __ testptr(rdx, rdx); ++ __ jcc(Assembler::zero, method_data_continue); ++ __ addptr(rdx, in_bytes(MethodData::data_offset()), rdx); ++ __ bind(method_data_continue); ++ __ push(rdx); // set the mdp (method data pointer) ++ } else { ++ __ push(0); ++ } ++ ++ __ ldptr(rdx, Address(rbx, Method::const_offset())); ++ __ ldptr(rdx, Address(rdx, ConstMethod::constants_offset())); ++ __ ldptr(rdx, Address(rdx, ConstantPool::cache_offset_in_bytes())); ++ __ push(rdx); // set constant pool cache ++ __ push(rlocals); // set locals pointer ++ if (native_call) { ++ __ push(0); // no bcp ++ } else { ++ __ push(rbcp); // set bcp ++ } ++ __ push(0); // reserve word for pointer to expression stack bottom ++ __ stptr(esp, Address(esp, 0)); // set expression stack bottom ++} ++ ++// End of helpers ++ ++// Method entry for java.lang.ref.Reference.get. ++address TemplateInterpreterGenerator::generate_Reference_get_entry(void) {BLOCK_COMMENT("generate_Reference_get_entry enter"); ++ // Code: _aload_0, _getfield, _areturn ++ // parameter size = 1 ++ // ++ // The code that gets generated by this routine is split into 2 parts: ++ // 1. The "intrinsified" code performing an ON_WEAK_OOP_REF load, ++ // 2. The slow path - which is an expansion of the regular method entry. ++ // ++ // Notes:- ++ // * An intrinsic is always executed, where an ON_WEAK_OOP_REF load is performed. ++ // * We may jump to the slow path iff the receiver is null. If the ++ // Reference object is null then we no longer perform an ON_WEAK_OOP_REF load ++ // Thus we can use the regular method entry code to generate the NPE. ++ // ++ // This code is based on generate_accessor_entry. ++ // ++ // rmethod: Method* ++ // rsender: senderSP must preserve for slow path, set SP to it on fast path (rsender) ++ ++ address entry = __ pc(); ++ ++ const int referent_offset = java_lang_ref_Reference::referent_offset; ++ guarantee(referent_offset > 0, "referent offset not initialized"); ++ ++ Label slow_path; ++ Register rax = V0; ++ ++ // Check if local 0 != NULL ++ // If the receiver is null then it is OK to jump to the slow path. ++ __ ldptr(rax, Address(esp, 0)); ++ ++// __ testptr(rax, rax); ++ __ jcc(Assembler::zero, slow_path, rax); ++ ++ // Load the value of the referent field. 
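++  // The load below goes through the GC barrier (note the ON_WEAK_OOP_REF
++  // decorator), so collectors with a pre/SATB barrier such as G1 still observe
++  // the referent and can keep it alive, even though this intrinsic bypasses
++  // the normal getfield path.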
++ const Address field_address(rax, referent_offset); ++ ++ //__ push(RA); ++ __ load_heap_oop(rax, field_address, /*tmp1*/ rscratch1, /*tmp_thread*/ rscratch2, ON_WEAK_OOP_REF); ++ //__ pop(RA); ++ ++ __ movl(esp, rsender); // set sp to sender sp ++ __ ret_sw(); ++ ++ // generate a vanilla interpreter entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::zerolocals)); ++ BLOCK_COMMENT("generate_Reference_get_entry leave"); ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::bang_stack_shadow_pages(bool native_call) {SCOPEMARK_NAME(bang_stack_shadow_pages, _masm) ++ // Quick & dirty stack overflow checking: bang the stack & handle trap. ++ // Note that we do the banging after the frame is setup, since the exception ++ // handling code expects to find a valid interpreter frame on the stack. ++ // Doing the banging earlier fails if the caller frame is not an interpreter ++ // frame. ++ // (Also, the exception throwing code expects to unlock any synchronized ++ // method receiever, so do the banging after locking the receiver.) ++ ++ // Bang each page in the shadow zone. We can't assume it's been done for ++ // an interpreter frame with greater than a page of locals, so each page ++ // needs to be checked. Only true for non-native. ++ if (UseStackBanging) { ++ const int page_size = os::vm_page_size(); ++ const int n_shadow_pages = ((int)JavaThread::stack_shadow_zone_size()) / page_size; ++ const int start_page = native_call ? n_shadow_pages : 1; ++ for (int pages = start_page; pages <= n_shadow_pages; pages++) { ++ __ bang_stack_with_offset(pages*page_size); ++ } ++ } ++} ++ ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.update(int crc, int b) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_update_entry() {BLOCK_COMMENT("generate_CRC32_update_entry enter"); ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ ++ Label slow_path; ++ // If we need a safepoint check, generate full interpreter entry. ++// __ mov(GP, SafepointSynchronize::address_of_state()); ++// __ lw(AT, GP, 0); ++// __ mov(GP, (SafepointSynchronize::_not_synchronized)); ++// __ bne_c(AT, GP, slow_path); ++ __ safepoint_poll(slow_path, rthread, rscratch1); ++ ++ // We don't generate local frame and don't align stack because ++ // we call stub code and there is no safepoint on this path. 
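++    // In outline the code below performs the standard table-driven CRC-32
++    // byte update, roughly:
++    //   crc = ~crc;
++    //   crc = crc_table[(crc ^ b) & 0xff] ^ (crc >> 8);   // logical shift
++    //   return ~crc;
++    // The two notw instructions provide the pre-/post-inversion.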
++ // Load parameters ++ const Register crc = V0; // crc ++ const Register val = A0; // source java byte value ++ const Register tbl = A1; // scratch ++ ++ // Arguments are reversed on java expression stack ++ __ ldw(val, 0, esp); // byte value ++ __ ldw(crc, wordSize, esp); // Initial CRC ++ ++ __ lea(tbl, ExternalAddress(StubRoutines::crc_table_addr())); ++ __ notw(crc, crc); // ~crc ++ __ update_byte_crc32(crc, val, tbl); ++ __ notw(crc ,crc); // ~crc ++ ++ // result in V0 ++ // _areturn ++ __ addl(rsender, R0, esp); // set sp to sender sp ++ __ ret_sw(); ++ ++ // generate a vanilla native entry as the slow path ++ __ bind(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); ++ return entry; ++ }BLOCK_COMMENT("generate_CRC32_update_entry leave"); ++ return NULL; ++} ++ ++/** ++ * Method entry for static native methods: ++ * int java.util.zip.CRC32.updateBytes(int crc, byte[] b, int off, int len) ++ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) ++ */ ++address TemplateInterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind) {BLOCK_COMMENT("generate_CRC32_updateBytes_entry enter"); ++ if (UseCRC32Intrinsics) { ++ address entry = __ pc(); ++ ++ // // rbx,: Method* ++ // // r13: senderSP must preserved for slow path, set SP to it on fast path ++ // // If we need a safepoint check, generate full interpreter entry. ++ // We don't generate local frame and don't align stack because ++ // // we call stub code and there is no safepoint on this path. ++ Label slow_path; ++// __ mov(GP, SafepointSynchronize::address_of_state()); ++// __ lw(AT, GP, 0); ++// __ mov(GP, (SafepointSynchronize::_not_synchronized)); ++// __ bne_c(AT, GP, slow_path); ++ __ safepoint_poll(slow_path, rthread, rscratch1); ++ ++ // Load parameters ++ const Register crc = A0; // crc ++ const Register buf = A1; // source java byte array address ++ const Register len = A2; // length ++ const Register off = len; // offset (never overlaps with 'len') ++// const Register table = A3; // crc_table address ++ ++ // // Arguments are reversed on java expression stack ++ // // Calculate address of start element ++ if (kind == Interpreter::java_util_zip_CRC32_updateByteBuffer) { ++ __ ldl(buf, 2 * wordSize, esp); // long buf ++ __ ldw(off, 1 * wordSize, esp); // offset ++ __ zapnot(off, 0xF, off); ++ __ addl(buf, off, buf); // + offset ++ __ ldw(crc, 4 * wordSize, esp); // Initial CRC ++ } else { ++ __ ldw(off, 1 * wordSize, esp); ++ __ zapnot(off, 0xF, off); ++ __ ldl(buf, 2 * wordSize, esp); // byte[] array ++ __ addl(buf, arrayOopDesc::base_offset_in_bytes(T_BYTE), buf); // + header size ++ __ addl(buf, off, buf); // offset ++ __ ldw(crc, 3 * wordSize, esp); // Initial CRC ++ } ++ // Can now load 'len' since we're finished with 'off' ++ __ ldw(len, 0 * wordSize, esp); ++ __ zapnot(len, 0xF, len); ++ __ enter(); ++ //__ stop("use SharedRuntime::updateBytesCRC32? 
jzy"); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::updateBytesCRC32), 3); ++ // _areturn ++ __ leave(); ++ __ daddu(esp, rsender, R0); // set sp to sender sp ++ __ ret_sw(); ++ // generate a vanilla native entry as the slow path ++ __ BIND(slow_path); ++ __ jump_to_entry(Interpreter::entry_for_kind(Interpreter::native)); ++ return entry; ++ }BLOCK_COMMENT("generate_CRC32_updateBytes_entry leave"); ++ return NULL; ++} ++ ++/** ++ * Method entry for intrinsic-candidate (non-native) methods: ++ * int java.util.zip.CRC32C.updateBytes(int crc, byte[] b, int off, int end) ++ * int java.util.zip.CRC32C.updateDirectByteBuffer(int crc, long buf, int off, int end) ++ * Unlike CRC32, CRC32C does not have any methods marked as native ++ * CRC32C also uses an "end" variable instead of the length variable CRC32 uses ++ */ ++address TemplateInterpreterGenerator::generate_CRC32C_updateBytes_entry(AbstractInterpreter::MethodKind kind) { ++ __ should_not_reach_here("generate_CRC32C_updateBytes_entry not implement"); ++ return NULL; ++} ++ ++// Interpreter stub for calling a native method. (asm interpreter) ++// This sets up a somewhat different looking stack for calling the ++// native method than the typical interpreter frame setup. ++address TemplateInterpreterGenerator::generate_native_entry(bool synchronized) {SCOPEMARK_NAME(generate_native_entry, _masm); ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ // rsender: sender's sp ++ // rmethod: Method* ++ const Register rbx = rmethod; ++ const Register rcx = rscratch1; ++ const Register rax = V0; ++ address entry_point = __ pc(); ++ ++ const Address constMethod (rbx, Method::const_offset()); ++ const Address access_flags (rbx, Method::access_flags_offset()); ++ const Address size_of_parameters(rcx, ConstMethod:: ++ size_of_parameters_offset()); ++ ++ ++ // get parameter size (always needed) ++ __ ldptr(rcx, constMethod); ++ __ load_unsigned_short(rcx, size_of_parameters); ++ ++ // native calls don't need the stack size check since they have no ++ // expression stack and the arguments are already on the stack and ++ // we only add a handful of words to the stack ++ ++ // rmethod: Method* ++ // rcx: size of parameters ++ // rbcp: sender sp //? 
jzy ++ //__ pop(rax); // different from x86, sw donot need return address ++ ++ // for natives the size of locals is zero ++ ++ // compute beginning of parameters (rlocals) ++ __ lea(rlocals, Address(esp, rcx, Interpreter::stackElementScale(), -wordSize)); ++ ++ // add 2 zero-initialized slots for native calls ++ // initialize result_handler slot ++ __ push((int) NULL_WORD); ++ // slot for oop temp ++ // (static native method holder mirror/jni oop result) ++ __ push((int) NULL_WORD); ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(true); ++ ++ // make sure method is native & not abstract ++#ifdef ASSERT ++ __ ldw(rax, access_flags); ++ { ++ Label L; ++ __ testw(rax, JVM_ACC_NATIVE); ++ __ jcc(Assembler::notZero, L); ++ __ stop("tried to execute non-native method as native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ testw(rax, JVM_ACC_ABSTRACT); ++ __ jcc(Assembler::zero, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception handler ++ // would try to exit the monitor of synchronized methods which hasn't ++ // been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation will ++ // check this flag. ++ Register thread1 = rthread; ++ const Address do_not_unlock_if_synchronized(thread1, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ __ stbool(true, do_not_unlock_if_synchronized); ++ ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, NULL, NULL); ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++ bang_stack_shadow_pages(true); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++ __ stbool(false, do_not_unlock_if_synchronized); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. ++ if (synchronized) { ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ ldw(rax, access_flags); ++ __ testw(rax, JVM_ACC_SYNCHRONIZED); ++ __ jcc(Assembler::zero, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ const Address monitor_block_top(rfp, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ ldptr(rax, monitor_block_top); ++ __ cmpptr(rax, esp); ++ __ jcc(Assembler::equal, L); ++ __ stop("broken stack frame setup in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti support ++ __ notify_method_entry(); ++ ++ // work registers ++ const Register method = rmethod; ++ const Register thread = rthread; ++ const Register t = T12; //will use in call instruction in sw ++ ++ // allocate space for parameters ++ __ get_method(method); ++ __ ldptr(t, Address(method, Method::const_offset())); ++ __ load_unsigned_short(t, Address(t, ConstMethod::size_of_parameters_offset())); ++ ++ __ slll(t, Interpreter::logStackElementSize, t); ++ ++ __ subptr(esp, t, esp); ++ //__ subptr(esp, frame::arg_reg_save_area_bytes, esp); // windows ++ __ andptr(esp, -16, esp); // must be 16 byte boundary (see amd64 ABI) sw need this ? 
jzy ++ // get signature handler ++ __ block_comment(" get signature handler"); ++ { ++ Label L; ++ __ ldptr(t, Address(method, Method::signature_handler_offset())); ++ //__ testptr(t, t); ++ __ jcc(Assembler::notZero, L, t); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), ++ method); ++ __ get_method(method); ++ __ ldptr(t, Address(method, Method::signature_handler_offset())); ++ __ bind(L); ++ } ++ ++ // call signature handler ++ assert(InterpreterRuntime::SignatureHandlerGenerator::from() == rlocals, ++ "adjust this code"); ++ assert(InterpreterRuntime::SignatureHandlerGenerator::to() == esp, ++ "adjust this code"); ++ assert(InterpreterRuntime::SignatureHandlerGenerator::temp() == rscratch1, ++ "adjust this code"); ++ ++ // The generated handlers do not touch RBX (the method oop). ++ // However, large signatures cannot be cached and are generated ++ // each time here. The slow-path generator can do a GC on return, ++ // so we must reload it after the call. ++ __ call(t); ++ __ get_method(method); // slow path can do a GC, reload RBX ++ ++ ++ // result handler is in V0 ++ // set result handler ++ __ stptr(rax, ++ Address(rfp, ++ (frame::interpreter_frame_result_handler_offset) * wordSize)); ++ ++ // pass mirror handle if static call ++ { ++ Label L; ++ __ ldw(t, Address(method, Method::access_flags_offset())); ++ __ testw(t, JVM_ACC_STATIC); ++ __ jcc(Assembler::zero, L); ++ // get mirror ++ __ load_mirror(t, method, rax); ++ // copy mirror into activation frame ++ __ stptr(t, ++ Address(rfp, frame::interpreter_frame_oop_temp_offset * wordSize)); ++ // pass handle to mirror ++ __ lea(c_rarg1, ++ Address(rfp, frame::interpreter_frame_oop_temp_offset * wordSize)); ++ __ bind(L); ++ } ++ ++ // get native function entry point ++ { ++ Label L; ++ __ ldptr(rax, Address(method, Method::native_function_offset())); ++ ExternalAddress unsatisfied(SharedRuntime::native_method_throw_unsatisfied_link_error_entry()); ++ __ cmpptr(rax, unsatisfied.addr()); ++ __ jcc(Assembler::notEqual, L); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::prepare_native_call), ++ method); ++ __ get_method(method); ++ __ ldptr(rax, Address(method, Method::native_function_offset())); ++ __ bind(L); ++ } ++ ++ // pass JNIEnv ++ __ lea(c_rarg0, Address(rthread, JavaThread::jni_environment_offset())); ++ ++ // Set the last Java PC in the frame anchor to be the return address from ++ // the call to the native method: this will allow the debugger to ++ // generate an accurate stack trace. ++ Label native_return; ++ __ set_last_Java_frame(esp, rfp, native_return, rscratch1); //TODO:check jzy ++ ++ // change thread state ++#ifdef ASSERT ++ { ++ Label L; ++ __ ldwu(t, Address(thread, JavaThread::thread_state_offset())); ++ __ cmpw(t, _thread_in_Java); ++ __ jcc(Assembler::equal, L); ++ __ stop("Wrong thread state in native stub"); ++ __ bind(L); ++ } ++#endif ++ ++ // Change state to native ++ __ mov_immediate32(rscratch1, _thread_in_native); ++ __ stw(rscratch1, Address(thread, JavaThread::thread_state_offset())); ++ ++ // call native method ++ __ movl(t, rax); //SW ABI ++ __ call(t, &native_return);//t == T12 ++// __ bind(native_return); ++ // result potentially in V0 or f0 ++ ++ // via _last_native_pc and not via _last_jave_sp ++ // NOTE: the order of theses push(es) is known to frame::interpreter_frame_result. ++ // If the order changes or anything else is added to the stack the code in ++ // interpreter_frame_result will have to be changed. 
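++  // From here on the usual native-call transition protocol follows: save the
++  // raw result, switch _thread_in_native -> _thread_in_native_trans, make that
++  // store visible (memb() or the serialization page), poll for a safepoint or
++  // suspend request, and only then return to _thread_in_Java before any oop is
++  // touched.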
++ //FIXME, should modify here ++ // save return value to keep the value from being destroyed by other calls ++ __ push_d(FSF); //? jzy ++ __ push(V0); ++ ++ // change thread state ++ __ mov_immediate32(rscratch1, _thread_in_native_trans); ++ __ stw(rscratch1, Address(thread, JavaThread::thread_state_offset())); ++ ++ if (os::is_MP()) { ++ if (UseMembar) { ++ // Force this write out before the read below ++ __ memb(); ++ } else { ++ // Write serialization page so VM thread can do a pseudo remote membar. ++ // We use the current thread pointer to calculate a thread specific ++ // offset to write to within the page. This minimizes bus traffic ++ // due to cache line collision. ++ __ serialize_memory(thread, rscratch1); ++ } ++ } ++ ++ // check for safepoint operation in progress and/or pending suspend requests ++ { ++ Label Continue; ++ Label slow_path; ++ ++ __ safepoint_poll(slow_path, rthread, rscratch2); ++ ++ __ cmpw(Address(thread, JavaThread::suspend_flags_offset()), R0); ++ __ jcc(Assembler::equal, Continue); ++ __ bind(slow_path); ++ ++ // Don't use call_VM as it will see a possible pending exception ++ // and forward it and never return here preventing us from ++ // clearing _last_native_pc down below. Also can't use ++ // call_VM_leaf either as it will check to see if r13 & r14 are ++ // preserved and correspond to the bcp/locals pointers. So we do a ++ // runtime call by hand. ++ // ++ __ movl(c_rarg0, rthread); ++ __ movl(r12_heapbase, esp); // remember sp (can only use r12 if not using call_VM) ++// __ subptr(esp, frame::arg_reg_save_area_bytes); // windows ++ __ subptr(esp, 16, esp); // align stack as required by ABI ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, JavaThread::check_special_condition_for_native_trans))); ++ __ movl(esp, r12_heapbase); // restore sp ++ __ reinit_heapbase(); ++ __ bind(Continue); ++ } ++ ++ // change thread state ++ __ mov_immediate32(rscratch1, _thread_in_Java); ++ __ stw(rscratch1, Address(thread, JavaThread::thread_state_offset())); ++ ++ // reset_last_Java_frame ++ __ reset_last_Java_frame(thread, true); ++ ++ if (CheckJNICalls) { ++ // clear_pending_jni_exception_check ++ __ stptr(R0, Address(thread, JavaThread::pending_jni_exception_check_fn_offset())); ++ } ++ ++ // reset handle block ++ __ ldptr(t, Address(thread, JavaThread::active_handles_offset())); ++ __ stw(R0, Address(t, JNIHandleBlock::top_offset_in_bytes())); ++ ++ // If result is an oop unbox and store it in frame where gc will see it ++ // and result handler will pick it up ++ ++ { ++ Label no_oop, not_weak, store_result; ++ __ lea(t, ExternalAddress(AbstractInterpreter::result_handler(T_OBJECT))); ++ __ cmpptr(t, Address(rfp, frame::interpreter_frame_result_handler_offset*wordSize)); ++ __ jcc(Assembler::notEqual, no_oop); ++ // retrieve result ++ __ pop(rax); ++ // Unbox oop result, e.g. JNIHandles::resolve value. 
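++  // A native method returns oops as JNI handles (possibly weak ones);
++  // resolve_jobject dereferences the handle with the required GC barrier, and
++  // the oop is then parked in the frame's oop_temp slot so it stays visible to
++  // GC until the result handler picks it up.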
++ __ resolve_jobject(rax /* value */, ++ thread /* thread */, ++ t /* tmp */); ++ __ stptr(rax, Address(rfp, frame::interpreter_frame_oop_temp_offset*wordSize)); ++ // keep stack depth as expected by pushing oop which will eventually be discarded ++ __ push(rax); ++ __ BIND(no_oop); ++ } ++ { ++ Label no_reguard; ++ __ cmpptr(Address(thread, JavaThread::stack_guard_state_offset()), ++ JavaThread::stack_guard_yellow_reserved_disabled); ++ __ jcc(Assembler::notEqual, no_reguard); ++ ++ __ pushad(); ++ __ movl(r12_heapbase, esp);// ++ __ andptr(esp, 16, esp); // align stack as required by ABI ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, SharedRuntime::reguard_yellow_pages))); ++ __ movl(esp, r12_heapbase); ++ __ popad(); ++ //add for compressedoops ++ __ reinit_heapbase(); ++ __ BIND(no_reguard); ++ } ++ ++ ++ // The method register is junk from after the thread_in_native transition ++ // until here. Also can't call_VM until the bcp has been ++ // restored. Need bcp for throwing exception below so get it now. ++ __ get_method(method); ++ ++ __ ldptr(rbcp, Address(method, Method::const_offset())); // get ConstMethod* ++ __ lea(rbcp, Address(rbcp, in_bytes(ConstMethod::codes_offset()))); ++ // handle exceptions (exception handling will handle unlocking!) ++ { ++ Label L; ++ __ cmpptr(Address(thread, Thread::pending_exception_offset()),R0); ++ __ jcc(Assembler::zero, L); ++ // Note: At some point we may want to unify this with the code ++ // used in call_VM_base(); i.e., we should use the ++ // StubRoutines::forward_exception code. For now this doesn't work ++ // here because the rsp is not correctly set at this point. ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_pending_exception)); ++ __ should_not_reach_here("1186"); ++ __ BIND(L); ++ } ++ ++ // do unlocking if necessary ++ { ++ Label L; ++ __ ldw(t, Address(method, Method::access_flags_offset())); ++ __ testw(t, JVM_ACC_SYNCHRONIZED); ++ __ jcc(Assembler::zero, L); ++ // the code below should be shared with interpreter macro ++ // assembler implementation ++ { ++ Label unlock; ++ // BasicObjectLock will be first in list, since this is a ++ // synchronized method. However, need to check that the object ++ // has not been unlocked by an explicit monitorexit bytecode. ++ const Address monitor(rfp, ++ (int)(frame::interpreter_frame_initial_sp_offset * ++ wordSize - (int)sizeof(BasicObjectLock))); ++ ++ const Register regmon = c_rarg1; ++ ++ // monitor expect in c_rarg1 for slow unlock path ++ __ lea(regmon, monitor); // address of first monitor ++ ++ __ ldptr(t, Address(regmon, BasicObjectLock::obj_offset_in_bytes())); ++ __ testptr(t, t); ++ __ jcc(Assembler::notZero, unlock); ++ ++ // Entry already unlocked, need to throw exception ++ __ MacroAssembler::call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here("1220"); ++ ++ __ BIND(unlock); ++ __ unlock_object(regmon); ++ } ++ __ BIND(L); ++ } ++ ++ // jvmti support ++ // Note: This must happen _after_ handling/throwing any exceptions since ++ // the exception handler code notifies the runtime of method exits ++ // too. If this happens before, method entry/exit notifications are ++ // not properly paired (was bug - gri 11/22/99). 
++ __ notify_method_exit(vtos, InterpreterMacroAssembler::NotifyJVMTI);// ++ ++ // restore potential result in edx:eax, call result handler to ++ // restore potential result in ST0 & handle result ++ ++ __ pop(rax); ++ __ pop_d(); ++ __ ldptr(t, Address(rfp, ++ (frame::interpreter_frame_result_handler_offset) * wordSize)); ++ __ call(t); ++ ++ // remove activation ++ __ ldptr(t, Address(rfp, ++ frame::interpreter_frame_sender_sp_offset * ++ wordSize)); // get sender sp ++ __ leave(); // remove frame anchor ++ //__ pop(rdi); // get return address TODO:where set? jzy ++ __ movl(esp, t); // set sp to sender sp ++ __ jmp(RA); // return address, set RA in leave() ++ ++ if (inc_counter) { ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++// Abstract method entry ++// Attempt to execute abstract method. Throw exception ++address TemplateInterpreterGenerator::generate_abstract_entry(void) {BLOCK_COMMENT("generate_abstract_entry enter"); ++ // rmethod: methodOop ++ // V0: receiver (unused) ++ // rsender : sender 's sp ++ address entry_point = __ pc(); ++ ++ // abstract method entry ++ __ empty_expression_stack(); ++ __ restore_bcp(); ++ __ restore_locals(); ++ // throw exception ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorWithMethod), rmethod); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here("1277"); ++BLOCK_COMMENT("generate_abstract_entry leave"); ++ return entry_point; ++} ++ ++// ++// Generic interpreted method entry to (asm) interpreter ++// ++address TemplateInterpreterGenerator::generate_normal_entry(bool synchronized) {SCOPEMARK_NAME(generate_normal_entry, _masm) ++ // determine code generation flags ++ bool inc_counter = UseCompiler || CountCompiledCalls || LogTouchedMethods; ++ ++ // rmethod: Method* ++ // rsender: sender 's sp ++ //const Register rbx = rmethod; ++ //const Register rdx = c_rarg5;//special,should be same register in generate_stack_overflow_check ++ //const Register rcx = c_rarg4; ++ //const Register rax = V0; ++ address entry_point = __ pc(); ++ ++ const Address constMethod(rbx, Method::const_offset()); ++ const Address access_flags(rbx, Method::access_flags_offset()); ++ const Address size_of_parameters(rdx, ++ ConstMethod::size_of_parameters_offset()); ++ const Address size_of_locals(rdx, ConstMethod::size_of_locals_offset()); ++ ++ ++ // get parameter size (always needed) ++ __ ldptr(rdx, constMethod); ++ __ ldhu(rcx, size_of_parameters); ++ ++ // rmethod: Method* ++ // rcx: size of parameters ++ // rsender: sender 's sp ,could be different frome sp if we call via c2i ++ ++ ++ __ ldhu(rdx, size_of_locals);// get size of locals in words ++ __ subl(rdx, rcx, rdx);// rdx = no. of additional locals ++ ++ // see if we've got enough room on the stack for locals plus overhead. 
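++  // (rdx now holds the number of extra local slots, e.g. a method with three
++  // parameters and five locals needs two additional slots. The check below adds
++  // that size plus the fixed frame overhead to stack_overflow_limit and throws
++  // a StackOverflowError via the shared stub if SP would not stay above it;
++  // frames smaller than one page skip the check and rely on the guard pages.)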
++ generate_stack_overflow_check(); // ++ ++// // get return address ++// __ pop(rax);// x86 pushes RA on stack, so pops here, we haven't push ++ ++ // compute beginning of parameters (rlocals) ++ __ lea(rlocals, Address(esp, rcx, Interpreter::stackElementScale(), -wordSize)); ++ ++ // rdx - # of additional locals ++ // allocate space for locals ++ // explicitly initialize locals ++ { ++ Label exit, loop; ++ __ jcc(Assembler::lessEqual, exit, rdx); // do nothing if rdx <= 0 ++ __ bind(loop); ++ __ push((int)NULL_WORD); // initialize local variables ++ __ decrementl(rdx); // until everything initialized ++ __ jcc(Assembler::greater, loop, rdx); ++ __ bind(exit); ++ } ++ ++ // initialize fixed part of activation frame ++ generate_fixed_frame(false); ++ ++ // make sure method is not native & not abstract ++#ifdef ASSERT ++ __ ldw(rax, access_flags); ++ { ++ Label L; ++ __ testw(rax, JVM_ACC_NATIVE); ++ __ jcc(Assembler::zero, L); ++ __ stop("tried to execute native method as non-native"); ++ __ bind(L); ++ } ++ { ++ Label L; ++ __ testw(rax, JVM_ACC_ABSTRACT); ++ __ jcc(Assembler::zero, L); ++ __ stop("tried to execute abstract method in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // Since at this point in the method invocation the exception ++ // handler would try to exit the monitor of synchronized methods ++ // which hasn't been entered yet, we set the thread local variable ++ // _do_not_unlock_if_synchronized to true. The remove_activation ++ // will check this flag. ++ ++ const Register thread = rthread; ++ const Address do_not_unlock_if_synchronized(thread, ++ in_bytes(JavaThread::do_not_unlock_if_synchronized_offset())); ++ __ ldi(rscratch1, 1, R0); ++ __ stb(rscratch1, do_not_unlock_if_synchronized); ++ ++ __ profile_parameters_type(rax, rcx, rdx); // ++ // increment invocation count & check for overflow ++ Label invocation_counter_overflow; ++ Label profile_method; ++ Label profile_method_continue; ++ if (inc_counter) { ++ generate_counter_incr(&invocation_counter_overflow, ++ &profile_method, ++ &profile_method_continue); ++ if (ProfileInterpreter) { ++ __ bind(profile_method_continue); ++ } ++ } ++ ++ Label continue_after_compile; ++ __ bind(continue_after_compile); ++ ++ // check for synchronized interpreted methods ++ bang_stack_shadow_pages(false); ++ ++ // reset the _do_not_unlock_if_synchronized flag ++ __ ldi(rscratch1, 0, R0); ++ __ stb(rscratch1, do_not_unlock_if_synchronized); ++ ++ // check for synchronized methods ++ // Must happen AFTER invocation_counter check and stack overflow check, ++ // so method is not locked if overflows. 
++ if (synchronized) { ++ // Allocate monitor and lock method ++ lock_method(); ++ } else { ++ // no synchronization necessary ++#ifdef ASSERT ++ { ++ Label L; ++ __ ldw(rax, access_flags); ++ __ testw(rax, JVM_ACC_SYNCHRONIZED); ++ __ jcc(Assembler::zero, L); ++ __ stop("method needs synchronization"); ++ __ bind(L); ++ } ++#endif ++ } ++ ++ // start execution ++#ifdef ASSERT ++ { ++ Label L; ++ const Address monitor_block_top (rfp, ++ frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ __ ldptr(rax, monitor_block_top); ++ __ cmpptr(rax, esp); ++ __ jcc(Assembler::equal, L); ++ __ stop("broken stack frame setup in interpreter"); ++ __ bind(L); ++ } ++#endif ++ ++ // jvmti support ++ __ notify_method_entry(); ++ __ block_comment("start to execute bytecode"); ++ __ dispatch_next(vtos); ++ ++ // invocation counter overflow ++ if (inc_counter) { ++ if (ProfileInterpreter) { ++ // We have decided to profile this method in the interpreter ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ set_method_data_pointer_for_bcp(); ++ __ get_method(rbx); ++ __ jmp(profile_method_continue); ++ } ++ // Handle overflow of counter and compile method ++ __ bind(invocation_counter_overflow); ++ generate_counter_overflow(continue_after_compile); ++ } ++ ++ return entry_point; ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateInterpreterGenerator::generate_throw_exception() {BLOCK_COMMENT("generate_throw_exception enter");//__ warn("TODO:check function right generate_throw_exception jzy "); ++ // Entry point in previous activation (i.e., if the caller was ++ // interpreted) ++ Interpreter::_rethrow_exception_entry = __ pc(); ++ Register rax = V0; ++ ++ // Restore sp to interpreter_frame_last_sp even though we are going ++ // to empty the expression stack for the exception processing. ++ __ stptr(R0, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // rax: exception ++ // rdx: return address/pc that threw exception ++ __ restore_bcp(); // r13/rsi points to call/send ++ __ restore_locals(); ++ //add for compressedoops ++ __ reinit_heapbase(); // restore r12_heapbase as heapbase. ++ // Entry point for exceptions thrown within interpreter code ++ Interpreter::_throw_exception_entry = __ pc(); ++ // expression stack is undefined here ++ // V0: exception ++ // rbcp: exception bcp ++ __ verify_oop(rax); ++ Register rarg = c_rarg1; ++ __ movl(rarg, rax); ++ ++ // expression stack must be empty before entering the VM in case of ++ // an exception ++ __ empty_expression_stack(); ++ // find exception handler address and preserve exception oop ++ __ call_VM(rdx, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::exception_handler_for_exception), ++ rarg); ++ // V0: exception handler entry point ++ // rdx: preserved exception oop ++ // rbcp: bcp for exception handler ++ ++ __ push_ptr(rdx);// push exception which is now the only value on the stack ++ __ jmp(rax); // jump to exception handler (may be _remove_activation_entry!) ++ ++ // If the exception is not handled in the current frame the frame is removed and ++ // the exception is rethrown (i.e. exception continuation is _rethrow_exception). ++ // ++ // Note: At this point the bci is still the bxi for the instruction which caused ++ // the exception and the expression stack is empty. Thus, for any VM calls ++ // at this point, GC will find a legal oop map (with empty expression stack). 
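++  // If the current method has no handler for this bci, the entry returned in
++  // V0 is Interpreter::_remove_activation_entry, so the jmp above unwinds this
++  // frame and rethrows in the caller (for an interpreted caller, through
++  // _rethrow_exception_entry).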
++ ++ // In current activation ++ // V0: exception ++ // rbcp: exception bcp ++ ++ // ++ // JVMTI PopFrame support ++ // ++ ++ Interpreter::_remove_activation_preserving_args_entry = __ pc(); ++ __ empty_expression_stack(); ++ // Set the popframe_processing bit in pending_popframe_condition ++ // indicating that we are currently handling popframe, so that ++ // call_VMs that may happen later do not trigger new popframe ++ // handling cycles. ++ const Register thread = rthread; ++ __ ldw(rdx, Address(thread, JavaThread::popframe_condition_offset())); ++ __ orw(rdx, JavaThread::popframe_processing_bit, rdx); ++ __ stw(rdx, Address(thread, JavaThread::popframe_condition_offset())); ++ ++ { ++ // Check to see whether we are returning to a deoptimized frame. ++ // (The PopFrame call ensures that the caller of the popped frame is ++ // either interpreted or compiled and deoptimizes it if compiled.) ++ // In this case, we can't call dispatch_next() after the frame is ++ // popped, but instead must save the incoming arguments and restore ++ // them after deoptimization has occurred. ++ // ++ // Note that we don't compare the return PC against the ++ // deoptimization blob's unpack entry because of the presence of ++ // adapter frames in C2. ++ Label caller_not_deoptimized; ++ Register rarg = c_rarg1; // ++ __ ldptr(rarg, Address(rfp, frame::return_addr_offset * wordSize)); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::interpreter_contains), rarg); ++ //__ testl(rax, rax); ++ __ jcc(Assembler::notZero, caller_not_deoptimized, rax); ++ ++ // Compute size of arguments for saving when returning to ++ // deoptimized caller ++ __ get_method(rax); ++ __ ldptr(rax, Address(rax, Method::const_offset())); ++ __ load_unsigned_short(rax, Address(rax, in_bytes(ConstMethod:: ++ size_of_parameters_offset()))); ++ __ slll(rax, Interpreter::logStackElementSize, rax); ++ __ restore_locals(); ++ __ subptr(rlocals, rax, rlocals); ++ __ addptr(rlocals, wordSize, rlocals); ++ // Save these arguments ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ++ Deoptimization:: ++ popframe_preserve_args), ++ thread, rax, rlocals); ++ ++ __ remove_activation(vtos, rdx, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Inform deoptimization that it is responsible for restoring these arguments ++ __ movw(rscratch3, JavaThread::popframe_force_deopt_reexecution_bit); ++ __ stw(rscratch3, Address(thread, JavaThread::popframe_condition_offset())); ++ // Continue in deoptimization handler ++ __ jmp(rdx); ++ ++ __ bind(caller_not_deoptimized); ++ } ++ ++ __ remove_activation(vtos, rdx, ++ /* throw_monitor_exception */ false, ++ /* install_monitor_exception */ false, ++ /* notify_jvmdi */ false); ++ ++ // Finish with popframe handling ++ // A previous I2C followed by a deoptimization might have moved the ++ // outgoing arguments further up the stack. PopFrame expects the ++ // mutations to those outgoing arguments to be preserved and other ++ // constraints basically require this frame to look exactly as ++ // though it had previously invoked an interpreted activation with ++ // no space between the top of the expression stack (current ++ // last_sp) and the top of stack. Rather than force deopt to ++ // maintain this kind of invariant all the time we call a small ++ // fixup routine to move the mutated arguments onto the top of our ++ // expression stack if necessary. 
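++  // Conceptually: c_rarg1 is the current top of stack and c_rarg2 the saved
++  // last_sp; popframe_move_outgoing_args copies the invoke's outgoing arguments
++  // from where the adapter left them back to where the interpreter expects
++  // them, so the frame again looks like a plain interpreted invocation before
++  // we re-dispatch.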
++ ++ __ movl(c_rarg1, esp); ++ __ ldptr(c_rarg2, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ // PC must point into interpreter here ++ __ set_last_Java_frame(noreg, rfp, __ pc(), rscratch1); ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, InterpreterRuntime::popframe_move_outgoing_args), rthread, c_rarg1, c_rarg2); ++ __ reset_last_Java_frame(thread, true); ++ // Restore the last_sp and null it out ++ __ ldptr(esp, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ __ stptr(R0, Address(rfp, frame::interpreter_frame_last_sp_offset * wordSize)); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ // The method data pointer was incremented already during ++ // call profiling. We have to restore the mdp for the current bcp. ++ if (ProfileInterpreter) { ++ __ set_method_data_pointer_for_bcp(); ++ } ++ // Clear the popframe condition flag ++ __ movw(rscratch1, JavaThread::popframe_inactive); ++ __ stw(rscratch1, Address(thread, JavaThread::popframe_condition_offset())); ++#if INCLUDE_JVMTI ++ { ++ Label L_done; ++ const Register local0 = rlocals; ++ ++ __ cmpb(Address(rbcp, 0), Bytecodes::_invokestatic); ++ __ jcc(Assembler::notEqual, L_done); ++ ++ // The member name argument must be restored if _invokestatic is re-executed after a PopFrame call. ++ // Detect such a case in the InterpreterRuntime function and return the member name argument, or NULL. ++ ++ __ get_method(rdx); ++ __ ldptr(rax, Address(local0, 0)); ++ __ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::member_name_arg_or_null), rax, rdx, rbcp); ++ ++ //__ testptr(rax, rax); ++ __ jcc(Assembler::zero, L_done, rax); ++ ++ __ stptr(rax, Address(esp, 0)); //store 64bits ++ __ bind(L_done); ++ } ++#endif // INCLUDE_JVMTI ++ __ dispatch_next(vtos); ++ // end of PopFrame support ++ ++ Interpreter::_remove_activation_entry = __ pc(); ++ ++ // preserve exception over this code sequence ++ __ pop_ptr(rax); ++ __ stptr(rax, Address(thread, JavaThread::vm_result_offset())); ++ // remove the activation (without doing throws on illegalMonitorExceptions) ++ __ remove_activation(vtos, rdx, false, true, false); ++ // restore exception ++ __ get_vm_result(rax, thread); ++ //__ verify_oop(rax); ++ ++ // Inbetween activations - previous activation type unknown yet ++ // compute continuation point - the continuation point expects ++ // the following registers set up: ++ // ++ // rax: exception ++ // rscratch1: return address/pc that threw exception ++ // rsp: expression stack of caller ++ // rbp: ebp of caller ++ __ push(rax); // save exception ++ __ push(rdx); // save return address ++ __ super_call_VM_leaf(CAST_FROM_FN_PTR(address, ++ SharedRuntime::exception_handler_for_return_address), ++ thread, rdx); ++ __ movl(rbx, rax); // save exception handler ++ __ pop(rdx); // restore return address TODO:here need check jzy ++ __ pop(rax); ++ // Note that an "issuing PC" is actually the next PC after the call ++ __ jmp(rbx); // jump to exception handler of caller ++ BLOCK_COMMENT("generate_throw_exception leave"); ++} ++ ++ ++// ++// JVMTI ForceEarlyReturn support ++// ++address TemplateInterpreterGenerator::generate_earlyret_entry_for(TosState state) {BLOCK_COMMENT("generate_earlyret_entry_for enter"); ++ address entry = __ pc(); ++ ++ __ restore_bcp(); ++ __ restore_locals(); ++ __ empty_expression_stack(); ++ __ empty_FPU_stack(); ++ __ load_earlyret_value(state); ++ ++ __ ldptr(rscratch1, Address(rthread, JavaThread::jvmti_thread_state_offset())); ++ ++ const Address cond_addr(rscratch1, 
in_bytes(JvmtiThreadState::earlyret_state_offset()));
++  // Clear the earlyret state
++  __ movw(rscratch2, JvmtiThreadState::earlyret_inactive);
++  __ stw(rscratch2, cond_addr);
++  __ memb();
++
++  __ remove_activation(state, rscratch1,
++                       false, /* throw_monitor_exception */
++                       false, /* install_monitor_exception */
++                       true); /* notify_jvmdi */
++  __ memb();
++  __ jmp(rscratch1);BLOCK_COMMENT("generate_earlyret_entry_for leave");
++  return entry;
++} // end of ForceEarlyReturn support
++
++
++//-----------------------------------------------------------------------------
++// Helper for vtos entry point generation
++
++void TemplateInterpreterGenerator::set_vtos_entry_points(Template* t,
++                                                         address& bep,
++                                                         address& cep,
++                                                         address& sep,
++                                                         address& aep,
++                                                         address& iep,
++                                                         address& lep,
++                                                         address& fep,
++                                                         address& dep,
++                                                         address& vep) {BLOCK_COMMENT("TemplateInterpreterGenerator::set_vtos_entry_points enter");
++  assert(t->is_valid() && t->tos_in() == vtos, "illegal template");
++  Label L;
++  fep = __ pc(); __ push(ftos); __ beq_l(R0, L);
++  dep = __ pc(); __ push(dtos); __ beq_l(R0, L);
++  lep = __ pc(); __ push(ltos); __ beq_l(R0, L);
++  aep = __ pc(); __ push(atos); __ beq_l(R0, L);
++  bep = cep = sep = iep = __ pc(); __ push(itos);
++  vep = __ pc();
++  __ BIND(L);    // fall through
++  generate_and_dispatch(t);BLOCK_COMMENT("TemplateInterpreterGenerator::set_vtos_entry_points leave");
++}
++
++//-----------------------------------------------------------------------------
++
++// Non-product code
++#ifndef PRODUCT
++address TemplateInterpreterGenerator::generate_trace_code(TosState state) {//__ warn("TODO:check function right generate_trace_code jzy ");
++  address entry = __ pc();
++
++  // prepare expression stack
++  __ push(state);       // save tosca
++  // tos & tos2
++  // trace_bytecode actually needs 4 args; the last two are tos & tos2.
++  // This works fine for x86, but the MIPS o32 calling convention stores A2-A3
++  // to the stack positions it thinks hold tos & tos2, so an error occurs
++  // when the expression stack holds no more than 2 entries.
++ __ ldptr(A2, Address(esp, 0));// ++ __ ldptr(A3, Address(esp, 1 * wordSize)); ++ // pass arguments & call tracer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::trace_bytecode), RA, A2, A3); ++ __ movl(RA, V0); // make sure return address is not destroyed by pop(state) ++ ++ // restore expression stack ++ __ pop(state); // restore tosca ++ ++ // return ++ __ ret_sw(); ++ ++ return entry; ++} ++ ++void TemplateInterpreterGenerator::count_bytecode() {//__ warn("TODO:check function right count_bytecode jzy "); ++ __ incrementw(ExternalAddress((address) &BytecodeCounter::_counter_value)); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode(Template* t) {//__ warn("TODO:check function right histogram_bytecode jzy "); ++ __ incrementw(ExternalAddress((address) &BytecodeHistogram::_counters[t->bytecode()])); ++} ++ ++void TemplateInterpreterGenerator::histogram_bytecode_pair(Template* t) { ++ const Register rbx = T11; ++ ++ __ movw(rbx, ExternalAddress((address) &BytecodePairHistogram::_index)); ++ __ srll(rbx, BytecodePairHistogram::log2_number_of_codes, rbx); ++ __ orw(rbx, ++ ((int) t->bytecode()) << ++ BytecodePairHistogram::log2_number_of_codes, rbx); ++ __ movw(ExternalAddress((address) &BytecodePairHistogram::_index), rbx); ++ __ lea(rscratch1, ExternalAddress((address) BytecodePairHistogram::_counters)); ++ __ incrementw(Address(rscratch1, rbx, Address::times_4)); ++} ++ ++ ++void TemplateInterpreterGenerator::trace_bytecode(Template* t) {SCOPEMARK_NAME(trace_bytecode,_masm) ++ // Call a little run-time stub to avoid blow-up for each bytecode. ++ // The run-time runtime saves the right registers, depending on ++ // the tosca in-state for the given template. ++ ++ assert(Interpreter::trace_code(t->tos_in()) != NULL, ++ "entry must have been generated"); ++ ++ __ movl(r12_heapbase, esp); // remember sp (can only use r12 if not using call_VM) ++ __ andptr(esp, -16, esp); // align stack as required by ABI ++ __ call(RuntimeAddress(Interpreter::trace_code(t->tos_in()))); ++ __ movl(esp, r12_heapbase); // restore sp ++ __ reinit_heapbase(); ++} ++ ++ ++void TemplateInterpreterGenerator::stop_interpreter_at() {SCOPEMARK_NAME(stop_interpreter_at,_masm) ++ Label L; ++ __ mov_immediate32(rscratch1, (int)StopInterpreterAt);//TODO:current cmpw just compare 16bit jzy ++ __ cmpw(ExternalAddress((address) &BytecodeCounter::_counter_value), rscratch1); ++ __ jcc(Assembler::notEqual, L); ++// __ mov(GP, 1);// ++// __ bne(GP, -1); ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, os::breakpoint))); ++ __ bind(L); ++} ++ ++#endif // !PRODUCT ++ ++address TemplateInterpreterGenerator::generate_slow_signature_handler() {SCOPEMARK_NAME(generate_slow_signature_handler,_masm) ++ address entry = __ pc(); ++ ++ const Register rbx = rmethod; ++ const Register r14 = rlocals; ++ ++ // rbx: method ++ // r14: pointer to locals ++ // c_rarg3: first stack arg - wordSize ++ __ movl(c_rarg3, esp); ++ __ push(RA); //position is subtle, you can move it's postion if you know its influence ++ __ subptr(esp, 6 * wordSize, esp); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::slow_signature_handler), ++ rbx, r14, c_rarg3); ++ ++ // rax: result handler ++ ++ // Stack layout: ++ // -------- 0 <- sp ++ // -------- 1 ++ // -------- 2 ++ // -------- 3 ++ // -------- 4 ++ // -------- 1 float/double identifiers ++ // -------- RA ++ // -------- c_rarg3 ++ ++ // rsp: 5 integer or float args (if static first is unused) ++ // 1 float/double identifiers ++ // return address ++ // 
stack args ++ // garbage ++ // expression stack bottom ++ // bcp (NULL) ++ // ... ++ ++ // Do FP first so we can use c_rarg3 as temp ++ Register identifier = T12; ++ __ ldw(identifier, Address(esp, 5 * wordSize)); // float/double identifiers ++ int floatreg_start_index = FloatRegisterImpl::float_arg_base + 1; //because a0(16) must be env in JNI ++ for ( int i= 0; i < Argument::n_int_register_parameters_c-1; i++ ) { ++ FloatRegister floatreg = as_FloatRegister(i + floatreg_start_index); ++ Label isfloatordouble, isdouble, next; ++ ++ __ testw(identifier, 1 << (i*2)); // Float or Double? ++ __ jcc(Assembler::notZero, isfloatordouble); ++ ++ // Do Int register here ++ switch ( i ) { ++ case 0: ++ __ ldptr(rscratch1, Address(esp, 0)); ++ __ ldw(rscratch2, Address(rbx, Method::access_flags_offset())); ++ __ testw(rscratch2, JVM_ACC_STATIC, rcc); ++ assert_different_registers(rscratch1, rcc); ++ __ cmove(Assembler::zero, c_rarg1, rscratch1, c_rarg1); ++ break; ++ case 1: ++ __ ldptr(c_rarg2, Address(esp, wordSize)); ++ break; ++ case 2: ++ __ ldptr(c_rarg3, Address(esp, 2 * wordSize)); ++ break; ++ case 3: ++ __ ldptr(c_rarg4, Address(esp, 3 * wordSize)); ++ break; ++ case 4: ++ __ ldptr(c_rarg5, Address(esp, 4 * wordSize)); ++ break; ++ default: ++ break; ++ } ++ ++ __ jmp (next); ++ ++ __ bind(isfloatordouble); ++ __ testw(identifier, 1 << ((i*2)+1)); // Double? ++ __ jcc(Assembler::notZero, isdouble); ++ ++ // Do Float Here ++ __ load_float(floatreg, Address(esp, i * wordSize)); ++ __ jmp(next); ++ ++ // Do Double here ++ __ bind(isdouble); ++ __ load_double(floatreg, Address(esp, i * wordSize)); ++ ++ __ bind(next); ++ } ++ ++ ++ // restore rsp ++ __ addptr(esp, 6 * wordSize, esp); ++ ++ // Restore RA ++ __ pop(RA); ++ ++ __ ret_sw(); ++ ++ return entry; ++} ++ ++address TemplateInterpreterGenerator::generate_math_entry(AbstractInterpreter::MethodKind kind) {BLOCK_COMMENT("generate_math_entry enter"); ++ // rmethod: methodOop ++ // V0: scratrch ++ // rsender: send 's sp ++ ++ if (!InlineIntrinsics) return NULL; // Generate a vanilla entry ++ ++ address entry_point = __ pc(); ++ ++ // These don't need a safepoint check because they aren't virtually ++ // callable. We won't enter these intrinsics from compiled code. ++ // If in the future we added an intrinsic which was virtually callable ++ // we'd have to worry about how to safepoint so that this code is used. 
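++  // Dispatch below: sqrt and abs are expanded inline (sqrt_d / abs_d), while
++  // the transcendental kinds (exp, log, log10, sin, cos, tan, pow) call the
++  // generated StubRoutines entry when one exists and otherwise fall back to
++  // the SharedRuntime C implementation, so interpreted and compiled results
++  // stay consistent.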
++ ++ // mathematical functions inlined by compiler ++ // (interpreter must provide identical implementation ++ // in order to avoid monotonicity bugs when switching ++ // from interpreter to compiler in the middle of some ++ // computation) ++ // ++ // stack: [ lo(arg) ] <-- sp ++ // [ hi(arg) ] ++ // ++ ++ __ subl(esp, 2 * wordSize, esp); ++ __ stptr(RA, Address(esp, wordSize)); ++ __ stptr(rfp, Address(esp, 0)); ++ __ ldi(rfp, 2 * wordSize, esp); ++ ++ // [ fp ] <-- sp ++ // [ ra ] ++ // [ lo ] <-- fp ++ // [ hi ] ++ //FIXME, need consider this ++ ++ if (kind == Interpreter::java_lang_math_sqrt) { ++ __ fldd(F16, 2 * wordSize, esp); ++ __ sqrt_d(f0, F16); ++ } else if (kind == Interpreter::java_lang_math_exp) { ++ __ fldd(F16, 2 * wordSize, esp); ++ __ subl(esp, wordSize * 2, esp); ++ if (StubRoutines::dexp() != NULL) { ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dexp()))); ++ } else { ++ __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dexp)); ++ } ++ __ addl(esp, wordSize * 2, esp); ++ } else if (kind == Interpreter::java_lang_math_log) { ++ __ fldd(F16, 2 * wordSize, esp); ++ __ subl(esp, wordSize * 2, esp); ++ if (StubRoutines::dlog() != NULL) { ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog()))); ++ } else { ++ __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog)); ++ } ++ __ addl(esp, wordSize * 2, esp); ++ } else if (kind == Interpreter::java_lang_math_log10) { ++ __ fldd(F16, 2 * wordSize, esp); ++ __ subl(esp, wordSize * 2, esp); ++ if (StubRoutines::dlog10() != NULL) { ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dlog10()))); ++ } else { ++ __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dlog10)); ++ } ++ __ addl(esp, wordSize * 2, esp); ++ } else if (kind == Interpreter::java_lang_math_sin) { ++ __ fldd(F16, 2 * wordSize, esp); ++ __ subl(esp, wordSize * 2, esp); ++ if (StubRoutines::dsin() != NULL) { ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dsin()))); ++ } else { ++ __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dsin)); ++ } ++ __ addl(esp, wordSize * 2, esp); ++ } else if (kind == Interpreter::java_lang_math_cos) { ++ __ fldd(F16, 2 * wordSize, esp); ++ __ subl(esp, wordSize * 2, esp); ++ if (StubRoutines::dcos() != NULL) { ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dcos()))); ++ } else { ++ __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dcos)); ++ } ++ __ addl(esp, wordSize * 2, esp); ++ } else if (kind == Interpreter::java_lang_math_pow) { ++ __ fldd(F17, 2 * wordSize, esp); ++ __ fldd(F16, 4 * wordSize, esp); ++ __ subl(esp, wordSize * 2, esp); ++ if (StubRoutines::dpow() != NULL) { ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dpow()))); ++ } else { ++ __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dpow)); ++ } ++ __ addiu(esp, wordSize * 2, esp); ++ } else if (kind == Interpreter::java_lang_math_tan) { ++ __ fldd(F16, 2 * wordSize, esp); ++ __ subl(esp, wordSize * 2, esp); ++ if (StubRoutines::dtan() != NULL) { ++ __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::dtan()))); ++ } else { ++ __ call_VM_leaf0(CAST_FROM_FN_PTR(address, SharedRuntime::dtan)); ++ } ++ __ addiu(esp, wordSize * 2, esp); ++ } else { ++ __ fldd(F16, 2 * wordSize, esp); ++ switch (kind) { ++ case Interpreter::java_lang_math_abs: ++ __ abs_d(f0, F16); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ } ++ ++ // must maintain return value in f0:f1 ++ __ ldptr(RA, Address(rfp, (-1) * 
wordSize)); ++ //FIXME ++ __ movl(esp, rsender); ++ __ ldptr(rfp, Address(rfp, (-2) * wordSize)); ++ __ ret_sw();BLOCK_COMMENT("generate_math_entry leave"); ++ return entry_point; ++} +diff --git a/src/hotspot/cpu/sw64/templateTable_sw64.cpp b/src/hotspot/cpu/sw64/templateTable_sw64.cpp +new file mode 100755 +index 0000000000..1ae3a6437e +--- /dev/null ++++ b/src/hotspot/cpu/sw64/templateTable_sw64.cpp +@@ -0,0 +1,4190 @@ ++/* ++ * Copyright (c) 1997, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "interpreter/interpreter.hpp" ++#include "interpreter/interpreterRuntime.hpp" ++#include "interpreter/interp_masm.hpp" ++#include "interpreter/templateTable.hpp" ++#include "memory/universe.hpp" ++#include "oops/methodData.hpp" ++#include "oops/objArrayKlass.hpp" ++#include "oops/oop.inline.hpp" ++#include "prims/methodHandles.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/safepointMechanism.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/synchronizer.hpp" ++#include "utilities/macros.hpp" ++#include "assembler_sw64.hpp" ++ ++#define __ _masm-> ++#ifdef PRODUCT ++#define BLOCK_COMMENT(str) /* nothing */ ++#else ++#define BLOCK_COMMENT(str) { char line[1024];sprintf(line,"%s:%s:%d",str,__FILE__, __LINE__); __ block_comment(line);} ++#endif ++#define BIND(label) bind(label); BLOCK_COMMENT(#label ":") ++// Platform-dependent initialization ++void TemplateTable::pd_initialize() { ++ // No sw64 specific initialization ++} ++ ++// Address Computation: local variables ++static inline Address iaddress(int n) { ++ return Address(rlocals, Interpreter::local_offset_in_bytes(n)); ++} ++ ++static inline Address laddress(int n) { ++ return iaddress(n + 1); ++} ++ ++static inline Address faddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address daddress(int n) { ++ return laddress(n); ++} ++ ++static inline Address aaddress(int n) { ++ return iaddress(n); ++} ++ ++static inline Address iaddress(Register r) { ++ return Address(rlocals, r, Address::times_ptr); ++} ++ ++static inline Address laddress(Register r) { ++ return Address(rlocals, r, Address::times_ptr, Interpreter::local_offset_in_bytes(1)); ++} ++ ++static inline Address faddress(Register r) { ++ return iaddress(r); ++} ++ ++static inline Address daddress(Register r) { ++ return laddress(r); ++} ++ ++static inline Address aaddress(Register r) { ++ return iaddress(r); ++} ++ ++ ++// 
expression stack ++// (Note: Must not use symmetric equivalents at_rsp_m1/2 since they store ++// data beyond the rsp which is potentially unsafe in an MT environment; ++// an interrupt may overwrite that data.) ++static inline Address at_rsp() { ++ return Address(esp, 0); ++} ++ ++// At top of Java expression stack which may be different than esp(). It ++// isn't for category 1 objects. ++static inline Address at_tos () { ++ return Address(esp, Interpreter::expr_offset_in_bytes(0)); ++} ++ ++static inline Address at_tos_p1() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(1)); ++} ++ ++static inline Address at_tos_p2() { ++ return Address(esp, Interpreter::expr_offset_in_bytes(2)); ++} ++ ++// Condition conversion ++static Assembler::Condition j_not(TemplateTable::Condition cc) { ++ switch (cc) { ++ case TemplateTable::equal : return Assembler::notEqual; ++ case TemplateTable::not_equal : return Assembler::equal; ++ case TemplateTable::less : return Assembler::greaterEqual; ++ case TemplateTable::less_equal : return Assembler::greater; ++ case TemplateTable::greater : return Assembler::lessEqual; ++ case TemplateTable::greater_equal: return Assembler::less; ++ } ++ ShouldNotReachHere(); ++ return Assembler::zero; ++} ++ ++ ++ ++// Miscelaneous helper routines ++// Store an oop (or NULL) at the address described by obj. ++// If val == noreg this means store a NULL ++ ++ ++static void do_oop_store(InterpreterMacroAssembler* _masm, ++ Address dst, ++ Register val, ++ DecoratorSet decorators = 0) {SCOPEMARK_NAME(do_oop_store, _masm) ++ assert(val == noreg || val == V0, "parameter is just for looks"); ++ __ store_heap_oop(dst, val, T9, T11, decorators);//It's OK to use register like this? Can use rscratch* to replace? TODO:check jzy ++} ++ ++static void do_oop_load(InterpreterMacroAssembler* _masm, ++ Address src, ++ Register dst, ++ DecoratorSet decorators = 0) { ++ __ load_heap_oop(dst, src, T9, T11, decorators); ++} ++ ++Address TemplateTable::at_bcp(int offset) { ++ assert(_desc->uses_bcp(), "inconsistent uses_bcp information"); ++ return Address(rbcp, offset); ++} ++ ++ ++void TemplateTable::patch_bytecode(Bytecodes::Code bc, Register bc_reg, ++ Register temp_reg, bool load_bc_into_bc_reg/*=true*/, ++ int byte_no) {SCOPEMARK_NAME(patch_bytecode, _masm) ++ if (!RewriteBytecodes) return; ++ Label L_patch_done; ++ ++ switch (bc) { ++ case Bytecodes::_fast_aputfield: ++ case Bytecodes::_fast_bputfield: ++ case Bytecodes::_fast_zputfield: ++ case Bytecodes::_fast_cputfield: ++ case Bytecodes::_fast_dputfield: ++ case Bytecodes::_fast_fputfield: ++ case Bytecodes::_fast_iputfield: ++ case Bytecodes::_fast_lputfield: ++ case Bytecodes::_fast_sputfield: ++ { ++ // We skip bytecode quickening for putfield instructions when ++ // the put_code written to the constant pool cache is zero. ++ // This is required so that every execution of this instruction ++ // calls out to InterpreterRuntime::resolve_get_put to do ++ // additional, required work. ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ assert(load_bc_into_bc_reg, "we use bc_reg as temp"); ++ __ get_cache_and_index_and_bytecode_at_bcp(temp_reg, bc_reg, temp_reg, byte_no, 1); ++ __ movw(bc_reg, bc); ++ __ cmpw(temp_reg, (int) 0); ++ __ jcc(Assembler::zero, L_patch_done); // don't patch ++ } ++ break; ++ default: ++ assert(byte_no == -1, "sanity"); ++ // the pair bytecodes have already done the load. 
++ if (load_bc_into_bc_reg) { ++ __ movw(bc_reg, bc); ++ } ++ } ++ ++ if (JvmtiExport::can_post_breakpoint()) { ++ Label L_fast_patch; ++ // if a breakpoint is present we can't rewrite the stream directly ++ __ ldbu(temp_reg, at_bcp(0)); ++ __ cmpw(temp_reg, Bytecodes::_breakpoint); ++ __ jcc(Assembler::notEqual, L_fast_patch); ++ __ get_method(temp_reg); ++ // Let breakpoint table handling rewrite to quicker bytecode ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::set_original_bytecode_at), temp_reg, rbcp, bc_reg); ++ __ jmp(L_patch_done); ++ __ bind(L_fast_patch); ++ } ++ ++#ifdef ASSERT ++ Label L_okay; ++ __ load_unsigned_byte(temp_reg, at_bcp(0)); ++ __ cmpw(temp_reg, (int) Bytecodes::java_code(bc)); ++ __ jcc(Assembler::equal, L_okay); ++ __ cmpw(temp_reg, bc_reg); ++ __ jcc(Assembler::equal, L_okay); ++ __ stop("patching the wrong bytecode"); ++ __ bind(L_okay); ++#endif ++ ++ // patch bytecode ++ __ stb(bc_reg, at_bcp(0)); ++ __ bind(L_patch_done); ++} ++// Individual instructions ++ ++ ++void TemplateTable::nop() { ++ transition(vtos, vtos); ++ // nothing to do ++} ++ ++void TemplateTable::shouldnotreachhere() { ++ transition(vtos, vtos); ++ __ stop("shouldnotreachhere bytecode"); ++} ++ ++void TemplateTable::aconst_null() { ++ transition(vtos, atos); ++ __ bis(R0, R0, FSR); ++} ++ ++void TemplateTable::iconst(int value) { ++ transition(vtos, itos); ++ if (value == 0) { ++ __ bis(R0, R0, FSR); ++ } else { ++ __ ldi(FSR, value, R0); ++ } ++} ++ ++void TemplateTable::lconst(int value) { ++ transition(vtos, ltos); ++ if (value == 0) { ++ __ bis(R0, R0, FSR); ++ } else { ++ __ ldi(FSR, value, R0); ++ } ++} ++ ++ ++ ++void TemplateTable::fconst(int value) { ++ transition(vtos, ftos); ++ static float _f1 = 1.0, _f2 = 2.0; ++ float* p; ++ switch( value ) { ++ default: ShouldNotReachHere(); ++ case 0: __ fcpys(f31, f31, FSF); return; ++ case 1: p = &_f1; break; ++ case 2: p = &_f2; break; ++ } ++ __ load_float(FSF, ExternalAddress((address)p)); ++} ++ ++void TemplateTable::dconst(int value) { ++ transition(vtos, dtos); ++ static double _d1 = 1.0; ++ double* p; ++ switch( value ) { ++ default: ShouldNotReachHere(); ++ case 0: __ fcpys(f31, f31, FSF); return; ++ case 1: p = &_d1; break; ++ } ++ __ load_double(FSF, ExternalAddress((address)p)); ++} ++ ++void TemplateTable::bipush() { ++ transition(vtos, itos); ++ __ load_signed_byte64(FSR, at_bcp(1)); ++} ++ ++void TemplateTable::sipush() {SCOPEMARK ++ transition(vtos, itos); ++// the following code is an optimization on sw64 since ++// we dont have unaligned load insn ++ __ load_signed_byte64(FSR, at_bcp(1)); ++ __ ldbu(AT, at_bcp(2)); ++ __ slll(FSR, 8, FSR); ++ __ bis(FSR, AT, FSR); ++ __ sexth(FSR, FSR); ++} ++ ++void TemplateTable::ldc(bool wide) {SCOPEMARK ++ transition(vtos, vtos); ++ Register rarg = c_rarg1; ++ Label call_ldc, notFloat, notClass, notInt, Done; ++ ++ if (wide) { ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); ++ } else { ++ __ load_unsigned_byte(T2, at_bcp(1)); ++ } ++ ++ __ get_cpool_and_tags(T3, T1); ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ __ ldbu(T4, Address(T1, T2, Address::times_1, tags_offset)); ++ ++ // unresolved class - get the resolved class ++ __ cmpw(T4, JVM_CONSTANT_UnresolvedClass); ++ __ jcc(Assembler::equal, call_ldc); ++ ++ // unresolved class in error state - call into runtime to throw the error ++ // from the first resolution attempt ++ __ cmpw(T4, 
JVM_CONSTANT_UnresolvedClassInError); ++ __ jcc(Assembler::equal, call_ldc); ++ ++ // resolved class - need to call vm to get java mirror of the class ++ __ cmpw(T4, JVM_CONSTANT_Class); ++ __ jcc(Assembler::notEqual, notClass); ++ ++ __ bind(call_ldc); ++ ++ __ movw(rarg, wide); ++ call_VM(FSR, CAST_FROM_FN_PTR(address, InterpreterRuntime::ldc), rarg); ++ ++ __ push(atos); ++ __ jmp(Done); ++ ++ __ bind(notClass); ++ __ cmpw(T4, JVM_CONSTANT_Float); ++ __ jcc(Assembler::notEqual, notFloat); ++ ++ // ftos ++ __ load_float(FSF, Address(T3, T2, Address::times_ptr, base_offset)); ++ __ push(ftos); ++ __ jmp(Done); ++ ++ __ bind(notFloat); ++ __ cmpw(T4, JVM_CONSTANT_Integer); ++ __ jcc(Assembler::notEqual, notInt); ++ ++ // itos ++ __ ldws(FSR, Address(T3, T2, Address::times_ptr, base_offset)); ++ __ push(itos); ++ __ jmp(Done); ++ ++ // assume the tag is for condy; if not, the VM runtime will tell us ++ __ bind(notInt); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++// Fast path for caching oop constants. ++void TemplateTable::fast_aldc(bool wide) {SCOPEMARK ++ transition(vtos, atos); ++ ++ Register result = FSR; ++ Register tmp = SSR; ++ Register rarg = c_rarg1; ++ int index_size = wide ? sizeof(u2) : sizeof(u1); ++ ++ Label resolved; ++ ++ // We are resolved if the resolved reference cache entry contains a ++ // non-null object (String, MethodType, etc.) ++ assert_different_registers(result, tmp); ++ __ get_cache_index_at_bcp(tmp, 1, index_size); ++ __ load_resolved_reference_at_index(result, tmp); ++ __ testptr(result, result); ++ __ jcc(Assembler::notZero, resolved); ++ ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc); ++ ++ // first time invocation - must resolve first ++ __ movw(rarg, (int)bytecode()); ++ __ call_VM(result, entry, rarg); ++ __ bind(resolved); ++ ++ { // Check for the null sentinel. ++ // If we just called the VM, it already did the mapping for us, ++ // but it's harmless to retry. 
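++    // Note: resolved_references[] cannot store a real NULL for a condy
++    // constant whose value is null, because a NULL slot already means
++    // "not resolved yet"; the VM stores Universe::the_null_sentinel()
++    // instead, and the code below maps that sentinel back to a genuine
++    // NULL before it is pushed.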
++ Label notNull; ++ ExternalAddress null_sentinel((address)Universe::the_null_sentinel_addr()); ++ __ ldptr(tmp, null_sentinel); ++ __ cmpptr(tmp, result); ++ __ jcc(Assembler::notEqual, notNull); ++ __ bis(R0, R0, result); // NULL object reference ++ __ bind(notNull); ++ } ++ ++ if (VerifyOops) { ++ __ verify_oop(result); ++ } ++} ++ ++void TemplateTable::ldc2_w() {SCOPEMARK ++ transition(vtos, vtos); ++ Label notDouble, notLong, Done; ++ __ get_unsigned_2_byte_index_at_bcp(T2, 1); // get index in cpool ++ ++ __ get_cpool_and_tags(T3, T1); ++ const int base_offset = ConstantPool::header_size() * wordSize; ++ const int tags_offset = Array::base_offset_in_bytes(); ++ ++ // get type ++ __ ldbu(AT, Address(T1, T2, Address::times_1, tags_offset)); ++ __ cmpw(AT, JVM_CONSTANT_Double); ++ __ jcc(Assembler::notEqual, notDouble); ++ ++ // dtos ++ __ load_double(FSF, Address(T3, T2, Address::times_ptr, base_offset)); ++ __ push(dtos); ++ ++ __ jmp(Done); ++ __ bind(notDouble); ++ __ cmpw(AT, JVM_CONSTANT_Long); ++ __ jcc(Assembler::notEqual, notLong); ++ ++ // ltos ++ __ ldptr(FSR, Address(T3, T2, Address::times_ptr, base_offset + 0 * wordSize)); ++ __ push(ltos); ++ __ jmp(Done); ++ ++ __ bind(notLong); ++ condy_helper(Done); ++ ++ __ bind(Done); ++} ++ ++void TemplateTable::condy_helper(Label& Done) {SCOPEMARK ++ const Register obj = T0; ++ const Register off = T1; ++ const Register flags = T2; ++ const Register rarg = A1; ++ __ movw(rarg, (int)bytecode()); ++ call_VM(obj, CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_ldc), rarg); ++ __ get_vm_result_2(flags, rthread); ++ // VMr = obj = base address to find primitive value to push ++ // VMr2 = flags = (tos, off) using format of CPCE::_flags ++ __ andw(flags, ConstantPoolCacheEntry::field_index_mask, off); ++ const Address field(obj, off, Address::times_1, 0*wordSize); ++ ++ // What sort of thing are we loading? 
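++  // Roughly, the two extractions around this point are:
++  //   off = flags & ConstantPoolCacheEntry::field_index_mask;    (done above)
++  //   tos = (flags >> tos_state_shift) & tos_state_mask;         (done below)
++  // leaving 'off' as the byte offset of the constant inside 'obj' and
++  // 'flags' holding only the TosState tag used by the dispatch below.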
++ __ srll(flags, ConstantPoolCacheEntry::tos_state_shift, flags); ++ __ andw(flags, ConstantPoolCacheEntry::tos_state_mask, flags); ++ ++ switch (bytecode()) { ++ case Bytecodes::_ldc: ++ case Bytecodes::_ldc_w: ++ { ++ // tos in (itos, ftos, stos, btos, ctos, ztos) ++ Label notInt, notFloat, notShort, notByte, notChar, notBool; ++ __ cmpw(flags, itos); ++ __ jcc(Assembler::notEqual, notInt); ++ // itos ++ __ ldws(FSR, field); ++ __ push(itos); ++ __ jmp(Done); ++ ++ __ bind(notInt); ++ __ cmpw(flags, ftos); ++ __ jcc(Assembler::notEqual, notFloat); ++ // ftos ++ __ load_float(FSF, field); ++ __ push(ftos); ++ __ jmp(Done); ++ ++ __ bind(notFloat); ++ __ cmpw(flags, stos); ++ __ jcc(Assembler::notEqual, notShort); ++ // stos ++ __ load_signed_short(FSR, field); ++ __ push(stos); ++ __ jmp(Done); ++ ++ __ bind(notShort); ++ __ cmpw(flags, btos); ++ __ jcc(Assembler::notEqual, notByte); ++ // btos ++ __ load_signed_byte64(FSR, field); ++ __ push(btos); ++ __ jmp(Done); ++ ++ __ bind(notByte); ++ __ cmpw(flags, ctos); ++ __ jcc(Assembler::notEqual, notChar); ++ // ctos ++ __ load_unsigned_short(FSR, field); ++ __ push(ctos); ++ __ jmp(Done); ++ ++ __ bind(notChar); ++ __ cmpw(flags, ztos); ++ __ jcc(Assembler::notEqual, notBool); ++ // ztos ++ __ load_signed_byte64(FSR, field); ++ __ push(ztos); ++ __ jmp(Done); ++ ++ __ bind(notBool); ++ break; ++ } ++ ++ case Bytecodes::_ldc2_w: ++ { ++ Label notLong, notDouble; ++ __ cmpw(flags, ltos); ++ __ jcc(Assembler::notEqual, notLong); ++ // ltos ++ // Loading high word first because movptr clobbers rax ++ __ ldptr(FSR, field); ++ __ push(ltos); ++ __ jmp(Done); ++ ++ __ bind(notLong); ++ __ cmpw(flags, dtos); ++ __ jcc(Assembler::notEqual, notDouble); ++ // dtos ++ __ load_double(FSF, field); ++ __ push(dtos); ++ __ jmp(Done); ++ ++ __ bind(notDouble); ++ break; ++ } ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ __ stop("bad ldc/condy"); ++} ++ ++void TemplateTable::locals_index(Register reg, int offset) {SCOPEMARK ++ __ load_unsigned_byte(reg, at_bcp(offset)); ++ __ subl(R0, reg, reg); ++} ++ ++void TemplateTable::iload() {SCOPEMARK ++ iload_internal(); ++} ++ ++void TemplateTable::nofast_iload() { ++ iload_internal(may_not_rewrite); ++} ++ ++void TemplateTable::iload_internal(RewriteControl rc) {SCOPEMARK_NAME(iload_internal, _masm) ++ transition(vtos, itos); ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ const Register bc = c_rarg3; ++ ++ // get next byte ++ __ load_unsigned_byte(T2, ++ at_bcp(Bytecodes::length_for(Bytecodes::_iload))); ++ // if _iload, wait to rewrite to iload2. We only want to rewrite the ++ // last two iloads in a pair. Comparing against fast_iload means that ++ // the next bytecode is neither an iload or a caload, and therefore ++ // an iload pair. ++ __ cmpw(T2, Bytecodes::_iload); ++ __ jcc(Assembler::equal, done); ++ ++ __ cmpw(T2, Bytecodes::_fast_iload); ++ __ movw(bc, Bytecodes::_fast_iload2); ++ ++ __ jcc(Assembler::equal, rewrite); ++ ++ // if _caload, rewrite to fast_icaload ++ __ cmpw(T2, Bytecodes::_caload); ++ __ movw(bc, Bytecodes::_fast_icaload); ++ __ jcc(Assembler::equal, rewrite); ++ ++ // rewrite so iload doesn't check again. 
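++    // Illustrative sequence for a pair "iload x; iload y": the trailing
++    // iload is rewritten first (to _fast_iload, here); on a later execution
++    // the leading iload then sees _fast_iload as its successor and is fused
++    // into _fast_iload2 by the check above.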
++    __ movw(bc, Bytecodes::_fast_iload);
++
++    // rewrite
++    // bc: fast bytecode
++    __ bind(rewrite);
++    patch_bytecode(Bytecodes::_iload, bc, T2, false);
++    __ bind(done);
++  }
++
++  // Get the local value into tos
++  locals_index(T2);
++  __ ldws(FSR, iaddress(T2));
++}
++
++void TemplateTable::fast_iload2() {SCOPEMARK
++  transition(vtos, itos);
++  locals_index(T2);
++  __ ldws(FSR, iaddress(T2));
++  __ push(itos);
++  locals_index(T2, 3);
++  __ ldws(FSR, iaddress(T2));
++}
++
++void TemplateTable::fast_iload() {SCOPEMARK
++  transition(vtos, itos);
++  locals_index(T2);
++  __ ldws(FSR, iaddress(T2));
++}
++
++void TemplateTable::lload() {SCOPEMARK
++  transition(vtos, ltos);
++  locals_index(T2);
++  __ ldptr(FSR, laddress(T2));
++}
++
++void TemplateTable::fload() {SCOPEMARK
++  transition(vtos, ftos);
++  locals_index(T2);
++  __ load_float(FSF, faddress(T2));
++}
++
++void TemplateTable::dload() {SCOPEMARK
++  transition(vtos, dtos);
++  locals_index(T2);
++  __ load_double(FSF, daddress(T2));
++}
++
++void TemplateTable::aload() {SCOPEMARK
++  transition(vtos, atos);
++  locals_index(T2);
++  __ ldptr(FSR, aaddress(T2));
++}
++
++void TemplateTable::locals_index_wide(Register reg) {SCOPEMARK
++  __ ldhu_unaligned_be(reg, at_bcp(2));
++  __ subl(R0, reg, reg);
++}
++
++void TemplateTable::wide_iload() {
++  transition(vtos, itos);
++  locals_index_wide(T2);
++  __ ldws(FSR, iaddress(T2));
++}
++
++void TemplateTable::wide_lload() {
++  transition(vtos, ltos);
++  locals_index_wide(T2);
++  __ ldptr(FSR, laddress(T2));
++}
++
++void TemplateTable::wide_fload() {
++  transition(vtos, ftos);
++  locals_index_wide(T2);
++  __ load_float(FSF, faddress(T2));
++}
++
++void TemplateTable::wide_dload() {
++  transition(vtos, dtos);
++  locals_index_wide(T2);
++  __ load_double(FSF, daddress(T2));
++}
++
++void TemplateTable::wide_aload() {
++  transition(vtos, atos);
++  locals_index_wide(T2);
++  __ ldptr(FSR, aaddress(T2));
++}
++
++void TemplateTable::index_check(Register array, Register index) {
++  // Pop ptr into array
++  __ pop_ptr(array);
++  index_check_without_pop(array, index);
++}
++
++void TemplateTable::index_check_without_pop(Register array, Register index) {SCOPEMARK_NAME(index_check_without_pop, _masm)
++  // destroys A2
++  // check array
++  __ null_check(array, arrayOopDesc::length_offset_in_bytes());
++
++  // check index
++  __ cmpwu(index, Address(array, arrayOopDesc::length_offset_in_bytes()));
++  // throw_ArrayIndexOutOfBoundsException expects the aberrant index in c_rarg2;
++  // extend it into c_rarg2 as a valid value because the index may be negative
++  if (c_rarg2 != index) __ movl(c_rarg2, index);
++
++  Label skip;
++  __ jcc(Assembler::below, skip);
++  // Pass array to create more detailed exceptions.
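++  // In effect (sketch only):
++  //   if ((unsigned)index >= (unsigned)array->length()) {
++  //     // c_rarg2 already holds the index, c_rarg1 gets the array below
++  //     goto Interpreter::_throw_ArrayIndexOutOfBoundsException_entry;
++  //   }
++  // The unsigned compare folds the index < 0 case into the same branch.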
++ __ movl(c_rarg1, array); ++ __ jump(ExternalAddress(Interpreter::_throw_ArrayIndexOutOfBoundsException_entry)); ++ __ bind(skip); ++} ++ ++void TemplateTable::iaload() { ++ transition(itos, itos); ++ // FSR: index ++ // SSR: array ++ index_check(SSR, FSR); ++ __ access_load_at(T_INT, IN_HEAP | IS_ARRAY, FSR, ++ Address(SSR, FSR, Address::times_4, ++ arrayOopDesc::base_offset_in_bytes(T_INT)), ++ noreg, noreg); ++} ++ ++void TemplateTable::laload() { ++ transition(itos, ltos); ++ // FSR: index ++ // SSR: array ++ index_check(SSR, FSR); ++ __ access_load_at(T_LONG, IN_HEAP | IS_ARRAY, noreg /* ltos */, ++ Address(SSR, FSR, Address::times_8, ++ arrayOopDesc::base_offset_in_bytes(T_LONG)), ++ noreg, noreg); ++} ++ ++ ++ ++void TemplateTable::faload() { ++ transition(itos, ftos); ++ // FSR: index ++ // SSR: array ++ index_check(SSR, FSR); ++ __ access_load_at(T_FLOAT, IN_HEAP | IS_ARRAY, noreg /* ftos */, ++ Address(SSR, FSR, ++ Address::times_4, ++ arrayOopDesc::base_offset_in_bytes(T_FLOAT)), ++ noreg, noreg); ++} ++ ++void TemplateTable::daload() { ++ transition(itos, dtos); ++ // FSR: index ++ // SSR: array ++ index_check(SSR, FSR); ++ __ access_load_at(T_DOUBLE, IN_HEAP | IS_ARRAY, noreg /* dtos */, ++ Address(SSR, FSR, ++ Address::times_8, ++ arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), ++ noreg, noreg); ++} ++ ++void TemplateTable::aaload() { ++ transition(itos, atos); ++ // FSR: index ++ // SSR: array ++ index_check(SSR, FSR); ++ do_oop_load(_masm, ++ Address(SSR, FSR, ++ UseCompressedOops ? Address::times_4 : Address::times_ptr, ++ arrayOopDesc::base_offset_in_bytes(T_OBJECT)), ++ FSR, ++ IS_ARRAY); ++} ++ ++void TemplateTable::baload() { ++ transition(itos, itos); ++ // FSR: index ++ // SSR: array ++ index_check(SSR, FSR); ++ __ access_load_at(T_BYTE, IN_HEAP | IS_ARRAY, FSR, ++ Address(SSR, FSR, Address::times_1, arrayOopDesc::base_offset_in_bytes(T_BYTE)), ++ noreg, noreg); ++} ++ ++void TemplateTable::caload() { ++ transition(itos, itos); ++ // FSR: index ++ // SSR: array ++ index_check(SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, ++ Address(SSR, FSR, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)), ++ noreg, noreg); ++} ++ ++// iload followed by caload frequent pair ++void TemplateTable::fast_icaload() { ++ transition(vtos, itos); ++ // load index out of locals ++ locals_index(T2); ++ __ ldws(FSR, iaddress(T2)); ++ ++ // FSR: index ++ // SSR: array ++ index_check(SSR, FSR); ++ __ access_load_at(T_CHAR, IN_HEAP | IS_ARRAY, FSR, ++ Address(SSR, FSR, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_CHAR)), ++ noreg, noreg); ++} ++ ++ ++void TemplateTable::saload() { ++ transition(itos, itos); ++ // FSR: index ++ // SSR: array ++ index_check(SSR, FSR); ++ __ access_load_at(T_SHORT, IN_HEAP | IS_ARRAY, FSR, ++ Address(SSR, FSR, Address::times_2, arrayOopDesc::base_offset_in_bytes(T_SHORT)), ++ noreg, noreg); ++} ++ ++void TemplateTable::iload(int n) { ++ transition(vtos, itos); ++ __ ldws(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lload(int n) { ++ transition(vtos, ltos); ++ __ ldptr(FSR, laddress(n)); ++} ++ ++void TemplateTable::fload(int n) { ++ transition(vtos, ftos); ++ __ load_float(FSF, faddress(n)); ++} ++ ++void TemplateTable::dload(int n) { ++ transition(vtos, dtos); ++ __ load_double(FSF, daddress(n)); ++} ++ ++void TemplateTable::aload(int n) { ++ transition(vtos, atos); ++ __ ldptr(FSR, aaddress(n)); ++} ++ ++void TemplateTable::aload_0() { ++ aload_0_internal(); ++} ++ ++void TemplateTable::nofast_aload_0() { ++ 
aload_0_internal(may_not_rewrite); ++} ++ ++void TemplateTable::aload_0_internal(RewriteControl rc) { ++ transition(vtos, atos); ++ // According to bytecode histograms, the pairs: ++ // ++ // _aload_0, _fast_igetfield ++ // _aload_0, _fast_agetfield ++ // _aload_0, _fast_fgetfield ++ // ++ // occur frequently. If RewriteFrequentPairs is set, the (slow) ++ // _aload_0 bytecode checks if the next bytecode is either ++ // _fast_igetfield, _fast_agetfield or _fast_fgetfield and then ++ // rewrites the current bytecode into a pair bytecode; otherwise it ++ // rewrites the current bytecode into _fast_aload_0 that doesn't do ++ // the pair check anymore. ++ // ++ // Note: If the next bytecode is _getfield, the rewrite must be ++ // delayed, otherwise we may miss an opportunity for a pair. ++ // ++ // Also rewrite frequent pairs ++ // aload_0, aload_1 ++ // aload_0, iload_1 ++ // These bytecodes with a small amount of code are most profitable ++ // to rewrite ++ if (RewriteFrequentPairs && rc == may_rewrite) { ++ Label rewrite, done; ++ ++ const Register bc = c_rarg3; ++ ++ // get next byte ++ __ load_unsigned_byte(T2, at_bcp(Bytecodes::length_for(Bytecodes::_aload_0))); ++ ++ // if _getfield then wait with rewrite ++ __ cmpw(T2, Bytecodes::_getfield); ++ __ jcc(Assembler::equal, done); ++ ++ // if _igetfield then rewrite to _fast_iaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_iaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ cmpw(T2, Bytecodes::_fast_igetfield); ++ __ movw(bc, Bytecodes::_fast_iaccess_0); ++ __ jcc(Assembler::equal, rewrite); ++ ++ // if _agetfield then rewrite to _fast_aaccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aaccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ cmpw(T2, Bytecodes::_fast_agetfield); ++ __ movw(bc, Bytecodes::_fast_aaccess_0); ++ __ jcc(Assembler::equal, rewrite); ++ ++ // if _fgetfield then rewrite to _fast_faccess_0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_faccess_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ cmpw(T2, Bytecodes::_fast_fgetfield); ++ __ movw(bc, Bytecodes::_fast_faccess_0); ++ __ jcc(Assembler::equal, rewrite); ++ ++ // else rewrite to _fast_aload0 ++ assert(Bytecodes::java_code(Bytecodes::_fast_aload_0) == Bytecodes::_aload_0, "fix bytecode definition"); ++ __ movw(bc, Bytecodes::_fast_aload_0); ++ ++ // rewrite ++ // bc: fast bytecode ++ __ bind(rewrite); ++ patch_bytecode(Bytecodes::_aload_0, bc, T2, false); ++ ++ __ bind(done); ++ } ++ ++ // Do actual aload_0 (must do this after patch_bytecode which might call VM and GC might change oop). 
++ aload(0); ++} ++ ++void TemplateTable::istore() { ++ transition(itos, vtos); ++ locals_index(T2); ++ __ stw(FSR, iaddress(T2)); ++} ++ ++void TemplateTable::lstore() { ++ transition(ltos, vtos); ++ locals_index(T2); ++ __ stptr(FSR, laddress(T2)); ++} ++ ++void TemplateTable::fstore() { ++ transition(ftos, vtos); ++ locals_index(T2); ++ __ store_float(FSF, faddress(T2)); ++} ++ ++void TemplateTable::dstore() { ++ transition(dtos, vtos); ++ locals_index(T2); ++ __ store_double(FSF, daddress(T2)); ++} ++ ++void TemplateTable::astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index(T2); ++ __ stptr(FSR, aaddress(T2)); ++} ++ ++void TemplateTable::wide_istore() { ++ transition(vtos, vtos); ++ __ pop_i(); ++ locals_index_wide(T2); ++ __ stw(FSR, iaddress(T2)); ++} ++ ++void TemplateTable::wide_lstore() { ++ transition(vtos, vtos); ++ __ pop_l(); ++ locals_index_wide(T2); ++ __ stptr(FSR, laddress(T2)); ++} ++ ++void TemplateTable::wide_fstore() { ++ transition(vtos, vtos); ++ __ pop_f(FSF); ++ locals_index_wide(T2); ++ __ fsts(FSF, faddress(T2)); ++} ++ ++void TemplateTable::wide_dstore() { ++ transition(vtos, vtos); ++ __ pop_d(FSF); ++ locals_index_wide(T2); ++ __ fstd(FSF, daddress(T2)); ++} ++ ++void TemplateTable::wide_astore() { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ locals_index_wide(T2); ++ __ stptr(FSR, aaddress(T2)); ++} ++ ++void TemplateTable::iastore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ // FSR: value ++ // SSR: index ++ // T2 : array ++ index_check(T2, SSR); // prefer index in SSR ++ __ access_store_at(T_INT, IN_HEAP | IS_ARRAY, ++ Address(T2, SSR, Address::times_4, ++ arrayOopDesc::base_offset_in_bytes(T_INT)), ++ FSR, noreg, noreg); ++} ++ ++void TemplateTable::lastore() { ++ transition(ltos, vtos); ++ __ pop_i(SSR); ++ // FSR: value ++ // SSR: index ++ // T2 : array ++ index_check(T2, SSR); ++ __ access_store_at(T_LONG, IN_HEAP | IS_ARRAY, ++ Address(T2, SSR, Address::times_8, ++ arrayOopDesc::base_offset_in_bytes(T_LONG)), ++ noreg /* ltos */, noreg, noreg); ++} ++ ++ ++void TemplateTable::fastore() { ++ transition(ftos, vtos); ++ __ pop_i(SSR); ++ // FSR: value ++ // SSR: index ++ // T2 : array ++ index_check(T2, SSR); ++ __ access_store_at(T_FLOAT, IN_HEAP | IS_ARRAY, ++ Address(T2, SSR, Address::times_4, ++ arrayOopDesc::base_offset_in_bytes(T_FLOAT)), ++ noreg /* ftos */, noreg, noreg); ++} ++ ++void TemplateTable::dastore() { ++ transition(dtos, vtos); ++ __ pop_i(SSR); ++ // FSR: value ++ // SSR: index ++ // T2 : array ++ index_check(T2, SSR); ++ __ access_store_at(T_DOUBLE, IN_HEAP | IS_ARRAY, ++ Address(T2, SSR, Address::times_8, ++ arrayOopDesc::base_offset_in_bytes(T_DOUBLE)), ++ noreg /* dtos */, noreg, noreg); ++} ++ ++void TemplateTable::aastore() { ++ Label is_null, ok_is_subtype, done; ++ transition(vtos, vtos); ++ // stack: ..., array, index, value ++ __ ldptr(FSR, at_tos()); // value ++ __ ldws(SSR, at_tos_p1()); // index ++ __ ldptr(T2, at_tos_p2()); // array ++ ++ Address element_address(T2, SSR, ++ UseCompressedOops? Address::times_4 : Address::times_ptr, ++ arrayOopDesc::base_offset_in_bytes(T_OBJECT)); ++ ++ index_check_without_pop(T2, SSR); ++ __ testptr(FSR, FSR); ++ __ jcc(Assembler::zero, is_null); ++ ++ // Move subklass into T3 ++ __ load_klass(T3, FSR); ++ // Move superklass into FSR ++ __ load_klass(FSR, T2); ++ __ ldptr(FSR, Address(FSR, ++ ObjArrayKlass::element_klass_offset())); ++ ++ // Generate subtype check. Blows T0, T1 ++ // Superklass in FSR. Subklass in T3. 
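++  // Java-level semantics being implemented (sketch):
++  //   if (value != null && !elementType.isInstance(value))
++  //     throw new ArrayStoreException();
++  //   array[index] = value;   // stored with the GC barrier via do_oop_store
++  // A null value skips the type check and is stored directly (see is_null).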
++ __ gen_subtype_check(T3, ok_is_subtype); ++ ++ // Come here on failure ++ // object is at TOS ++ __ jump(ExternalAddress(Interpreter::_throw_ArrayStoreException_entry)); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ ++ // Get the value we will store ++ __ ldptr(FSR, at_tos()); ++ __ ldws(SSR, at_tos_p1()); // index ++ // Now store using the appropriate barrier ++ do_oop_store(_masm, element_address, FSR, IS_ARRAY); ++ __ jmp(done); ++ ++ // Have a NULL in FSR, T2=array, SSR=index. Store NULL at ary[idx] ++ __ bind(is_null); ++ __ profile_null_seen(T3); ++ ++ // Store a NULL ++ do_oop_store(_masm, element_address, noreg, IS_ARRAY); ++ ++ // Pop stack arguments ++ __ bind(done); ++ __ addptr(esp, 3 * Interpreter::stackElementSize, esp); ++} ++ ++void TemplateTable::bastore() { ++ transition(itos, vtos); ++ Register rbx = SSR; ++ Register rdx = T2; ++ Register rcx = T12; ++ Register rax = FSR; ++ __ pop_i(rbx); ++ // rax: value ++ // rbx: index ++ // rdx: array ++ index_check(rdx, rbx); // prefer index in rbx ++ // Need to check whether array is boolean or byte ++ // since both types share the bastore bytecode. ++ __ load_klass(rcx, rdx); ++ __ ldw(rcx, Address(rcx, Klass::layout_helper_offset())); ++ int diffbit = Klass::layout_helper_boolean_diffbit(); ++ __ testw(rcx, diffbit); ++ Label L_skip; ++ __ jcc(Assembler::zero, L_skip); ++ __ andw(rax, 0x1, rax); // if it is a T_BOOLEAN array, mask the stored value to 0/1 ++ __ bind(L_skip); ++ __ access_store_at(T_BYTE, IN_HEAP | IS_ARRAY, ++ Address(rdx, rbx, Address::times_1, ++ arrayOopDesc::base_offset_in_bytes(T_BYTE)), ++ rax, noreg, noreg); ++} ++ ++void TemplateTable::castore() { ++ transition(itos, vtos); ++ __ pop_i(SSR); ++ // FSR: value ++ // SSR: index ++ // T2: array ++ index_check(T2, SSR); ++ __ access_store_at(T_CHAR, IN_HEAP | IS_ARRAY, ++ Address(T2, SSR, Address::times_2, ++ arrayOopDesc::base_offset_in_bytes(T_CHAR)), ++ FSR, noreg, noreg); ++} ++ ++ ++void TemplateTable::sastore() { ++ castore(); ++} ++ ++void TemplateTable::istore(int n) { ++ transition(itos, vtos); ++ __ stw(FSR, iaddress(n)); ++} ++ ++void TemplateTable::lstore(int n) { ++ transition(ltos, vtos); ++ __ stptr(FSR, laddress(n)); ++} ++ ++void TemplateTable::fstore(int n) { ++ transition(ftos, vtos); ++ __ store_float(FSF, faddress(n)); ++} ++ ++void TemplateTable::dstore(int n) { ++ transition(dtos, vtos); ++ __ store_double(FSF, laddress(n)); ++} ++ ++void TemplateTable::astore(int n) { ++ transition(vtos, vtos); ++ __ pop_ptr(FSR); ++ __ stptr(FSR, aaddress(n)); ++} ++ ++void TemplateTable::pop() { ++ transition(vtos, vtos); ++ __ addptr(esp, Interpreter::stackElementSize, esp); ++} ++ ++void TemplateTable::pop2() { ++ transition(vtos, vtos); ++ __ addptr(esp, 2 * Interpreter::stackElementSize, esp); ++} ++ ++void TemplateTable::dup() { ++ transition(vtos, vtos); ++ __ load_ptr(0, FSR); ++ __ push_ptr(FSR); ++ // stack: ..., a, a ++} ++ ++void TemplateTable::dup_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr( 0, FSR); // load b ++ __ load_ptr( 1, A5 ); // load a ++ __ store_ptr(1, FSR); // store b ++ __ store_ptr(0, A5 ); // store a ++ __ push_ptr(FSR); // push b ++ // stack: ..., b, a, b ++} ++ ++void TemplateTable::dup_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr( 0, FSR); // load c ++ __ load_ptr( 2, A5 ); // load a ++ __ store_ptr(2, FSR); // store c in a ++ __ push_ptr(FSR); // push c ++ // stack: ..., c, b, c, c ++ __ load_ptr( 2, FSR); // load b ++ __ store_ptr(2, A5 ); // 
store a in b ++ // stack: ..., c, a, c, c ++ __ store_ptr(1, FSR); // store b in c ++ // stack: ..., c, a, b, c ++} ++ ++void TemplateTable::dup2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, FSR); // load a ++ __ push_ptr(FSR); // push a ++ __ load_ptr(1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ // stack: ..., a, b, a, b ++} ++ ++void TemplateTable::dup2_x1() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c ++ __ load_ptr( 0, T2); // load c ++ __ load_ptr( 1, FSR); // load b ++ __ push_ptr(FSR); // push b ++ __ push_ptr(T2); // push c ++ // stack: ..., a, b, c, b, c ++ __ store_ptr(3, T2); // store c in b ++ // stack: ..., a, c, c, b, c ++ __ load_ptr( 4, T2); // load a ++ __ store_ptr(2, T2); // store a in 2nd c ++ // stack: ..., a, c, a, b, c ++ __ store_ptr(4, FSR); // store b in a ++ // stack: ..., b, c, a, b, c ++} ++ ++void TemplateTable::dup2_x2() { ++ transition(vtos, vtos); ++ // stack: ..., a, b, c, d ++ __ load_ptr(0, T2); // load d ++ __ load_ptr(1, FSR); // load c ++ __ push_ptr(FSR); // push c ++ __ push_ptr(T2); // push d ++ // stack: ..., a, b, c, d, c, d ++ __ load_ptr(4, FSR); // load b ++ __ store_ptr(2, FSR); // store b in d ++ __ store_ptr(4, T2); // store d in b ++ // stack: ..., a, d, c, b, c, d ++ __ load_ptr(5, T2); // load a ++ __ load_ptr(3, FSR); // load c ++ __ store_ptr(3, T2); // store a in c ++ __ store_ptr(5, FSR); // store c in a ++ // stack: ..., c, d, a, b, c, d ++} ++ ++void TemplateTable::swap() { ++ transition(vtos, vtos); ++ // stack: ..., a, b ++ __ load_ptr(1, A5); // load a ++ __ load_ptr(0, FSR); // load b ++ __ store_ptr(0, A5); // store a in b ++ __ store_ptr(1, FSR); // store b in a ++ // stack: ..., b, a ++} ++ ++void TemplateTable::iop2(Operation op) { ++ transition(itos, itos); ++ ++ __ pop_i(SSR); ++ if (UseSW8A) { ++ switch (op) { ++ case add : __ addwu(SSR, FSR, FSR); break; ++ case sub : __ subwu(SSR, FSR, FSR); break; ++ case mul : __ mulwu(SSR, FSR, FSR); break; ++ case _and : __ andw(SSR, FSR, FSR); break; ++ case _or : __ orw(SSR, FSR, FSR); break; ++ case _xor : __ xorw(SSR, FSR, FSR); break; ++ case shl : __ sllw(SSR, FSR, FSR); break; ++ case shr : __ sraw(SSR, FSR, FSR); break; ++ case ushr : __ srlw(SSR, FSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++ __ movws(FSR, FSR); ++ } else { ++ switch (op) { ++ case add : __ addwu(SSR, FSR, FSR); break; ++ case sub : __ subwu(SSR, FSR, FSR); break; ++ case mul : __ mulwu(SSR, FSR, FSR); break; ++ case _and : __ andw(SSR, FSR, FSR); break; ++ case _or : __ orw(SSR, FSR, FSR); break; ++ case _xor : __ xorw(SSR, FSR, FSR); break; ++ case shl : __ and_ins(FSR, 0x1f, FSR); __ slll(SSR, FSR, FSR); break; ++ case shr : __ and_ins(FSR, 0x1f, FSR); __ addw(SSR, 0, SSR); __ sral(SSR, FSR, FSR); break; ++ case ushr : __ and_ins(FSR, 0x1f, FSR); __ movwu(SSR, SSR); __ srll(SSR, FSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++ __ movws(FSR, FSR); ++ } ++} ++ ++void TemplateTable::lop2(Operation op) { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ ++ switch (op) { ++ case add : __ addptr(T2, FSR, FSR); break; ++ case sub : __ subptr(T2, FSR, FSR); break; ++ case _and: __ andptr(T2, FSR, FSR); break; ++ case _or : __ orptr(T2, FSR, FSR); break; ++ case _xor: __ xorptr(T2, FSR, FSR); break; ++ default : ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::idiv() { ++ transition(itos, itos); ++ Label not_zero; ++ ++ __ bne_l(FSR, not_zero); ++ __ jump(ExternalAddress(Interpreter::_throw_ArithmeticException_entry)); ++ __ 
bind(not_zero); ++ ++ __ pop_i(SSR); ++ if (UseSW8A) { ++ __ corrected_idivw(SSR, FSR, FSR); ++ } else if (FastIntDiv) { ++ __ stop("check idiv_sw"); ++ __ idiv_sw(SSR, FSR, FSR);//TODO:need check jzy ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::sdiv), FSR, SSR); ++ //__ movws(FSR, FSR);//clear high 32bits ++ } ++} ++ ++void TemplateTable::irem() { ++ transition(itos, itos); ++ Label not_zero; ++ __ pop_i(SSR); ++ ++ __ bne_l(FSR, not_zero); ++ __ jump(ExternalAddress(Interpreter::_throw_ArithmeticException_entry)); ++ ++ __ bind(not_zero); ++ if (UseSW8A) { ++ __ remw(SSR, FSR, FSR); ++ } else if (FastIntRem) { ++ __ stop("check irem_sw"); ++ __ irem_sw(SSR, FSR, FSR);//TODO:need check jzy ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::srem), FSR, SSR); ++ //__ movws(FSR, FSR);//clear high 32bits ++ } ++} ++ ++void TemplateTable::lmul() { ++ transition(ltos, ltos); ++ __ pop_l(T2); ++ __ mull(FSR, T2, FSR); ++} ++ ++void TemplateTable::ldiv() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne_l(FSR, normal); ++ ++ __ jump(ExternalAddress(Interpreter::_throw_ArithmeticException_entry)); ++ ++ __ bind(normal); ++ __ pop_l(A2); ++ if (UseSW8A) { ++ __ corrected_idivl(A2, FSR, FSR); ++ } else if (FastLongDiv) { ++ Label ldiv, exit; ++ __ slll(A2, 0xb, T7); ++ __ sral(T7, 0xb, T7); ++ __ cmpeq(A2, T7, T7); ++ __ bne_l(T7, ldiv); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv), FSR, A2); ++ __ jmp(exit); ++ ++ __ bind(ldiv); ++ __ ldiv_sw(A2, FSR, FSR); ++ ++ __ bind(exit); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::ldiv), FSR, A2); ++ } ++} ++ ++void TemplateTable::lrem() { ++ transition(ltos, ltos); ++ Label normal; ++ ++ __ bne_l(FSR, normal); ++ ++ __ jump(ExternalAddress(Interpreter::_throw_ArithmeticException_entry)); ++ ++ __ bind(normal); ++ __ pop_l (A2); ++ if (UseSW8A) { ++ __ reml(A2, FSR, FSR); ++ } else if (FastLongRem) { ++ Label lrem, exit; ++ __ slll(A2, 0xb, T7); ++ __ sral(T7, 0xb, T7); ++ __ cmpeq(A2, T7, T7); ++ __ bne_l(T7, lrem); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem), FSR, A2); ++ __ jmp(exit); ++ ++ __ bind(lrem); ++ __ lrem_sw(A2, FSR, FSR); ++ ++ __ bind(exit); ++ } else { ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::lrem), FSR, A2); ++ } ++} ++ ++void TemplateTable::lshl() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ slll(T0, FSR, FSR); ++} ++ ++void TemplateTable::lshr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ sral(T0, FSR, FSR); ++} ++ ++void TemplateTable::lushr() { ++ transition(itos, ltos); ++ __ pop_l(T0); ++ __ srll(T0, FSR, FSR); ++} ++ ++void TemplateTable::fop2(Operation op) { ++ transition(ftos, ftos); ++ switch (op) { ++ case add: ++ __ flds(FTF, 0, esp); ++ __ add_s(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ flds(FTF, 0, esp); ++ __ sub_s(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ flds(FTF, 0, esp); ++ __ mul_s(FSF, FTF, FSF); ++ break; ++ case div: ++ __ flds(FTF, 0, esp); ++ __ div_s(FSF, FTF, FSF); ++ break; ++ case rem: ++ { ++ __ flds(f16, 0, esp); //x ++ __ fcpys(FSF, FSF, f17); ++ Label nan, cont, end; ++ ++ // y = 0.0f ++ __ ffbeq(f17, nan); ++ // x = NaN infinity ++ __ boundary_test(f16, GP); ++ __ beq_l(GP, nan); ++ // y = NaN ++ __ boundary_test(f17, GP); ++ __ bne_l(GP, cont); ++ __ fimovd(f17, AT); ++ __ slll(AT, 12, GP); ++ __ bne_l(GP, nan); ++ ++ __ bind(cont); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::frem), 2); ++ __ jmp(end); ++ ++ __ bind(nan); ++ 
__ fdivd(f31, f31, FSF); ++ __ bind(end); ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addptr(esp, 1 * wordSize, esp); ++} ++ ++void TemplateTable::dop2(Operation op) { ++ transition(dtos, dtos); ++ switch (op) { ++ case add: ++ __ fldd(FTF, 0, esp); ++ __ add_d(FSF, FTF, FSF); ++ break; ++ case sub: ++ __ fldd(FTF, 0, esp); ++ __ sub_d(FSF, FTF, FSF); ++ break; ++ case mul: ++ __ fldd(FTF, 0, esp); ++ __ mul_d(FSF, FTF, FSF); ++ break; ++ case div: ++ __ fldd(FTF, 0, esp); ++ __ div_d(FSF, FTF, FSF); ++ break; ++ case rem: ++ { ++ __ fldd(f16, 0, esp); //x ++ __ fcpys(FSF, FSF, f17); ++ Label nan, cont, end; ++ // y = 0.0f ++ __ ffbeq(f17, nan); ++ // x = NaN infinity ++ __ boundary_test(f16, GP); ++ __ beq_l(GP, nan); ++ // y = NaN ++ __ boundary_test(f17, GP); ++ __ bne_l(GP, cont); ++ __ fimovd(f17, AT); ++ __ slll(AT, 12, GP); ++ __ bne_l(GP, nan); ++ ++ __ bind(cont); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::drem), 2); ++ __ jmp(end); ++ ++ __ bind(nan); ++ __ fdivd(f31, f31, FSF); ++ __ bind(end); ++ } ++ break; ++ default : ShouldNotReachHere(); ++ } ++ ++ __ addptr(esp, 2 * wordSize, esp); ++} ++ ++void TemplateTable::ineg() { ++ transition(itos, itos); ++ __ subw(R0, FSR, FSR); ++} ++ ++void TemplateTable::lneg() { ++ transition(ltos, ltos); ++ __ subl(R0, FSR, FSR); ++} ++ ++void TemplateTable::fneg() { ++ transition(ftos, ftos); ++ __ fcpysn(FSF, FSF, FSF); ++} ++ ++void TemplateTable::dneg() { ++ transition(dtos, dtos); ++ __ fcpysn(FSF, FSF, FSF); ++} ++ ++void TemplateTable::iinc() { ++ transition(vtos, vtos); ++ __ load_signed_byte64(AT, at_bcp(2)); // get constant ++ locals_index(T2); ++ __ ldws(FSR, iaddress(T2)); ++ __ addl(FSR, AT, FSR); ++ __ stw(FSR, iaddress(T2)); ++} ++ ++void TemplateTable::wide_iinc() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ get_unsigned_2_byte_index_at_bcp(FSR, 4); ++ __ sexth(FSR, FSR); ++ __ ldws(AT, iaddress(T2)); ++ __ addl(AT, FSR, FSR); ++ __ stw(FSR, iaddress(T2)); ++} ++ ++void TemplateTable::convert() { ++ // Checking ++#ifdef ASSERT ++ { ++ TosState tos_in = ilgl; ++ TosState tos_out = ilgl; ++ switch (bytecode()) { ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_in = itos; break; ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_l2d: tos_in = ltos; break; ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_f2d: tos_in = ftos; break; ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_d2l: // fall through ++ case Bytecodes::_d2f: tos_in = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ switch (bytecode()) { ++ case Bytecodes::_l2i: // fall through ++ case Bytecodes::_f2i: // fall through ++ case Bytecodes::_d2i: // fall through ++ case Bytecodes::_i2b: // fall through ++ case Bytecodes::_i2c: // fall through ++ case Bytecodes::_i2s: tos_out = itos; break; ++ case Bytecodes::_i2l: // fall through ++ case Bytecodes::_f2l: // fall through ++ case Bytecodes::_d2l: tos_out = ltos; break; ++ case Bytecodes::_i2f: // fall through ++ case Bytecodes::_l2f: // fall through ++ case Bytecodes::_d2f: tos_out = ftos; break; ++ case Bytecodes::_i2d: // fall through ++ case Bytecodes::_l2d: // fall through ++ case Bytecodes::_f2d: tos_out = dtos; break; ++ default : ShouldNotReachHere(); ++ } ++ 
transition(tos_in, tos_out); ++ } ++#endif // ASSERT ++ ++ // Conversion ++ switch (bytecode()) { ++ case Bytecodes::_i2l: ++ __ movws(FSR, FSR); ++ break; ++ case Bytecodes::_i2f: ++ //__ movws(FSR, FSR); ++ if (UseSW8A) { ++ __ cmovws(FSF, FSR); ++ }else { ++ __ ifmovd(FSR, f30); ++ __ fcvtls(f30, FSF); ++ } ++ break; ++ case Bytecodes::_i2d: ++ //__ movws(FSR, FSR); ++ if (UseSW8A) { ++ __ cmovwd(FSF, FSR); ++ }else { ++ __ ifmovd(FSR, f30); ++ __ fcvtld(f30, FSF); ++ } ++ break; ++ case Bytecodes::_i2b: ++ __ sextb(FSR, FSR); ++ //__ movw(FSR, FSR); ++ break; ++ case Bytecodes::_i2c: ++ __ zapnot(FSR, 0x3, FSR); ++ break; ++ case Bytecodes::_i2s: ++ __ sexth(FSR, FSR); ++ //__ movws(FSR, FSR); ++ break; ++ case Bytecodes::_l2i: ++ __ movws(FSR, FSR); ++ break; ++ case Bytecodes::_l2f: ++ if (UseSW8A) { ++ __ cmovls(FSF, FSR); ++ }else { ++ __ ifmovd(FSR, FSF); ++ __ cvt_s_l(FSF, FSF); ++ } ++ break; ++ case Bytecodes::_l2d: ++ if (UseSW8A) { ++ __ cmovld(FSF, FSR); ++ }else { ++ __ ifmovd(FSR, FSF); ++ __ cvt_d_l(FSF, FSF); ++ } ++ break; ++ case Bytecodes::_f2i: ++ { ++ if (UseSW8A) { ++ __ cmovdw_z(FSR, FSF); ++ }else { ++ Label L; ++ __ fcpys(FSF, FSF, f16); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2i), 1); ++ //__ movws(FSR, FSR); ++ __ bind(L); ++ } ++ } ++ break; ++ case Bytecodes::_f2l: ++ { ++ if (UseSW8A) { ++ __ cmovdl_z(FSR, FSF); ++ }else { ++ Label L; ++ __ fcpys(FSF, FSF, f16); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::f2l), 1); ++ __ bind(L); ++ } ++ } ++ break; ++ case Bytecodes::_f2d: ++ __ cvt_d_s(FSF, FSF); ++ break; ++ case Bytecodes::_d2i: ++ { ++ if (UseSW8A) { ++ __ cmovdw_z(FSR, FSF); ++ }else { ++ Label L; ++ __ fcpys(FSF, FSF, f16); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2i), 1); ++ //__ movws(FSR, FSR); ++ __ bind(L); ++ } ++ } ++ break; ++ case Bytecodes::_d2l: ++ { ++ if (UseSW8A) { ++ __ cmovdl_z(FSR, FSF); ++ }else { ++ Label L; ++ __ fcpys(FSF, FSF, f16); ++ __ call_VM_leaf(CAST_FROM_FN_PTR(address, SharedRuntime::d2l), 1); ++ __ bind(L); ++ } ++ } ++ break; ++ case Bytecodes::_d2f: ++ __ cvt_s_d(FSF, FSF); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++} ++ ++void TemplateTable::lcmp() { ++ transition(ltos, itos); ++ ++ Label done; ++ __ pop_l(SSR); ++ __ cmpl(SSR, FSR); ++ __ ldi(FSR, -1, R0); ++ __ jcc(Assembler::less, done); ++ __ ldi(FSR, 0, R0); ++ __ jcc(Assembler::equal, done); ++ __ ldi(FSR, 1, R0); ++ __ bind(done); ++} ++ ++void TemplateTable::float_cmp(bool is_float, int unordered_result) { ++ Label less, done; ++ ++ __ bis(R0, R0, FSR); ++ if (is_float) { ++ __ flds(FTF, 0, esp); ++ __ fcmpeq(FTF, FSF, FcmpRES); ++ __ addiu(esp, 1 * wordSize, esp); ++ __ ffbne(FcmpRES, done); ++ ++ if (unordered_result < 0) ++ __ c_ult_s(FTF, FSF); ++ else ++ __ c_olt_s(FTF, FSF); ++ } else { ++ __ fldd(FTF, 0, esp); ++ __ fcmpeq(FTF, FSF, FcmpRES); ++ __ addiu(esp, 2 * wordSize, esp); ++ __ ffbne(FcmpRES, done); ++ ++ if (unordered_result<0) ++ __ c_ult_d(FTF, FSF); ++ else ++ __ c_olt_d(FTF, FSF); ++ } ++ __ ffbne(FcmpRES, less); ++ __ ldi(FSR, 1, R0); ++ __ jmp(done); ++ __ bind(less); ++ __ ldi(FSR, -1, R0); ++ __ bind(done); ++} ++ ++void TemplateTable::branch(bool is_jsr, bool is_wide) {SCOPEMARK_NAME(TemplateTable_branch, _masm) ++ Register rcx = rmethod; ++ Register rax = T5; ++ Register rbx = T2; ++ Register rdx = T7; ++ __ get_method(rcx); ++ __ profile_taken_branch(rax, rbx); // T5 holds updated MDP, T2 ++ // holds bumped taken count ++ ++ const ByteSize be_offset = 
MethodCounters::backedge_counter_offset() + ++ InvocationCounter::counter_offset(); ++ const ByteSize inv_offset = MethodCounters::invocation_counter_offset() + ++ InvocationCounter::counter_offset(); ++ ++ // Load up T7 with the branch displacement TODO:check jzy ++ if (is_wide) { ++ __ ldbu(T7, at_bcp(1)); ++ __ ldbu(AT, at_bcp(2)); ++ __ slll(T7, 8, T7); ++ __ bis(T7, AT, T7); ++ __ ldbu(AT, at_bcp(3)); ++ __ slll(T7, 8, T7); ++ __ bis(T7, AT, T7); ++ __ ldbu(AT, at_bcp(4)); ++ __ slll(T7, 8, T7); ++ __ bis(T7, AT, T7); ++ __ movws(T7, T7); ++ } else { ++ __ load_signed_byte64(T7, at_bcp(1)); ++ __ ldbu(AT, at_bcp(2)); ++ __ slll(T7, 8, T7); ++ __ bis(T7, AT, T7); ++ } ++ ++ // Handle all the JSR stuff here, then exit. ++ // It's much shorter and cleaner than intermingling with the non-JSR ++ // normal-branch stuff occurring below. ++ if (is_jsr) { ++ // Pre-load the next target bytecode into rnext ++ __ load_unsigned_byte(rnext, Address(rbcp, T7, Address::times_1, 0)); ++ ++ // compute return address as bci in FSR ++ __ lea(FSR, at_bcp((is_wide ? 5 : 3) - ++ in_bytes(ConstMethod::codes_offset()))); ++ __ ldptr(AT, Address(rmethod, Method::const_offset())); ++ __ subptr(FSR, AT, FSR); ++ // Adjust the bcp in rbcp by the displacement in T7 ++ __ addptr(rbcp, T7, rbcp); ++ // jsr returns atos that is not an oop ++ __ push_i(FSR); ++ __ dispatch_only(vtos, true); ++ return; ++ } ++ ++ // Normal (non-jsr) branch handling ++ ++ // Adjust the bcp in S0 by the displacement in T7 ++ __ addptr(rbcp, T7, rbcp); ++ ++ assert(UseLoopCounter || !UseOnStackReplacement, ++ "on-stack-replacement requires loop counters"); ++ Label backedge_counter_overflow; ++ Label profile_method; ++ Label dispatch; ++ if (UseLoopCounter) { ++ // increment backedge counter for backward branches ++ // T5: MDO ++ // T2: MDO bumped taken-count ++ // rmethod: method ++ // T7: target offset ++ // rbcp: target bcp ++ // rlocals: locals pointer ++ // check if forward or backward branch ++ __ jcc(Assembler::positive, dispatch, T7); // count only if backward branch ++ ++ // check if MethodCounters exists ++ Label has_counters; ++ __ ldptr(rcc, Address(rmethod, Method::method_counters_offset())); ++ __ jcc(Assembler::notZero, has_counters); ++ __ push(T7); ++ __ push(T2); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::build_method_counters), ++ rmethod); ++ __ pop(T2); ++ __ pop(T7); ++ __ ldptr(T5, Address(rmethod, Method::method_counters_offset())); ++ __ jcc(Assembler::zero, dispatch, T5); ++ __ bind(has_counters); ++ ++ if (TieredCompilation) { ++ Label no_mdo; ++ int increment = InvocationCounter::count_increment; ++ if (ProfileInterpreter) { ++ // Are we profiling? ++ __ ldptr(T2, Address(rmethod, Method::method_data_offset())); //T2 for p1876 used ++ __ jcc(Assembler::zero, no_mdo, T2); ++ // Increment the MDO backedge counter ++ const Address mdo_backedge_counter(T2, in_bytes(MethodData::backedge_counter_offset()) + ++ in_bytes(InvocationCounter::counter_offset())); ++ const Address mask(T2, in_bytes(MethodData::backedge_mask_offset())); ++ __ increment_mask_and_jump(mdo_backedge_counter, increment, mask, T5, false, Assembler::zero, ++ UseOnStackReplacement ? 
&backedge_counter_overflow : NULL); ++ __ jmp(dispatch); ++ } ++ __ bind(no_mdo); ++ // Increment backedge counter in MethodCounters* ++ __ ldptr(T0, Address(rmethod, Method::method_counters_offset())); ++ const Address mask(T0, in_bytes(MethodCounters::backedge_mask_offset())); ++ __ increment_mask_and_jump(Address(T0, be_offset), increment, mask, ++ T5, false, Assembler::zero, ++ UseOnStackReplacement ? &backedge_counter_overflow : NULL); ++ } else { // not TieredCompilation ++ // increment counter ++ Register rmcs = T4; ++ Register rbe = T3; ++ Register rinv = T1; //backedge counter & invocation counter ++ __ ldptr(rmcs, Address(rmethod, Method::method_counters_offset())); ++ __ ldws(rbe, Address(rmcs, be_offset)); // load backedge counter ++ __ addl(rbe, InvocationCounter::count_increment, rbe); // increment counter ++ __ stw(rbe, Address(rmcs, be_offset)); // store counter ++ ++ __ ldws(rinv, Address(rmcs, inv_offset)); // load invocation counter ++ ++ __ andw(rinv, InvocationCounter::count_mask_value, rinv); // and the status bits ++ __ addwu(rinv, rbe, rinv); // add both counters TODO:check jzy ++ ++ if (ProfileInterpreter) { ++ // Test to see if we should create a method data oop ++ __ cmpw(rinv, Address(rmcs, in_bytes(MethodCounters::interpreter_profile_limit_offset()))); ++ __ jcc(Assembler::less, dispatch); ++ ++ // if no method data exists, go to profile method ++ __ test_method_data_pointer(rinv, profile_method); ++ ++ if (UseOnStackReplacement) { ++ // check for overflow against T2 which is the MDO taken count ++ __ cmpw(T2, Address(rmcs, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); ++ __ jcc(Assembler::below, dispatch); ++ ++ // When ProfileInterpreter is on, the backedge_count comes ++ // from the MethodData*, which value does not get reset on ++ // the call to frequency_counter_overflow(). To avoid ++ // excessive calls to the overflow routine while the method is ++ // being compiled, add a second test to make sure the overflow ++ // function is called only once every overflow_frequency. ++ const int overflow_frequency = 1024; ++ __ andw(T2, overflow_frequency - 1, rcc);// TODO check lsp zero extend is ok?? ++// __ ldi(rscratch3, overflow_frequency-1, R0); ++// __ and_ins(rscratch3, T2, rcc); ++ __ jcc(Assembler::zero, backedge_counter_overflow); ++ ++ } ++ } else { ++ if (UseOnStackReplacement) { ++ // check for overflow against rax, which is the sum of the ++ // counters ++ __ cmpw(rinv, Address(rmcs, in_bytes(MethodCounters::interpreter_backward_branch_limit_offset()))); ++ __ jcc(Assembler::aboveEqual, backedge_counter_overflow); ++ ++ } ++ } ++ } ++ __ bind(dispatch); ++ } ++ ++ // Pre-load the next target bytecode into rnext ++ __ load_unsigned_byte(rnext, Address(rbcp, 0)); ++ ++ // continue with the bytecode @ target ++ // FSR: return bci for jsr's, unused otherwise ++ // rnext: target bytecode ++ // rbcp: target bcp ++ __ dispatch_only(vtos, true); ++ ++ if (UseLoopCounter) { ++ if (ProfileInterpreter && !TieredCompilation) { ++ // Out-of-line code to allocate method data oop. ++ __ bind(profile_method); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::profile_method)); ++ __ load_unsigned_byte(rnext, Address(rbcp, 0)); //swjdk8 and aarch64 use it lsp?? ++ __ set_method_data_pointer_for_bcp(); ++ __ jmp(dispatch); ++ } ++ ++ if (UseOnStackReplacement) { ++ // invocation counter overflow ++ __ bind(backedge_counter_overflow); ++ __ subptr(R0, T7, T7); // yj todo: ?? 
why neg T7 ++ __ addptr(T7, rbcp, T7); // branch bcp ++ // IcoResult frequency_counter_overflow([JavaThread*], address branch_bcp) ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::frequency_counter_overflow), ++ T7); ++ __ load_unsigned_byte(rnext, Address(rbcp, 0)); //swjdk8 and aarch64 use it lsp?? ++ // V0: osr nmethod (osr ok) or NULL (osr not possible) return by the call_vm ++ __ testptr(V0, V0); // test result ++ __ jcc(Assembler::zero, dispatch); // no osr if null ++ // nmethod may have been invalidated (VM may block upon call_VM return) ++ __ cmpb(Address(V0, nmethod::state_offset()), nmethod::in_use); ++ __ jcc(Assembler::notEqual, dispatch); ++ ++ // We have the address of an on stack replacement routine in V0. ++ // In preparation of invoking it, first we must migrate the locals ++ // and monitors from off the interpreter frame on the stack. ++ // Ensure to save the osr nmethod over the migration call, ++ // it will be preserved in rbcp. ++ __ movl(rbcp, V0); ++ ++ call_VM(noreg, CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_begin)); ++ ++ // V0 is OSR buffer, move it to expected parameter location ++ __ movl(j_rarg0, V0); ++ // We use j_rarg definitions here so that registers don't conflict as parameter ++ // registers change across platforms as we are in the midst of a calling ++ // sequence to the OSR nmethod and we don't want collision. These are NOT parameters. ++ ++ const Register retaddr = j_rarg2; ++ const Register sender_sp = j_rarg1; ++ ++ // pop the interpreter frame ++ __ ldptr(sender_sp, Address(rfp, frame::interpreter_frame_sender_sp_offset * wordSize)); // get sender ++ __ leave(); // remove frame anchor ++ __ move(retaddr, RA); // get return address ++ // set sp to sender sp ++ // Ensure compiled code always sees stack at proper alignment ++ //__ andptr(sender_sp, -(StackAlignmentInBytes), esp); //TODO: jzy check why need alignment? ++ __ movl(esp, sender_sp); ++ ++ // unlike x86 we need no specialized return from compiled code ++ // to the interpreter or the call stub. 
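++      // State at this point (summary of the moves above): rbcp holds the
++      // OSR nmethod, j_rarg0 the OSR buffer returned by OSR_migration_begin,
++      // esp has been reset to the sender sp, and retaddr still holds the
++      // interpreter return address. The jump below enters the nmethod at
++      // its OSR entry point and does not return here.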
++ ++ // push the return address ++// __ push(retaddr); ++ ++ // and begin the OSR nmethod ++ __ jmp(Address(rbcp, nmethod::osr_entry_point_offset())); ++ } ++ } ++} ++ ++void TemplateTable::if_0cmp(Condition cc) {SCOPEMARK_NAME(if_0cmp, _masm) ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ __ cmpw(FSR, R0); ++ __ jcc(j_not(cc), not_taken); ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_icmp(Condition cc) { ++ transition(itos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ __ pop_i(SSR); ++ __ cmpw(SSR, FSR); ++ __ jcc(j_not(cc), not_taken); ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_nullcmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ __ testptr(FSR, FSR); ++ __ jcc(j_not(cc), not_taken); ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::if_acmp(Condition cc) { ++ transition(atos, vtos); ++ // assume branch is more often taken than not (loops use backward branches) ++ Label not_taken; ++ __ pop_ptr(SSR); ++ __ cmpoop(SSR, FSR); ++ __ jcc(j_not(cc), not_taken); ++ branch(false, false); ++ __ bind(not_taken); ++ __ profile_not_taken_branch(FSR); ++} ++ ++void TemplateTable::ret() {SCOPEMARK_NAME(TemplateTable::ret, _masm) ++ transition(vtos, vtos); ++ locals_index(T2); ++ __ ldptr(T2, iaddress(T2)); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ __ get_method(T1); ++ __ ldptr(rbcp, Address(T1, Method::const_offset())); ++ __ lea(rbcp, Address(rbcp, T2, Address::times_1, ++ ConstMethod::codes_offset())); ++ __ dispatch_next(vtos, 0, true); ++} ++ ++void TemplateTable::wide_ret() { ++ transition(vtos, vtos); ++ locals_index_wide(T2); ++ __ ldptr(T2, aaddress(T2)); // get return bci, compute return bcp ++ __ profile_ret(T2, T3); ++ __ get_method(T1); ++ __ ldptr(rbcp, Address(T1, Method::const_offset())); ++ __ lea(rbcp, Address(rbcp, T2, Address::times_1, ConstMethod::codes_offset())); ++ __ dispatch_next(vtos, 0, true); ++} ++ ++void TemplateTable::tableswitch() { ++ Label default_case, continue_execution; ++ transition(itos, vtos); ++ Register rbx = T2; ++ Register rcx = T3; ++ Register rdx = T7; ++ Register rax = FSR; ++ ++ // align rbcp ++ __ lea(rbx, at_bcp(BytesPerInt)); ++ __ andptr(rbx, -BytesPerInt, rbx); ++ // load lo & hi ++ __ ldwu(rcx, Address(rbx, BytesPerInt)); ++ __ ldwu(rdx, Address(rbx, 2 * BytesPerInt)); ++ __ bswapw(rcx); ++ __ bswapw(rdx); ++ // check against lo & hi ++ __ cmpw(rax, rcx); ++ __ jcc(Assembler::less, default_case); ++ __ cmpw(rax, rdx); ++ __ jcc(Assembler::greater, default_case); ++ // lookup dispatch offset ++ __ subwu(rax, rcx, rax); ++ __ ldwu(rdx, Address(rbx, rax, Address::times_4, 3 * BytesPerInt)); ++ __ profile_switch_case(rax, rbx, rcx); ++ // continue execution ++ __ bind(continue_execution); ++ __ bswapw(rdx); ++ __ addw(rdx, R0, rdx);// sign extend T7 ++ __ load_unsigned_byte(rnext, Address(rbcp, rdx, Address::times_1)); ++ __ addptr(rbcp, rdx, rbcp); ++ __ dispatch_only(vtos, true); ++ // handle default ++ __ bind(default_case); ++ __ profile_switch_default(rax); ++ __ ldw(rdx, Address(rbx, 0)); ++ __ jmp(continue_execution); ++} ++ ++void TemplateTable::lookupswitch() { ++ 
transition(itos, itos); ++ __ stop("lookupswitch bytecode should have been rewritten"); ++} ++ ++void TemplateTable::fast_linearswitch() { ++ transition(itos, vtos); ++ Label loop_entry, loop, found, continue_execution; ++ const Register rbx = T2; ++ const Register rcx = T3; ++ const Register rdx = T7; ++ // swap FSR so we can avoid swapping the table entries ++ __ bswapw(FSR); ++ // align rbcp ++ __ lea(rbx, at_bcp(BytesPerInt)); // btw: should be able to get rid of ++ // this instruction (change offsets ++ // below) ++ __ andptr(rbx, -BytesPerInt, rbx); ++ // set counter ++ __ ldwu(rcx, Address(rbx, BytesPerInt)); ++ __ bswapw(rcx); ++ __ jmp(loop_entry); ++ // table search ++ __ bind(loop); ++ __ cmpw(FSR, Address(rbx, rcx, Address::times_8, 2 * BytesPerInt)); ++ __ jcc(Assembler::equal, found); ++ __ bind(loop_entry); ++ __ decrementl(rcx); ++ __ jcc(Assembler::greaterEqual, loop, rcx); ++ // default case ++ __ profile_switch_default(FSR); ++ __ ldw(rdx, Address(rbx, 0)); ++ __ jmp(continue_execution); ++ // entry found -> get offset ++ __ bind(found); ++ __ ldwu(rdx, Address(rbx, rcx, Address::times_8, 3 * BytesPerInt)); ++ __ profile_switch_case(rcx, FSR, rbx); ++ // continue execution ++ __ bind(continue_execution); ++ __ bswapw(rdx); ++ __ addw(rdx, R0, rdx);// sign extend rdx ++ __ load_unsigned_byte(rnext, Address(rbcp, rdx, Address::times_1)); ++ __ addptr(rbcp, rdx, rbcp); ++ __ dispatch_only(vtos, true); ++} ++ ++void TemplateTable::fast_binaryswitch() { ++ transition(itos, vtos); ++ // Implementation using the following core algorithm: ++ // ++ // int binary_search(int key, LookupswitchPair* array, int n) { ++ // // Binary search according to "Methodik des Programmierens" by ++ // // Edsger W. Dijkstra and W.H.J. Feijen, Addison Wesley Germany 1985. ++ // int i = 0; ++ // int j = n; ++ // while (i+1 < j) { ++ // // invariant P: 0 <= i < j <= n and (a[i] <= key < a[j] or Q) ++ // // with Q: for all i: 0 <= i < n: key < a[i] ++ // // where a stands for the array and assuming that the (inexisting) ++ // // element a[n] is infinitely big. 
++ // int h = (i + j) >> 1; ++ // // i < h < j ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // } ++ // // R: a[i] <= key < a[i+1] or Q ++ // // (i.e., if key is within array, i is the correct index) ++ // return i; ++ // } ++ ++ // Register allocation ++ const Register key = FSR; // already set (tosca) ++ const Register array = T2; ++ const Register i = T3; ++ const Register j = T7; ++ const Register h = T1; ++ const Register temp = T0; ++ ++ //__ subw(FSR, R0, key);//sign extend ++ // Find array start ++ __ lea(array, at_bcp(3 * BytesPerInt)); // btw: should be able to ++ // get rid of this ++ // instruction (change ++ // offsets below) ++ __ andptr(array, -BytesPerInt, array); ++ ++ // initialize i & j ++ __ movw(i, R0); // i = 0; ++ __ ldwu(j, Address(array, -BytesPerInt)); // j = length(array); ++ ++ // Convert j into native byteordering ++ __ bswapw(j); ++ ++ // And start ++ Label entry; ++ __ jmp(entry); ++ BLOCK_COMMENT("binary search loop"); ++ // binary search loop ++ { ++ Label loop; ++ __ bind(loop); ++ // int h = (i + j) >> 1; ++ __ addw(i, j, h); // h = i + j; ++ __ srll(h, 1, h); // h = (i + j) >> 1; ++ // if (key < array[h].fast_match()) { ++ // j = h; ++ // } else { ++ // i = h; ++ // } ++ // Convert array[h].match to native byte-ordering before compare ++ __ ldwu(temp, Address(array, h, Address::times_8)); ++ __ bswapw(temp); ++ __ subw(temp, R0, temp); ++ __ cmpl(key, temp); ++ // j = h if (key < array[h].fast_match()) ++ __ cmove(Assembler::less, j, h, j); ++ // i = h if (key >= array[h].fast_match()) ++ __ cmove(Assembler::greaterEqual, i, h, i); ++ // while (i+1 < j) ++ __ bind(entry); ++ __ addwu(i, 1, h); // i+1 ++ __ cmpw(h, j); // i+1 < j ++ __ jcc(Assembler::less, loop); ++ } ++ ++ // end of binary search, result index is i (must check again!) 
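++  // For reference, the (big-endian) lookupswitch payload addressed via
++  // 'array' is laid out as:
++  //   array[-2] : default offset     array[-1] : npairs
++  //   array[0..]: {match, offset} pairs, 8 bytes each
++  // which is why the default case below reloads from -2 * BytesPerInt.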
++ Label default_case; ++ // Convert array[i].match to native byte-ordering before compare ++ __ ldwu(temp, Address(array, i, Address::times_8)); ++ __ bswapw(temp); ++ __ subw(temp, R0, temp); ++ __ cmpl(key, temp); ++ __ jcc(Assembler::notEqual, default_case); ++ ++ // entry found -> j = offset ++ __ ldwu(j , Address(array, i, Address::times_8, BytesPerInt)); ++ __ profile_switch_case(i, key, array); ++ __ bswapw(j); ++ __ addw(j, R0, j);// sign extend j ++ ++ __ load_unsigned_byte(rnext, Address(rbcp, j, Address::times_1)); ++ __ addptr(rbcp, j, rbcp); ++ __ dispatch_only(vtos, true); ++ ++ // default case -> j = default offset ++ __ bind(default_case); ++ __ profile_switch_default(i); ++ __ ldwu(j, Address(array, -2 * BytesPerInt)); ++ __ bswapw(j); ++ __ addw(j, R0, j); ++ ++ __ movws(key, key);//clear hi-32bit ++ ++ __ load_unsigned_byte(rnext, Address(rbcp, j, Address::times_1)); ++ __ addptr(rbcp, j, rbcp); ++ __ dispatch_only(vtos, true); ++} ++ ++void TemplateTable::_return(TosState state) {SCOPEMARK_NAME(TemplateTable::_return, _masm) ++ transition(state, state); ++ ++ assert(_desc->calls_vm(), ++ "inconsistent calls_vm information"); // call in remove_activation ++ ++ if (_desc->bytecode() == Bytecodes::_return_register_finalizer) { ++ assert(state == vtos, "only valid state"); ++ Register robj = c_rarg1; ++ __ ldptr(robj, aaddress(0)); ++ __ load_klass(T1, robj); ++ __ ldw(T1, Address(T1, Klass::access_flags_offset())); ++ __ testw(T1, JVM_ACC_HAS_FINALIZER); ++ Label skip_register_finalizer; ++ __ jcc(Assembler::zero, skip_register_finalizer); ++ ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::register_finalizer), robj); ++ ++ __ bind(skip_register_finalizer); ++ } ++ ++ if (SafepointMechanism::uses_thread_local_poll() && _desc->bytecode() != Bytecodes::_return_register_finalizer) { ++ Label no_safepoint; ++ NOT_PRODUCT(__ block_comment("Thread-local Safepoint poll")); ++ __ ldbu(AT, Address(rthread, Thread::polling_page_offset())); ++ __ and_ins(AT, SafepointMechanism::poll_bit(), rcc); ++ __ jcc(Assembler::zero, no_safepoint); ++ __ push(state); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::at_safepoint)); ++ __ pop(state); ++ __ bind(no_safepoint); ++ } ++ ++ // Narrow result if state is itos but result type is smaller. ++ // Need to narrow in the return bytecode rather than in generate_return_entry ++ // since compiled code callers expect the result to already be narrowed. ++ if (state == itos) { ++ __ narrow(FSR); ++ } ++ ++ __ remove_activation(state, T12); ++ if(UseWmemb) ++ __ wmemb(); ++ else ++ __ memb(); ++ ++ __ jmp(T12); ++} ++ ++// ---------------------------------------------------------------------------- ++// Volatile variables demand their effects be made known to all CPU's ++// in order. Store buffers on most chips allow reads & writes to ++// reorder; the JMM's ReadAfterWrite.java test fails in -Xint mode ++// without some kind of memory barrier (i.e., it's not sufficient that ++// the interpreter does not reorder volatile references, the hardware ++// also must not reorder them). ++// ++// According to the new Java Memory Model (JMM): ++// (1) All volatiles are serialized wrt to each other. ALSO reads & ++// writes act as aquire & release, so: ++// (2) A read cannot let unrelated NON-volatile memory refs that ++// happen after the read float up to before the read. It's OK for ++// non-volatile memory refs that happen before the volatile read to ++// float down below it. 
++// (3) Similar a volatile write cannot let unrelated NON-volatile ++// memory refs that happen BEFORE the write float down to after the ++// write. It's OK for non-volatile memory refs that happen after the ++// volatile write to float up before it. ++// ++// We only put in barriers around volatile refs (they are expensive), ++// not _between_ memory refs (that would require us to track the ++// flavor of the previous memory refs). Requirements (2) and (3) ++// require some barriers before volatile stores and after volatile ++// loads. These nearly cover requirement (1) but miss the ++// volatile-store-volatile-load case. This final case is placed after ++// volatile-stores although it could just as well go before ++// volatile-loads. ++ ++void TemplateTable::volatile_barrier() { ++ if(os::is_MP()) __ memb(); ++} ++ ++void TemplateTable::resolve_cache_and_index(int byte_no, ++ Register Rcache, ++ Register index, ++ size_t index_size) {SCOPEMARK_NAME(resolve_cache_and_index, _masm) ++ const Register temp = A1; ++ assert_different_registers(Rcache, index, temp); ++ ++ Label resolved, Ldone; ++ ++ Bytecodes::Code code = bytecode(); ++ switch (code) { ++ case Bytecodes::_nofast_getfield: code = Bytecodes::_getfield; break; ++ case Bytecodes::_nofast_putfield: code = Bytecodes::_putfield; break; ++ default: break; ++ } ++ ++ assert(byte_no == f1_byte || byte_no == f2_byte, "byte_no out of range"); ++ __ get_cache_and_index_and_bytecode_at_bcp(Rcache, index, temp, byte_no, 1, index_size); ++ __ cmpw(temp, code); // have we resolved this bytecode? ++ __ jcc(Assembler::equal, resolved); ++ ++ // resolve first time through ++ address entry = CAST_FROM_FN_PTR(address, InterpreterRuntime::resolve_from_cache); ++ __ movw(temp, code); ++ __ call_VM(noreg, entry, temp); ++ // Update registers with resolved info ++ __ get_cache_and_index_at_bcp(Rcache, index, 1, index_size); ++ __ jmp(Ldone); ++ ++ __ bind(resolved); ++ __ memb(); // Order load wrt. succeeding loads. ++ __ bind(Ldone); ++} ++ ++// The Rcache and index registers must be set before call ++// n.b unlike x86 cache already includes the index offset// yj todo: ?? 
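load_field_cp_cache_entry() below fetches the resolved field's offset and its flags word from the ConstantPoolCacheEntry; the get/putfield templates then shift out the tos (top-of-stack) state and test the volatile bit, which is what the later srll/andw sequences implement. A minimal C++ sketch of that unpacking; the bit positions here are illustrative placeholders, not the real ConstantPoolCacheEntry::tos_state_shift / is_volatile_shift values:

#include <cstdint>
#include <cstdio>

// Illustrative layout only: tos state in the top nibble, a volatile bit below it.
// The real positions come from ConstantPoolCacheEntry and are not reproduced here.
constexpr uint32_t kTosStateShift   = 28;
constexpr uint32_t kTosStateMask    = 0xF;
constexpr uint32_t kIsVolatileShift = 21;

struct DecodedFieldFlags {
  uint32_t tos_state;    // selects the btos/ztos/.../dtos branch in getfield/putfield
  bool     is_volatile;  // when set, the template brackets the access with memory barriers
};

static DecodedFieldFlags decode_field_flags(uint32_t flags) {
  DecodedFieldFlags d;
  d.tos_state   = (flags >> kTosStateShift) & kTosStateMask;
  d.is_volatile = (flags & (1u << kIsVolatileShift)) != 0;
  return d;
}

int main() {
  // A made-up flags word: some tos state value plus the volatile bit set.
  uint32_t flags = (4u << kTosStateShift) | (1u << kIsVolatileShift);
  DecodedFieldFlags d = decode_field_flags(flags);
  std::printf("tos=%u volatile=%d\n", d.tos_state, d.is_volatile);
  return 0;
}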
++void TemplateTable::load_field_cp_cache_entry(Register obj, ++ Register cache, ++ Register index, ++ Register off, ++ Register flags, ++ bool is_static = false) {SCOPEMARK_NAME(load_field_cp_cache_entry, _masm) ++ assert_different_registers(cache, index, flags, off); ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ // Field offset ++ __ ldptr(off, Address(cache, index, Address::times_ptr, ++ in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::f2_offset()))); ++ // Flags ++ __ ldwu(flags, Address(cache, index, Address::times_ptr, ++ in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset()))); ++ ++ // klass overwrite register ++ if (is_static) { ++ __ ldptr(obj, Address(cache, index, Address::times_ptr, ++ in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::f1_offset()))); ++ const int mirror_offset = in_bytes(Klass::java_mirror_offset()); ++ __ ldptr(obj, Address(obj, mirror_offset)); ++ __ resolve_oop_handle(obj); ++ } ++} ++ ++void TemplateTable::load_invoke_cp_cache_entry(int byte_no, ++ Register method, ++ Register itable_index, ++ Register flags, ++ bool is_invokevirtual, ++ bool is_invokevfinal, /*unused*/ ++ bool is_invokedynamic) {SCOPEMARK_NAME(load_invoke_cp_cache_entry, _masm) ++ // setup registers ++ const Register cache = T3; ++ const Register index = T1; ++ assert_different_registers(method, flags); ++ assert_different_registers(method, cache, index); ++ assert_different_registers(itable_index, flags); ++ assert_different_registers(itable_index, cache, index); ++ // determine constant pool cache field offsets ++ assert(is_invokevirtual == (byte_no == f2_byte), "is_invokevirtual flag redundant"); ++ const int method_offset = in_bytes( ++ ConstantPoolCache::base_offset() + ++ ((byte_no == f2_byte) ++ ? ConstantPoolCacheEntry::f2_offset() ++ : ConstantPoolCacheEntry::f1_offset())); ++ const int flags_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()); ++ // access constant pool cache fields ++ const int index_offset = in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()); ++ ++ size_t index_size = (is_invokedynamic ? sizeof(u4) : sizeof(u2)); ++ resolve_cache_and_index(byte_no, cache, index, index_size); ++ __ ldptr(method, Address(cache, index, Address::times_ptr, method_offset)); ++ ++ if (itable_index != noreg) { ++ // pick up itable or appendix index from f2 also: ++ __ ldptr(itable_index, Address(cache, index, Address::times_ptr, index_offset)); ++ } ++ __ ldwu(flags, Address(cache, index, Address::times_ptr, flags_offset)); ++} ++ ++// The registers cache and index expected to be set before call. ++// Correct values of the cache and index registers are preserved. ++void TemplateTable::jvmti_post_field_access(Register cache, ++ Register index, ++ bool is_static, ++ bool has_tos) { ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we take ++ // the time to call into the VM. 
++ Label L1; ++ assert_different_registers(cache, index, rax); ++ __ ldws(rax, ExternalAddress((address) JvmtiExport::get_field_access_count_addr())); ++ __ jcc(Assembler::zero, L1, rax); ++ ++ // cache entry pointer ++ __ addptr(cache, in_bytes(ConstantPoolCache::base_offset()), cache); ++ __ slll(index, LogBytesPerWord, index); ++ __ addptr(cache, index, cache); ++ if (is_static) { ++ __ movl(rax, R0); // NULL object reference ++ } else { ++ __ pop(atos); ++ __ verify_oop(rax); ++ __ push(atos); ++ } ++ // FSR: object pointer or NULL ++ // cache: cache entry pointer ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ rax, cache); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::pop_and_check_object(Register r) {SCOPEMARK_NAME(pop_and_check_object, _masm) ++ __ pop_ptr(r); ++ __ null_check(r); // for field access must check obj. ++ __ verify_oop(r); ++} ++ ++void TemplateTable::getfield_or_static(int byte_no, bool is_static, RewriteControl rc) {SCOPEMARK_NAME(getfield_or_static, _masm) ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = c_rarg3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = c_rarg3; // uses same reg as obj, so don't mix them ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_access(cache, index, is_static, false); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ const Register bVolatile = T11;// don't clobber it ++ {// yj todo: x86 seems don't care for the volatile, but aarch64 cares. ++ __ andw(flags, 1 << ConstantPoolCacheEntry::is_volatile_shift, bVolatile); ++ ++ Label notVolatile; ++ __ jcc(Assembler::zero, notVolatile, bVolatile); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ if (!is_static) pop_and_check_object(obj); ++ ++ const Address field(obj, off, Address::times_1, 0*wordSize); ++ ++ Label Done, notByte, notBool, notInt, notShort, notChar, notLong, notFloat, notObj, notDouble; ++ ++ __ srll(flags, ConstantPoolCacheEntry::tos_state_shift, flags); ++ // Make sure we don't need to mask edx after the above shift ++ assert(btos == 0, "change code, btos != 0"); ++ ++ __ andw(flags, ConstantPoolCacheEntry::tos_state_mask, flags); ++ ++ __ jcc(Assembler::notZero, notByte, flags); ++ // btos ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(btos); ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bgetfield, bc, T2); ++ } ++ __ jmp(Done); ++ ++ __ bind(notByte); ++ __ cmpw(flags, ztos); ++ __ jcc(Assembler::notEqual, notBool); ++ ++ // ztos (same code as btos) ++ __ access_load_at(T_BOOLEAN, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ztos); ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ // use btos rewriting, no truncating to t/f bit is needed for getfield. 
++ patch_bytecode(Bytecodes::_fast_bgetfield, bc, T2); ++ } ++ __ jmp(Done); ++ ++ __ bind(notBool); ++ __ cmpw(flags, atos); ++ __ jcc(Assembler::notEqual, notObj); ++ // atos ++ do_oop_load(_masm, field, FSR); ++ __ push(atos); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_agetfield, bc, T2); ++ } ++ __ jmp(Done); ++ ++ __ bind(notObj); ++ __ cmpw(flags, itos); ++ __ jcc(Assembler::notEqual, notInt); ++ // itos ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(itos); ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_igetfield, bc, T2); ++ } ++ __ jmp(Done); ++ ++ __ bind(notInt); ++ __ cmpw(flags, ctos); ++ __ jcc(Assembler::notEqual, notChar); ++ // ctos ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(ctos); ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cgetfield, bc, T2); ++ } ++ __ jmp(Done); ++ ++ __ bind(notChar); ++ __ cmpw(flags, stos); ++ __ jcc(Assembler::notEqual, notShort); ++ // stos ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, field, noreg, noreg); ++ __ push(stos); ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sgetfield, bc, T2); ++ } ++ __ jmp(Done); ++ ++ __ bind(notShort); ++ __ cmpw(flags, ltos); ++ __ jcc(Assembler::notEqual, notLong); ++ // ltos ++ // yj todo: ??Generate code as if volatile (x86_32). There just aren't enough registers to ++ // save that information and this code is faster than the test. ++ __ access_load_at(T_LONG, IN_HEAP | MO_RELAXED, noreg /* ltos */, field, noreg, noreg); ++ __ push(ltos); ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) patch_bytecode(Bytecodes::_fast_lgetfield, bc, T2); ++ __ jmp(Done); ++ ++ __ bind(notLong); ++ __ cmpw(flags, ftos); ++ __ jcc(Assembler::notEqual, notFloat); ++ // ftos ++ ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ __ push(ftos); ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fgetfield, bc, T2); ++ } ++ __ jmp(Done); ++ ++ __ bind(notFloat); ++#ifdef ASSERT ++ __ cmpw(flags, dtos); ++ __ jcc(Assembler::notEqual, notDouble); ++#endif ++ // dtos ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ __ push(dtos); ++ // Rewrite bytecode to be faster ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dgetfield, bc, T2); ++ } ++#ifdef ASSERT ++ __ jmp(Done); ++ ++ ++ __ bind(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ bind(Done); ++ ++ { ++ Label notVolatile; ++ __ jcc(Assembler::zero, notVolatile, bVolatile); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::getfield(int byte_no) { ++ getfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_getfield(int byte_no) { ++ getfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::getstatic(int byte_no) { ++ getfield_or_static(byte_no, true); ++} ++ ++ ++// The registers cache and index expected to be set before call. ++// The function may destroy various registers, just not the cache and index registers. 
++void TemplateTable::jvmti_post_field_mod(Register cache, Register index, bool is_static) { ++ ++ const Register robj = c_rarg2; ++ const Register RBX = c_rarg1; ++ const Register RCX = c_rarg3; ++ const Register RDX = rscratch1; ++ ++ ByteSize cp_base_offset = ConstantPoolCache::base_offset(); ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L1; ++ assert_different_registers(cache, index, rcc); ++ __ ldws(rcc, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr())); ++ __ jcc(Assembler::zero, L1); ++ ++ __ get_cache_and_index_at_bcp(robj, RDX, 1); ++ ++ ++ if (is_static) { ++ // Life is simple. Null out the object pointer. ++ __ movw(RBX, R0); ++ ++ } else { ++ // Life is harder. The stack holds the value on top, followed by ++ // the object. We don't know the size of the value, though; it ++ // could be one or two words depending on its type. As a result, ++ // we must find the type to determine where the object is. ++ __ ldwu(RCX, Address(robj, RDX, ++ Address::times_ptr, ++ in_bytes(cp_base_offset + ++ ConstantPoolCacheEntry::flags_offset()))); ++ __ srll(RCX, ConstantPoolCacheEntry::tos_state_shift, RCX); ++ ++ // Make sure we don't need to mask rcx after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ __ ldptr(c_rarg1, at_tos_p1()); // initially assume a one word jvalue ++ __ cmpw(c_rarg3, ltos); ++ __ ldptr(AT, at_tos_p2()); ++ __ cmove(Assembler::equal, ++ c_rarg1, AT, c_rarg1); // ltos (two word jvalue) ++ __ cmpw(c_rarg3, dtos); ++ __ cmove(Assembler::equal, ++ c_rarg1, AT, c_rarg1); // dtos (two word jvalue) ++ } ++ // cache entry pointer ++ __ addptr(robj, in_bytes(cp_base_offset), robj); ++ __ slll(RDX, LogBytesPerWord, RDX); ++ __ addptr(robj, RDX, robj); ++ // object (tos) ++ __ movl(RCX, esp); ++ // c_rarg1: object pointer set up above (NULL if static) ++ // c_rarg2: cache entry pointer ++ // c_rarg3: jvalue object on the stack ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::post_field_modification), ++ RBX, robj, RCX); ++ __ get_cache_and_index_at_bcp(cache, index, 1); ++ __ bind(L1); ++ } ++} ++ ++void TemplateTable::putfield_or_static(int byte_no, bool is_static, RewriteControl rc) {SCOPEMARK_NAME(putfield_or_static, _masm) ++ transition(vtos, vtos); ++ ++ const Register cache = T3; ++ const Register index = T0; ++ const Register obj = T3; ++ const Register off = T2; ++ const Register flags = T1; ++ const Register bc = c_rarg3; ++ ++ resolve_cache_and_index(byte_no, cache, index, sizeof(u2)); ++ jvmti_post_field_mod(cache, index, is_static); ++ load_field_cp_cache_entry(obj, cache, index, off, flags, is_static); ++ ++ //x64 dont need mb since its mem seq model is strong, but we are weak, we ref aarch64 here. ++ const Register bVolatile = T11;// yj todo: will T11 be clobber?? 
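As the comment in putfield_or_static notes, x86's strong memory model lets it omit most of these barriers, while this port (like AArch64) brackets every volatile field access with volatile_barrier(). A rough stand-alone C++ analogy of that discipline using standard fences; it illustrates ordering rules (2) and (3) quoted earlier, not the actual sw64 memb instruction:

#include <atomic>
#include <cstdio>
#include <thread>

// 'ready' plays the role of a Java volatile field; 'payload' is ordinary data.
std::atomic<bool> ready{false};
int payload = 0;

void writer() {
  payload = 42;                                   // plain store
  // Rule (3): plain stores above must not sink below the volatile store.
  ready.store(true, std::memory_order_release);
  // The interpreter additionally places a full barrier after volatile stores,
  // covering the volatile-store / volatile-load case mentioned in the comment.
  std::atomic_thread_fence(std::memory_order_seq_cst);
}

void reader() {
  // Rule (2): plain loads below must not float above the volatile load.
  while (!ready.load(std::memory_order_acquire)) { /* spin */ }
  std::printf("%d\n", payload);                   // guaranteed to print 42
}

int main() {
  std::thread t1(writer), t2(reader);
  t1.join();
  t2.join();
  return 0;
}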
++ Label notVolatile, Done; ++ __ andw(flags, 1 << ConstantPoolCacheEntry::is_volatile_shift, bVolatile); ++ __ jcc(Assembler::zero, notVolatile, bVolatile); ++ volatile_barrier(); ++ __ BIND(notVolatile); ++ ++ // field addresses ++ const Address field(obj, off, Address::times_1, 0*wordSize); ++ ++ Label notByte, notBool, notInt, notShort, notChar, ++ notLong, notFloat, notObj, notDouble; ++ ++ __ srll(flags, ConstantPoolCacheEntry::tos_state_shift, flags); ++ ++ assert(btos == 0, "change code, btos != 0"); ++ __ andw(flags, ConstantPoolCacheEntry::tos_state_mask, flags); ++ __ jcc(Assembler::notZero, notByte, flags); ++ ++ // btos ++ { ++ __ pop(btos); ++ if (!is_static) pop_and_check_object(obj); ++ __ access_store_at(T_BYTE, IN_HEAP, field, FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_bputfield, bc, T2, true, byte_no); ++ } ++ __ jmp(Done); ++ } ++ ++ __ BIND(notByte); ++ __ cmpw(flags, ztos); ++ __ jcc(Assembler::notEqual, notBool); ++ ++ // ztos ++ { ++ __ pop(ztos); ++ if (!is_static) pop_and_check_object(obj); ++ __ access_store_at(T_BOOLEAN, IN_HEAP, field, FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_zputfield, bc, T2, true, byte_no); ++ } ++ __ jmp(Done); ++ } ++ ++ __ BIND(notBool); ++ __ cmpw(flags, atos); ++ __ jcc(Assembler::notEqual, notObj); ++ ++ // atos ++ { ++ __ pop(atos); ++ if (!is_static) pop_and_check_object(obj); ++ // Store into the field ++ do_oop_store(_masm, field, FSR); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_aputfield, bc, T2, true, byte_no); ++ } ++ __ jmp(Done); ++ } ++ ++ __ BIND(notObj); ++ __ cmpw(flags, itos); ++ __ jcc(Assembler::notEqual, notInt); ++ ++ // itos ++ { ++ __ pop(itos); ++ if (!is_static) pop_and_check_object(obj); ++ __ access_store_at(T_INT, IN_HEAP, field, FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_iputfield, bc, T2, true, byte_no); ++ } ++ __ jmp(Done); ++ } ++ ++ __ BIND(notInt); ++ __ cmpw(flags, ctos); ++ __ jcc(Assembler::notEqual, notChar); ++ ++ // ctos ++ { ++ __ pop(ctos); ++ if (!is_static) pop_and_check_object(obj); ++ __ access_store_at(T_CHAR, IN_HEAP, field, FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_cputfield, bc, T2, true, byte_no); ++ } ++ __ jmp(Done); ++ } ++ ++ __ BIND(notChar); ++ __ cmpw(flags, stos); ++ __ jcc(Assembler::notEqual, notShort); ++ ++ // stos ++ { ++ __ pop(stos); ++ if (!is_static) pop_and_check_object(obj); ++ __ access_store_at(T_SHORT, IN_HEAP, field, FSR, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_sputfield, bc, T2, true, byte_no); ++ } ++ __ jmp(Done); ++ } ++ ++ __ BIND(notShort); ++ __ cmpw(flags, ltos); ++ __ jcc(Assembler::notEqual, notLong); ++ ++ // ltos ++ { ++ __ pop(ltos); ++ if (!is_static) pop_and_check_object(obj); ++ __ access_store_at(T_LONG, IN_HEAP, field, noreg /* ltos*/, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_lputfield, bc, T2, true, byte_no); ++ } ++ __ jmp(Done); ++ } ++ ++ __ BIND(notLong); ++ __ cmpw(flags, ftos); ++ __ jcc(Assembler::notEqual, notFloat); ++ ++ // ftos ++ { ++ __ pop(ftos); ++ if (!is_static) pop_and_check_object(obj); ++ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos */, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_fputfield, bc, T2, true, byte_no); ++ 
} ++ __ jmp(Done); ++ } ++ ++ __ BIND(notFloat); ++#ifdef ASSERT ++ __ cmpw(flags, dtos); ++ __ jcc(Assembler::notEqual, notDouble); ++#endif ++ ++ // dtos ++ { ++ __ pop(dtos); ++ if (!is_static) pop_and_check_object(obj); ++ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos */, noreg, noreg); ++ if (!is_static && rc == may_rewrite) { ++ patch_bytecode(Bytecodes::_fast_dputfield, bc, T2, true, byte_no); ++ } ++ } ++ ++#ifdef ASSERT ++ __ jmp(Done); ++ ++ __ BIND(notDouble); ++ __ stop("Bad state"); ++#endif ++ ++ __ BIND(Done); ++ ++ // Check for volatile store ++ { ++ Label notVolatile; ++ __ jcc(Assembler::zero, notVolatile, bVolatile); ++ volatile_barrier(); ++ __ BIND(notVolatile); ++ } ++} ++ ++void TemplateTable::putfield(int byte_no) { ++ putfield_or_static(byte_no, false); ++} ++ ++void TemplateTable::nofast_putfield(int byte_no) { ++ putfield_or_static(byte_no, false, may_not_rewrite); ++} ++ ++void TemplateTable::putstatic(int byte_no) { ++ putfield_or_static(byte_no, true); ++} ++ ++void TemplateTable::jvmti_post_fast_field_mod() { ++ ++ const Register scratch = c_rarg3; ++ const Register rbx = T2; ++ ++ if (JvmtiExport::can_post_field_modification()) { ++ // Check to see if a field modification watch has been set before ++ // we take the time to call into the VM. ++ Label L2; ++ __ ldws(scratch, ExternalAddress((address)JvmtiExport::get_field_modification_count_addr())); ++ __ jcc(Assembler::zero, L2, scratch); ++ __ pop_ptr(rbx); // copy the object pointer from tos ++ __ verify_oop(rbx); ++ __ push_ptr(rbx); // put the object pointer back on tos ++ // Save tos values before call_VM() clobbers them. Since we have ++ // to do it for every data type, we use the saved values as the ++ // jvalue object. ++ switch (bytecode()) { // load values into the jvalue object ++ case Bytecodes::_fast_aputfield: __ push_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ push_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ push(dtos); break; ++ case Bytecodes::_fast_fputfield: __ push(ftos); break; ++ case Bytecodes::_fast_lputfield: __ push_l(FSR); break; ++ ++ default: ++ ShouldNotReachHere(); ++ } ++ __ movl(scratch, esp); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(c_rarg2, FSR, 1); ++ __ verify_oop(rbx); ++ // rbx: object pointer copied above ++ // c_rarg2: cache entry pointer ++ // c_rarg3: jvalue object on the stack ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_modification), rbx, c_rarg2, c_rarg3); ++ ++ switch (bytecode()) { // restore tos values ++ case Bytecodes::_fast_aputfield: __ pop_ptr(FSR); break; ++ case Bytecodes::_fast_bputfield: // fall through ++ case Bytecodes::_fast_zputfield: // fall through ++ case Bytecodes::_fast_sputfield: // fall through ++ case Bytecodes::_fast_cputfield: // fall through ++ case Bytecodes::_fast_iputfield: __ pop_i(FSR); break; ++ case Bytecodes::_fast_dputfield: __ pop(dtos); break; ++ case Bytecodes::_fast_fputfield: __ pop(ftos); break; ++ case Bytecodes::_fast_lputfield: __ pop_l(FSR); break; ++ default: break; ++ } ++ __ bind(L2); ++ } ++} ++ ++void TemplateTable::fast_storefield(TosState state) { ++ transition(state, vtos); ++ ++ const Register scratch = T11; ++ const Register rbx = T2; ++ const Register rcx = T3; ++ const Register rdx = T1; ++ ++ 
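fast_storefield/fast_accessfield below implement the _fast_* bytecodes that the get/putfield templates above install via patch_bytecode once a field reference is resolved, so subsequent executions skip resolve_cache_and_index entirely. A toy C++ sketch of that rewrite-on-first-execution idea (a simplified dispatch loop with made-up opcodes, not the real template interpreter):

#include <cstdint>
#include <cstdio>
#include <vector>

// Toy bytecode set: the slow form resolves the field reference, then rewrites
// itself so every later execution takes the fast form directly.
enum Op : uint8_t { OP_HALT = 0, OP_GETFIELD = 1, OP_FAST_GETFIELD = 2 };

struct Object { int fields[4]; };

static int resolve_field_offset(uint8_t cp_index) {
  return cp_index;                 // stand-in for constant-pool resolution
}

int main() {
  std::vector<uint8_t> code = { OP_GETFIELD, 2, OP_GETFIELD, 2, OP_HALT };  // opcode, operand pairs
  int cp_cache_offset[256] = {0};  // toy constant-pool cache: operand index -> resolved offset
  Object obj{{10, 20, 30, 40}};

  for (size_t bci = 0; code[bci] != OP_HALT; bci += 2) {
    switch (code[bci]) {
      case OP_GETFIELD:            // slow path: resolve, then patch the bytecode in place
        cp_cache_offset[code[bci + 1]] = resolve_field_offset(code[bci + 1]);
        code[bci] = OP_FAST_GETFIELD;        // analogous to patch_bytecode(_fast_igetfield, ...)
        [[fallthrough]];
      case OP_FAST_GETFIELD:       // fast path: no resolution, just use the cached offset
        std::printf("field value = %d\n", obj.fields[cp_cache_offset[code[bci + 1]]]);
        break;
      default:
        break;
    }
  }
  return 0;
}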
ByteSize base = ConstantPoolCache::base_offset(); ++ ++ jvmti_post_fast_field_mod(); ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(rcx, rbx, 1); ++ ++ // test for volatile with rdx but rdx is tos register for lputfield. ++ __ ldwu(rdx, Address(rcx, rbx, Address::times_ptr, ++ in_bytes(base + ++ ConstantPoolCacheEntry::flags_offset()))); ++ ++ // replace index with field offset from cache entry ++ __ ldptr(rbx, Address(rcx, rbx, Address::times_ptr, ++ in_bytes(base + ConstantPoolCacheEntry::f2_offset()))); ++ ++ //x64 dont need mb since its mem seq model is strong, but we are weak, we ref aarch64 here. ++ { ++ __ andw(rdx, 1 << ConstantPoolCacheEntry::is_volatile_shift, scratch); ++ ++ Label notVolatile; ++ __ jcc(Assembler::zero, notVolatile, scratch); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // Get object from stack ++ pop_and_check_object(rcx); ++ ++ // field address ++ const Address field(rcx, rbx, Address::times_1); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_aputfield: ++ do_oop_store(_masm, field, FSR); ++ break; ++ case Bytecodes::_fast_lputfield: ++ __ access_store_at(T_LONG, IN_HEAP, field, noreg /* ltos */, noreg, noreg); ++ break; ++ case Bytecodes::_fast_iputfield: ++ __ access_store_at(T_INT, IN_HEAP, field, FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_zputfield: ++ __ access_store_at(T_BOOLEAN, IN_HEAP, field, FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bputfield: ++ __ access_store_at(T_BYTE, IN_HEAP, field, FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sputfield: ++ __ access_store_at(T_SHORT, IN_HEAP, field, FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cputfield: ++ __ access_store_at(T_CHAR, IN_HEAP, field, FSR, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fputfield: ++ __ access_store_at(T_FLOAT, IN_HEAP, field, noreg /* ftos*/, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dputfield: ++ __ access_store_at(T_DOUBLE, IN_HEAP, field, noreg /* dtos*/, noreg, noreg); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ jcc(Assembler::zero, notVolatile, scratch); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::fast_accessfield(TosState state) { ++ transition(atos, state); ++ ++ const Register scratch = T11; ++ const Register rcx = T3; ++ const Register rbx = T2; ++ ++ // Do the JVMTI work here to avoid disturbing the register state below ++ if (JvmtiExport::can_post_field_access()) { ++ // Check to see if a field access watch has been set before we ++ // take the time to call into the VM. 
++ Label L1; ++ __ ldws(rcx, ExternalAddress((address) JvmtiExport::get_field_access_count_addr())); ++ __ jcc(Assembler::zero, L1, rcx); ++ // access constant pool cache entry ++ __ get_cache_entry_pointer_at_bcp(c_rarg2, rcx, 1); ++ __ verify_oop(FSR); ++ __ push(FSR); // save object pointer before call_VM() clobbers it ++ __ movl(c_rarg1, FSR); ++ // c_rarg1: object pointer copied above ++ // c_rarg2: cache entry pointer ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::post_field_access), ++ c_rarg1, c_rarg2); ++ __ pop_ptr(FSR); ++ __ bind(L1); ++ } ++ ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(rcx, rbx, 1); ++ // replace index with field offset from cache entry ++ { ++ __ ldw(AT, Address(rcx, rbx, Address::times_8, ++ in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()))); ++ __ andw(AT, 1 << ConstantPoolCacheEntry::is_volatile_shift, scratch); ++ ++ Label notVolatile; ++ __ jcc(Assembler::zero, notVolatile, scratch); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ __ ldptr(rbx, Address(rcx, rbx, Address::times_ptr, ++ in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()))); ++ ++ // FSR: object ++ __ verify_oop(FSR); ++ __ null_check(FSR); ++ // field addresses ++ Address field(FSR, rbx, Address::times_1); ++ ++ // access field ++ switch (bytecode()) { ++ case Bytecodes::_fast_agetfield: ++ do_oop_load(_masm, field, FSR); ++ __ verify_oop(FSR); ++ break; ++ case Bytecodes::_fast_lgetfield: ++ __ access_load_at(T_LONG, IN_HEAP, noreg /* ltos */, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_igetfield: ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_bgetfield: ++ __ access_load_at(T_BYTE, IN_HEAP, FSR, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_sgetfield: ++ __ access_load_at(T_SHORT, IN_HEAP, FSR, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_cgetfield: ++ __ access_load_at(T_CHAR, IN_HEAP, FSR, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_fgetfield: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ break; ++ case Bytecodes::_fast_dgetfield: ++ __ access_load_at(T_DOUBLE, IN_HEAP, noreg /* dtos */, field, noreg, noreg); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ jcc(Assembler::zero, notVolatile, scratch); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++} ++ ++void TemplateTable::fast_xaccess(TosState state) { ++ transition(vtos, state); ++ ++ const Register scratch = T11; ++ const Register rcx = T3; ++ const Register rdx = T2; ++ const Register rbx = T1; ++ ++ // get receiver ++ __ ldptr(FSR, aaddress(0)); ++ // access constant pool cache ++ __ get_cache_and_index_at_bcp(rcx, rdx, 2); ++ __ ldptr(rbx, ++ Address(rcx, rdx, Address::times_ptr, ++ in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::f2_offset()))); ++ ++ { ++ __ ldw(AT, Address(rcx, rdx, Address::times_8, ++ in_bytes(ConstantPoolCache::base_offset() + ++ ConstantPoolCacheEntry::flags_offset()))); ++ __ andw(AT, 1 << ConstantPoolCacheEntry::is_volatile_shift, scratch); ++ ++ Label notVolatile; ++ __ jcc(Assembler::zero, notVolatile, scratch); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ // make sure exception is reported in correct bcp range (getfield is ++ // next instruction) ++ __ incrementl(rbcp); ++ __ null_check(FSR); ++ const Address field = Address(FSR, rbx, Address::times_1, 0*wordSize); 
++ switch (state) { ++ case itos: ++ __ access_load_at(T_INT, IN_HEAP, FSR, field, noreg, noreg); ++ break; ++ case atos: ++ do_oop_load(_masm, field, FSR); ++ __ verify_oop(FSR); ++ break; ++ case ftos: ++ __ access_load_at(T_FLOAT, IN_HEAP, noreg /* ftos */, field, noreg, noreg); ++ break; ++ default: ++ ShouldNotReachHere(); ++ } ++ ++ { ++ Label notVolatile; ++ __ jcc(Assembler::zero, notVolatile, scratch); ++ volatile_barrier(); ++ __ bind(notVolatile); ++ } ++ ++ __ decrementl(rbcp); ++} ++ ++//----------------------------------------------------------------------------- ++// Calls ++ ++void TemplateTable::count_calls(Register method, Register temp) { ++ // implemented elsewhere ++ ShouldNotReachHere(); ++} ++ ++void TemplateTable::prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index, // itable index, MethodType, etc. ++ Register recv, // if caller wants to see it ++ Register flags // if caller wants to test it ++ ) {SCOPEMARK_NAME(prepare_invoke, _masm) ++ const Register rdx = T1; ++ const Register rcx = T3; ++ ++ // determine flags ++ const Bytecodes::Code code = bytecode(); ++ const bool is_invokeinterface = code == Bytecodes::_invokeinterface; ++ const bool is_invokedynamic = code == Bytecodes::_invokedynamic; ++ const bool is_invokehandle = code == Bytecodes::_invokehandle; ++ const bool is_invokevirtual = code == Bytecodes::_invokevirtual; ++ const bool is_invokespecial = code == Bytecodes::_invokespecial; ++ const bool load_receiver = (recv != noreg); ++ const bool save_flags = (flags != noreg); ++ assert(load_receiver == (code != Bytecodes::_invokestatic && code != Bytecodes::_invokedynamic), ""); ++ assert(save_flags == (is_invokeinterface || is_invokevirtual), "need flags for vfinal"); ++ assert(flags == noreg || flags == rdx, ""); ++ assert(recv == noreg || recv == rcx, ""); ++ //assert(method == rmethod, "rmethod is a S reg"); ++ ++ // setup registers & access constant pool cache ++ if (recv == noreg) recv = rcx; ++ if (flags == noreg) flags = rdx; ++ assert_different_registers(method, index, recv, flags); ++ ++ // save 'interpreter return address' ++ __ save_bcp(); ++ ++ load_invoke_cp_cache_entry(byte_no, method, index, flags, is_invokevirtual, false, is_invokedynamic); ++ ++ // maybe push appendix to arguments (just before return address) ++ if (is_invokedynamic || is_invokehandle) {// ++ Label L_no_push; ++ Register rbx = rmethod; ++ __ testw(flags, (1 << ConstantPoolCacheEntry::has_appendix_shift)); ++ __ jcc(Assembler::zero, L_no_push); ++ // Push the appendix as a trailing parameter. ++ // This must be done before we get the receiver, ++ // since the parameter_size includes it. ++ __ push(rbx); ++ __ movl(rbx, index); ++ assert(ConstantPoolCacheEntry::_indy_resolved_references_appendix_offset == 0, "appendix expected at index+0"); ++ __ load_resolved_reference_at_index(index, rbx); ++ __ pop(rbx); ++ __ push(index); // push appendix (MethodType, CallSite, etc.) ++ __ bind(L_no_push); ++ } ++ ++ // load receiver if needed (after appendix is pushed so parameter size is correct) ++ // Note: no return address pushed yet ++ if (load_receiver) { ++ __ andw(flags, ConstantPoolCacheEntry::parameter_size_mask, recv); ++ const int no_return_pc_pushed_yet = 0; // argument slot correction before we push return address ++ // Since we won't push RA on stack, no_return_pc_pushed_yet should be 0. 
++ const int receiver_is_at_end = -1; // back off one slot to get receiver ++ Address recv_addr = __ argument_address(recv, no_return_pc_pushed_yet + receiver_is_at_end); ++ __ ldptr(recv, recv_addr); ++ __ verify_oop(recv); ++ } ++ ++ if (save_flags) { ++ __ movw(rbcp, flags); ++ } ++ ++ // compute return type ++ __ srll(flags, ConstantPoolCacheEntry::tos_state_shift, flags); ++ // Make sure we don't need to mask flags after the above shift ++ ConstantPoolCacheEntry::verify_tos_state_shift(); ++ // load return address ++ { ++ const address table_addr = (address) Interpreter::invoke_return_entry_table_for(code); ++ ExternalAddress table(table_addr); ++ __ lea(rscratch1, table); ++ __ ldptr(RA, Address(rscratch1, flags, Address::times_ptr)); ++ } ++ ++ // push return address ++ // __ push(RA);// yj: we dont't push ret addr ++ ++ if (save_flags) { ++ __ movw(flags, rbcp); ++ __ restore_bcp(); ++ } ++} ++ ++void TemplateTable::invokevirtual_helper(Register index, ++ Register recv, ++ Register flags) {SCOPEMARK_NAME(invokevirtual_helper, _masm) ++ const Register rdx = T2; ++ const Register rax = FSR; ++ ++ // Uses temporary registers FSR, rdx ++ assert_different_registers(index, recv, rax, rdx); ++ assert(index == rmethod, ""); ++ assert(recv == T3, ""); ++ ++ // Test for an invoke of a final method ++ Label notFinal; ++ __ testw(flags, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ jcc(Assembler::zero, notFinal); ++ ++ const Register method = index; // method must be rmethod ++ assert(method == rmethod, ++ "method* must be rmethod for interpreter calling convention"); ++ ++ // do the call - the index is actually the method to call ++ // that is, f2 is a vtable index if !is_vfinal, else f2 is a Method* ++ ++ // It's final, need a null check here! 
++ __ null_check(recv); ++ ++ // profile this call ++ __ profile_final_call(rax); ++ __ profile_arguments_type(rax, method, rbcp, true); ++ ++ __ jump_from_interpreted(method, rax); ++ ++ __ bind(notFinal); ++ ++ // get receiver klass ++ __ null_check(recv, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(rax, recv); ++ ++ // profile this call ++ __ profile_virtual_call(rax, rlocals, rdx); ++ // get target Method* & entry point ++ __ lookup_virtual_method(rax, index, method); ++ __ profile_called_method(method, rdx, rbcp); ++ ++ __ profile_arguments_type(rdx, method, rbcp, true); ++ __ jump_from_interpreted(method, rdx); ++} ++ ++void TemplateTable::invokevirtual(int byte_no) {SCOPEMARK_NAME(invokevirtual, _masm) ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ prepare_invoke(byte_no, ++ rmethod,// method or vtable index ++ noreg, // unused itable index ++ T3, T1); // recv, flags ++ ++ // rmethod: index ++ // T3 : receiver ++ // T1 : flags ++ ++ invokevirtual_helper(rmethod, T3, T1); ++} ++ ++void TemplateTable::invokespecial(int byte_no) {SCOPEMARK_NAME(invokespecial, _masm) ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ Register rcx = T3; // ++ Register rax = V0; ++ Register rbx = rmethod; ++ prepare_invoke(byte_no, rbx, noreg, // get f1 Method* ++ rcx); // get receiver also for null check ++ __ verify_oop(rcx); ++ __ null_check(rcx); ++ // do the call ++ __ profile_call(rax); ++ __ profile_arguments_type(rax, rbx, c_rarg4, false); ++ __ jump_from_interpreted(rbx, rax); ++} ++ ++void TemplateTable::invokestatic(int byte_no) {SCOPEMARK_NAME(invokestatic, _masm) ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ Register rax = V0; ++ prepare_invoke(byte_no, rmethod, noreg); ++ // do the call ++ __ profile_call(rax); ++ __ profile_arguments_type(rax, rmethod, c_rarg3, false); ++ __ jump_from_interpreted(rmethod, rax); ++} ++ ++ ++void TemplateTable::fast_invokevfinal(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f2_byte, "use this argument"); ++ __ stop("fast_invokevfinal not used on sw64"); ++} ++ ++ ++void TemplateTable::invokeinterface(int byte_no) {SCOPEMARK_NAME(invokeinterface, _masm) ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ Register rax = V0; ++ Register rcx = T3; ++ Register rdx = T1; ++ ++ prepare_invoke(byte_no, rax, rmethod, // get f1 Klass*, f2 Method* ++ rcx, rdx); // recv, flags ++ ++ // rax: reference klass (from f1) if interface method ++ // rbx: method (from f2) ++ // rcx: receiver ++ // rdx: flags ++ ++ // First check for Object case, then private interface method, ++ // then regular interface method. ++ ++ // Special case of invokeinterface called for virtual method of ++ // java.lang.Object. See cpCache.cpp for details. 
++ Label notObjectMethod; ++ __ testw(rdx, (1 << ConstantPoolCacheEntry::is_forced_virtual_shift)); ++ __ jcc(Assembler::zero, notObjectMethod); ++ invokevirtual_helper(rmethod, rcx, rdx); ++ // no return from above ++ __ bind(notObjectMethod); ++ ++ Label no_such_interface; // for receiver subtype check ++ Register recvKlass; // used for exception processing ++ ++ // Check for private method invocation - indicated by vfinal ++ Label notVFinal; ++ __ testw(rdx, (1 << ConstantPoolCacheEntry::is_vfinal_shift)); ++ __ jcc(Assembler::zero, notVFinal); ++ ++ // Get receiver klass into rlocals - also a null check ++ __ null_check(rcx, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(rlocals, rcx); ++ ++ Label subtype; ++ __ check_klass_subtype(rlocals, rax, c_rarg4, subtype); ++ // If we get here the typecheck failed ++ recvKlass = rdx; ++ __ movl(recvKlass, rlocals); // shuffle receiver class for exception use ++ __ jmp(no_such_interface); ++ ++ __ bind(subtype); ++ ++ // do the call - rbx is actually the method to call ++ ++ __ profile_final_call(rdx); ++ __ profile_arguments_type(rdx, rmethod, c_rarg4, true); ++ ++ __ jump_from_interpreted(rmethod, rdx); ++ // no return from above ++ __ bind(notVFinal); ++ ++ // Get receiver klass into rdx - also a null check ++ __ restore_locals(); // restore r14 ++ __ null_check(rcx, oopDesc::klass_offset_in_bytes()); ++ __ load_klass(rdx, rcx); ++ ++ Label no_such_method; ++ ++ // Preserve method for throw_AbstractMethodErrorVerbose. ++ __ movl(rcx, rmethod); ++ // Receiver subtype check against REFC. ++ // Superklass in rax. Subklass in rdx. Blows rcx, rdi. ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ rdx, rax, noreg, ++ // outputs: scan temp. reg, scan temp. reg ++ c_rarg4, rlocals, ++ no_such_interface, ++ /*return_method=*/false); ++ ++ // profile this call ++ __ restore_bcp(); // rbcp was destroyed by receiver type check ++ __ profile_virtual_call(rdx, c_rarg4, rlocals); ++ ++ // Get declaring interface class from method, and itable index ++ __ ldptr(rax, Address(rmethod, Method::const_offset())); ++ __ ldptr(rax, Address(rax, ConstMethod::constants_offset())); ++ __ ldptr(rax, Address(rax, ConstantPool::pool_holder_offset_in_bytes())); ++ __ ldws(rmethod, Address(rmethod, Method::itable_index_offset())); ++ __ movw(rscratch1, Method::itable_index_max); ++ __ subw(rmethod, rscratch1, rmethod); ++ __ subw(R0, rmethod, rmethod); ++ ++ // Preserve recvKlass for throw_AbstractMethodErrorVerbose. ++ __ movl(rlocals, rdx); ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ rlocals, rax, rmethod, ++ // outputs: method, scan temp. reg ++ rmethod, c_rarg4, ++ no_such_interface); ++ ++ // rmethod: Method* to call ++ // rcx: receiver ++ // Check for abstract method error ++ // Note: This should be done more efficiently via a throw_abstract_method_error ++ // interpreter entry point and a conditional jump to it in case of a null ++ // method. ++ __ testptr(rmethod, rmethod); ++ __ jcc(Assembler::zero, no_such_method); ++ ++ __ profile_called_method(rmethod, c_rarg4, rdx); ++ __ profile_arguments_type(rdx, rmethod, c_rarg4, true); ++ ++ // do the call ++ // rcx: receiver ++ // rmethod,: Method* ++ __ jump_from_interpreted(rmethod, rdx); ++ __ should_not_reach_here("3501"); ++ ++ // exception handling code follows... ++ // note: must restore interpreter registers to canonical ++ // state for exception handling to work correctly! 
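The two lookup_interface_method calls above first check that the receiver class really implements the reference interface and then fetch the target method by itable index, branching to no_such_interface / no_such_method on failure. A compact C++ sketch of that itable-scan idea; the structures are hand-rolled stand-ins, not HotSpot's itableOffsetEntry/itableMethodEntry layout:

#include <cstdio>
#include <vector>

struct Method { const char* name; };

// One itable block per implemented interface: the interface's id plus its method slots.
struct ItableBlock {
  int interface_id;
  std::vector<Method*> methods;        // indexed by itable index
};

struct Klass {
  const char* name;
  std::vector<ItableBlock> itable;
};

// Returns the resolved method, or nullptr if the receiver class does not
// implement the interface (the no_such_interface case) or the slot is empty
// (the abstract-method / no_such_method case).
static Method* lookup_interface_method(const Klass& recv_klass, int interface_id, int itable_index) {
  for (const ItableBlock& block : recv_klass.itable) {   // linear scan, like the itable walk
    if (block.interface_id == interface_id) {
      if (itable_index < (int)block.methods.size()) return block.methods[itable_index];
      return nullptr;                                    // no_such_method
    }
  }
  return nullptr;                                        // no_such_interface
}

int main() {
  Method m{"Comparable.compareTo"};
  Klass string_klass{"String", {{/*interface_id=*/7, {&m}}}};
  Method* target = lookup_interface_method(string_klass, 7, 0);
  std::printf("%s\n", target ? target->name : "no such interface/method");
  return 0;
}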
++ ++ __ bind(no_such_method); ++ // throw exception ++ //__ pop(rbx); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); // rbcp must be correct for exception handler (was destroyed) ++ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) ++ // Pass arguments for generating a verbose error message. ++ ++ recvKlass = c_rarg1; ++ Register method = c_rarg2; ++ if (recvKlass != rdx) { __ movl(recvKlass, rdx); } ++ if (method != rcx) { __ movl(method, rcx); } ++ ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_AbstractMethodErrorVerbose), ++ recvKlass, method); ++ // The call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here("3522"); ++ ++ __ bind(no_such_interface); ++ // throw exception ++ //__ pop(rbx); // pop return address (pushed by prepare_invoke) ++ __ restore_bcp(); // rbcp must be correct for exception handler (was destroyed) ++ __ restore_locals(); // make sure locals pointer is correct as well (was destroyed) ++ // Pass arguments for generating a verbose error message. ++ if (recvKlass != rdx) { __ movl(recvKlass, rdx); } ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::throw_IncompatibleClassChangeErrorVerbose), ++ recvKlass, rax); ++ // the call_VM checks for exception, so we should never return here. ++ __ should_not_reach_here("3534"); ++} ++ ++void TemplateTable::invokehandle(int byte_no) {SCOPEMARK_NAME(invokehandle, _masm) ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ const Register rbx_method = rmethod; ++ const Register rax_mtype = V0; ++ const Register rcx_recv = T3; ++ const Register rdx_flags = c_rarg3; ++ const Register rdx = c_rarg3; ++ const Register rax = V0; ++ ++ prepare_invoke(byte_no, rbx_method, rax_mtype, rcx_recv); ++ __ verify_method_ptr(rbx_method); ++ __ verify_oop(rcx_recv); ++ __ null_check(rcx_recv); ++ ++ // rax: MethodType object (from cpool->resolved_references[f1], if necessary) ++ // rbx: MH.invokeExact_MT method (from f2) ++ ++ // Note: rax_mtype is already pushed (if necessary) by prepare_invoke ++ ++ // FIXME: profile the LambdaForm also ++ __ profile_final_call(rax); ++ __ profile_arguments_type(rdx, rbx_method, c_rarg5, true); ++ ++ __ jump_from_interpreted(rbx_method, rdx); ++} ++ ++void TemplateTable::invokedynamic(int byte_no) { ++ transition(vtos, vtos); ++ assert(byte_no == f1_byte, "use this argument"); ++ ++ const Register rbx_method = rmethod; ++ const Register rax_callsite = V0; ++ Register rdx = c_rarg2; ++ ++ //__ stop("TODO: should check function right:invokedynamic jzy"); ++ prepare_invoke(byte_no, rbx_method, rax_callsite); ++ ++ // rax: CallSite object (from cpool->resolved_references[f1]) ++ // rbx: MH.linkToCallSite method (from f2) ++ ++ // Note: rax_callsite is already pushed by prepare_invoke ++ ++ // %%% should make a type profile for any invokedynamic that takes a ref argument ++ // profile this call ++ __ profile_call(c_rarg3); ++ __ profile_arguments_type(rdx, rbx_method, c_rarg3, false); ++ ++ __ verify_oop(rax_callsite); ++ ++ __ jump_from_interpreted(rbx_method, rdx); ++} ++ ++//----------------------------------------------------------------------------- ++// Allocation ++ ++void TemplateTable::_new() {SCOPEMARK_NAME(TemplateTable::_new, _masm) ++ transition(vtos, atos); ++ Register rax = V0; ++ Register rbx = c_rarg3; ++ Register rcx = c_rarg4; ++ Register rdx = c_rarg5; ++ __ get_unsigned_2_byte_index_at_bcp(rdx, 1); ++ Label slow_case; ++ Label 
slow_case_no_pop; ++ Label done; ++ Label initialize_header; ++ Label initialize_object; // including clearing the fields ++ ++ __ get_cpool_and_tags(rcx, rax); ++ ++ // Make sure the class we're about to instantiate has been resolved. ++ // This is done before loading InstanceKlass to be consistent with the order ++ // how Constant Pool is updated (see ConstantPool::klass_at_put) ++ const int tags_offset = Array::base_offset_in_bytes(); ++ __ cmpb(Address(rax, rdx, Address::times_1, tags_offset), JVM_CONSTANT_Class); ++ __ jcc(Assembler::notEqual, slow_case_no_pop); ++ ++ // get InstanceKlass ++ __ load_resolved_klass_at_index(rcx, rdx, rcx); ++ __ push(rcx); // save the contexts of klass for initializing the header ++ ++ // make sure klass is initialized & doesn't have finalizer ++ // make sure klass is fully initialized ++ __ cmpb(Address(rcx, InstanceKlass::init_state_offset()), InstanceKlass::fully_initialized); ++ __ jcc(Assembler::notEqual, slow_case); ++ ++ // get instance_size in InstanceKlass (scaled to a count of bytes) ++ __ ldw(rdx, Address(rcx, Klass::layout_helper_offset())); ++ // test to see if it has a finalizer or is malformed in some way ++ __ testw(rdx, Klass::_lh_instance_slow_path_bit); ++ __ jcc(Assembler::notZero, slow_case); ++ ++ // Allocate the instance: ++ // If TLAB is enabled: ++ // Try to allocate in the TLAB. ++ // If fails, go to the slow path. ++ // Else If inline contiguous allocations are enabled: ++ // Try to allocate in eden. ++ // If fails due to heap end, go to slow path. ++ // ++ // If TLAB is enabled OR inline contiguous is enabled: ++ // Initialize the allocation. ++ // Exit. ++ // ++ // Go to slow path. ++ ++ const bool allow_shared_alloc = ++ Universe::heap()->supports_inline_contig_alloc(); ++ ++ const Register thread = rthread; ++ if (UseTLAB) { ++ __ tlab_allocate(thread, rax, rdx, 0, rcx, rbx, slow_case); ++ if (ZeroTLAB) { ++ // the fields have been already cleared ++ __ jmp(initialize_header); ++ } else { ++ // initialize both the header and fields ++ __ jmp(initialize_object); ++ } ++ } else { ++ // Allocation in the shared Eden, if allowed. ++ // ++ // rdx: instance size in bytes ++ __ eden_allocate(thread, rax, rdx, 0, rbx, slow_case); ++ } ++ ++ // If UseTLAB or allow_shared_alloc are true, the object is created above and ++ // there is an initialize need. Otherwise, skip and go to the slow path. ++ if (UseTLAB || allow_shared_alloc) { ++ // The object is initialized before the header. If the object size is ++ // zero, go directly to the header initialization. ++ __ BIND(initialize_object); ++ __ decrementl(rdx, sizeof(oopDesc)); ++ __ jcc(Assembler::zero, initialize_header, rdx); ++ ++ // Initialize topmost object field, divide rdx by 8, check if odd and ++ // test if zero. ++ __ movw(rcx, R0); // use zero reg to clear memory (shorter code) ++#ifdef ASSERT ++ __ movl(rscratch1, rdx); ++#endif ++ __ srll(rdx, LogBytesPerLong, rdx); // divide by 2*oopSize and set result flag if odd ++ ++ // rdx must have been multiple of 8 ++#ifdef ASSERT ++ // make sure rdx was multiple of 8 ++ Label L; ++ // Ignore partial flag stall after shrl() since it is debug VM ++ __ srll(rscratch1, LogBytesPerLong-1, rscratch1); ++ __ testptr(rscratch1, 0x1);// the least significant bit is zero? 
++ __ jcc(Assembler::zero, L); ++ __ stop("object size is not multiple of 2 - adjust this code"); ++ __ BIND(L); ++ // rdx must be > 0, no extra check needed here ++#endif ++ ++ // initialize remaining object fields: rdx was a multiple of 8 ++ { Label loop; ++ __ BIND(loop); ++ __ stptr(rcx, Address(rax, rdx, Address::times_8, sizeof(oopDesc) - 1*oopSize)); ++ __ decrementl(rdx); ++ __ jcc(Assembler::notZero, loop, rdx); ++ } ++ ++ // initialize object header only. ++ __ BIND(initialize_header); ++ if (UseBiasedLocking) { ++ __ pop(rcx); // get saved klass back in the register. ++ __ ldptr(rbx, Address(rcx, Klass::prototype_header_offset())); ++ __ stptr(rbx, Address(rax, oopDesc::mark_offset_in_bytes ())); ++ } else { ++ __ mov_immediate64(rscratch1, (intptr_t)markOopDesc::prototype()); ++ __ stptr(rscratch1, Address(rax, oopDesc::mark_offset_in_bytes ())); // header ++ __ pop(rcx); // get saved klass back in the register. ++ } ++ __ store_klass_gap(rax, R0); ++ __ store_klass(rax, rcx); // klass ++ ++ { ++ SkipIfEqual skip_if(_masm, &DTraceAllocProbes, 0); ++ // Trigger dtrace event for fastpath ++ __ push(atos); ++ __ call_VM_leaf( ++ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc), rax); ++ __ pop(atos); ++ } ++ ++ __ jmp(done); ++ } ++ ++ // slow case ++ __ BIND(slow_case); ++ __ pop(rcx); // restore stack pointer to what it was when we came in. ++ __ BIND(slow_case_no_pop); ++ ++ Register rarg1 = c_rarg1; ++ Register rarg2 = c_rarg2; ++ ++ __ get_constant_pool(rarg1); ++ __ get_unsigned_2_byte_index_at_bcp(rarg2, 1); ++ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::_new), rarg1, rarg2); ++ __ verify_oop(rax); ++ ++ // continue ++ __ BIND(done); ++ if(UseWmemb) ++ __ wmemb(); ++ else ++ __ memb();// add for sw64 ++} ++ ++void TemplateTable::newarray() { ++ transition(itos, atos); ++ Register rarg1 = c_rarg1; ++ Register rax = V0; ++ __ load_unsigned_byte(rarg1, at_bcp(1)); ++ //__ movws(rax, rax); ++ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::newarray), ++ rarg1, rax); ++ if(UseWmemb) ++ __ wmemb(); ++ else ++ __ memb();// add for sw64 ++} ++ ++void TemplateTable::anewarray() { ++ transition(itos, atos); ++ ++ Register rarg1 = c_rarg1; ++ Register rarg2 = c_rarg2; ++ Register rax = V0; ++ ++ __ get_unsigned_2_byte_index_at_bcp(rarg2, 1); ++ __ get_constant_pool(rarg1); ++ //__ movws(rax, rax); ++ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::anewarray), ++ rarg1, rarg2, rax); ++ if(UseWmemb) ++ __ wmemb(); ++ else ++ __ memb(); ++} ++ ++void TemplateTable::arraylength() { ++ transition(atos, itos); ++ Register rax = V0; ++ __ null_check(rax, arrayOopDesc::length_offset_in_bytes()); ++ __ ldws(rax, Address(rax, arrayOopDesc::length_offset_in_bytes())); ++} ++ ++void TemplateTable::checkcast() { ++ transition(atos, atos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ Register rax = V0; ++ Register rcx = c_rarg4; ++ Register rdx = c_rarg3; ++ Register rbx = rmethod; ++ __ testptr(rax, rax); // object is in rax ++ __ jcc(Assembler::zero, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array ++ __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index ++ // See if bytecode has already been quicked ++ __ cmpb(Address(rdx, rbx, ++ Address::times_1, ++ Array::base_offset_in_bytes()), ++ JVM_CONSTANT_Class); ++ __ jcc(Assembler::equal, quicked); ++ __ push(atos); // save receiver for result, and for GC ++ call_VM(noreg, CAST_FROM_FN_PTR(address, 
InterpreterRuntime::quicken_io_cc)); ++ ++ // vm_result_2 has metadata result ++ __ get_vm_result_2(rax, rthread); ++ ++ __ pop_ptr(rdx); // restore receiver ++ __ jmp(resolved); ++ ++ // Get superklass in rax and subklass in rbx ++ __ bind(quicked); ++ __ movl(rdx, rax); // Save object in rdx; rax needed for subtype check ++ __ load_resolved_klass_at_index(rcx, rbx, rax); ++ ++ __ bind(resolved); ++ __ load_klass(rbx, rdx); ++ ++ // Generate subtype check. Blows rcx, rdi. Object in rdx. ++ // Superklass in rax. Subklass in rbx. ++ __ gen_subtype_check(rbx, ok_is_subtype); ++ ++ // Come here on failure ++ __ push_ptr(rdx); ++ // object is at TOS ++ __ jump(ExternalAddress(Interpreter::_throw_ClassCastException_entry)); ++ ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ movl(rax, rdx); // Restore object in rdx ++ ++ // Collect counts on whether this check-cast sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ jmp(done); ++ __ bind(is_null); ++ __ profile_null_seen(rcx); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++} ++ ++void TemplateTable::instanceof() { ++ transition(atos, itos); ++ Label done, is_null, ok_is_subtype, quicked, resolved; ++ Register rax = V0; ++ Register rcx = c_rarg4; ++ Register rdx = c_rarg3; ++ Register rbx = rmethod; ++ __ testptr(rax, rax); ++ __ jcc(Assembler::zero, is_null); ++ ++ // Get cpool & tags index ++ __ get_cpool_and_tags(rcx, rdx); // rcx=cpool, rdx=tags array ++ __ get_unsigned_2_byte_index_at_bcp(rbx, 1); // rbx=index ++ // See if bytecode has already been quicked ++ __ cmpb(Address(rdx, rbx, ++ Address::times_1, ++ Array::base_offset_in_bytes()), ++ JVM_CONSTANT_Class); ++ __ jcc(Assembler::equal, quicked); ++ ++ __ push(atos); // save receiver for result, and for GC ++ call_VM(noreg, CAST_FROM_FN_PTR(address, InterpreterRuntime::quicken_io_cc)); ++ // vm_result_2 has metadata result ++ ++ __ get_vm_result_2(rax, rthread); ++ ++ __ pop_ptr(rdx); // restore receiver ++ __ verify_oop(rdx); ++ __ load_klass(rdx, rdx); ++ __ jmp(resolved); ++ ++ // Get superklass in rax and subklass in rdx ++ __ bind(quicked); ++ __ load_klass(rdx, rax); ++ __ load_resolved_klass_at_index(rcx, rbx, rax); ++ ++ __ bind(resolved); ++ ++ // Generate subtype check. Blows rcx, rdi ++ // Superklass in rax. Subklass in rdx. ++ __ gen_subtype_check(rdx, ok_is_subtype); ++ ++ // Come here on failure ++ __ movl(rax, R0); ++ __ jmp(done); ++ // Come here on success ++ __ bind(ok_is_subtype); ++ __ movw(rax, 1); ++ ++ // Collect counts on whether this test sees NULLs a lot or not. ++ if (ProfileInterpreter) { ++ __ jmp(done); ++ __ bind(is_null); ++ __ profile_null_seen(rcx); ++ } else { ++ __ bind(is_null); // same as 'done' ++ } ++ __ bind(done); ++ // rax = 0: obj == NULL or obj is not an instanceof the specified klass ++ // rax = 1: obj != NULL and obj is an instanceof the specified klass ++} ++ ++ ++//---------------------------------------------------------------------------------------------------- ++// Breakpoints ++void TemplateTable::_breakpoint() { ++ // Note: We get here even if we are single stepping.. ++ // jbug insists on setting breakpoints at every bytecode ++ // even if we are in single step mode. ++ ++ transition(vtos, vtos); ++ ++ Register rarg = c_rarg1; ++ Register rax = V0; ++ ++ // get the unpatched byte code ++ __ get_method(rarg); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::get_original_bytecode_at), ++ rarg, rbcp); ++ __ movl(rmethod, rax); // why? 
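Looking back at the allocation strategy spelled out in _new() above (try the TLAB, fall back to shared eden, otherwise call into the runtime), the fast path is simply a bump-pointer allocation in a thread-local buffer. A toy C++ sketch of that fast-path/slow-path split; the buffer size and the slow-path stand-in are invented for the example:

#include <cstddef>
#include <cstdio>
#include <cstring>

// Toy thread-local allocation buffer: bump-pointer allocation with a slow-path
// fallback, mirroring the TLAB-then-slow-path order described for _new() above.
struct Tlab {
  char* top;
  char* end;
};

static void* slow_path_allocate(size_t size) {
  // Stand-in for the InterpreterRuntime::_new call: allocate from the shared heap.
  return ::operator new(size);
}

static void* allocate(Tlab& tlab, size_t size) {
  if (static_cast<size_t>(tlab.end - tlab.top) >= size) {
    void* obj = tlab.top;            // fast path: bump the thread-local pointer, no locking
    tlab.top += size;
    std::memset(obj, 0, size);       // clear the fields before publishing (ZeroTLAB=false case)
    return obj;
  }
  return slow_path_allocate(size);   // TLAB exhausted: take the slow path
}

int main() {
  char buffer[1024];
  Tlab tlab{buffer, buffer + sizeof(buffer)};
  void* a = allocate(tlab, 64);      // served from the TLAB
  void* b = allocate(tlab, 2048);    // larger than the TLAB: falls back to the slow path
  std::printf("%p %p\n", a, b);
  ::operator delete(b);              // only the slow-path allocation owns heap memory
  return 0;
}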
++ ++ // post the breakpoint event ++ __ get_method(rarg); ++ __ call_VM(noreg, ++ CAST_FROM_FN_PTR(address, InterpreterRuntime::_breakpoint), ++ rarg, rbcp); ++ __ movl(rnext, rmethod); ++ // complete the execution of original bytecode ++ __ dispatch_only_normal(vtos); ++} ++ ++//----------------------------------------------------------------------------- ++// Exceptions ++ ++void TemplateTable::athrow() { ++ transition(atos, vtos); ++ const Register rax = FSR; ++ __ null_check(rax); ++ __ jump(ExternalAddress(Interpreter::throw_exception_entry())); ++} ++ ++//----------------------------------------------------------------------------- ++// Synchronization ++// ++// Note: monitorenter & exit are symmetric routines; which is reflected ++// in the assembly code structure as well ++// ++// Stack layout: ++// ++// [expressions ] <--- esp = expression stack top ++// .. ++// [expressions ] ++// [monitor entry] <--- monitor block top = expression stack bot ++// .. ++// [monitor entry] ++// [frame data ] <--- monitor block bot ++// ... ++// [saved rbp ] <--- rbp ++void TemplateTable::monitorenter() { ++ transition(atos, vtos); ++ Register rax = FSR; ++ ++ // check for NULL object ++ __ null_check(rax); ++ ++ const Address monitor_block_top( ++ rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ rfp, frame::interpreter_frame_initial_sp_offset * wordSize); ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ Label allocated; ++ ++ Register rtop = c_rarg3; ++ Register rbot = c_rarg2; ++ Register rmon = c_rarg1; ++ ++ // initialize entry pointer ++ __ movl(rmon, R0); // points to free slot or NULL ++ ++ // find a free slot in the monitor block (result in rmon) ++ { ++ Label entry, loop, exit; ++ __ ldptr(rtop, monitor_block_top); // points to current entry, ++ // starting with top-most entry ++ __ lea(rbot, monitor_block_bot); // points to word before bottom ++ // of monitor block ++ __ jmp(entry); ++ ++ __ bind(loop); ++ // check if current entry is used ++ __ cmpptr(Address(rtop, BasicObjectLock::obj_offset_in_bytes()), (int32_t) NULL_WORD); ++ // if not used then remember entry in rmon ++ __ cmove(Assembler::equal, rmon, rtop, rmon); // cmov => cmovptr ++ // check if current entry is for same object ++ __ cmpptr(rax, Address(rtop, BasicObjectLock::obj_offset_in_bytes())); ++ // if same object then stop searching ++ __ jcc(Assembler::equal, exit); ++ // otherwise advance to next entry ++ __ addptr(rtop, entry_size, rtop); ++ __ bind(entry); ++ // check if bottom reached ++ __ cmpptr(rtop, rbot); ++ // if not at bottom then check this entry ++ __ jcc(Assembler::notEqual, loop); ++ __ bind(exit); ++ } ++ ++ __ testptr(rmon, rmon); // check if a slot has been found ++ __ jcc(Assembler::notZero, allocated); // if found, continue with that one ++ ++ // allocate one if there's no free slot ++ { ++ Label entry, loop; ++ // 1. compute new pointers // rsp: old expression stack top ++ __ ldptr(rmon, monitor_block_bot); // rmon: old expression stack bottom ++ __ subptr(esp, entry_size, esp); // move expression stack top ++ __ subptr(rmon, entry_size, rmon); // move expression stack bottom ++ __ movl(rtop, esp); // set start value for copy loop ++ __ stptr(rmon, monitor_block_bot); // set new monitor block bottom ++ __ jmp(entry); ++ ++ // 2. 
move expression stack contents ++ __ bind(loop); ++ __ ldptr(rbot, Address(rtop, entry_size)); // load expression stack ++ // word from old location ++ __ stptr(rbot, Address(rtop, 0)); // and store it at new location ++ __ addptr(rtop, wordSize, rtop); // advance to next word ++ __ bind(entry); ++ __ cmpptr(rtop, rmon); // check if bottom reached ++ __ jcc(Assembler::notEqual, loop); // if not at bottom then ++ // copy next word ++ } ++ ++ // call run-time routine ++ // rmon: points to monitor entry ++ __ bind(allocated); ++ ++ // Increment bcp to point to the next bytecode, so exception ++ // handling for async. exceptions works correctly. ++ // The object has already been popped from the stack, so the ++ // expression stack looks correct. ++ __ incrementl(rbcp); ++ ++ // store object ++ __ stptr(rax, Address(rmon, BasicObjectLock::obj_offset_in_bytes())); ++ __ lock_object(rmon); ++ ++ // check to make sure this monitor doesn't cause stack overflow after locking ++ __ save_bcp(); // in case of exception ++ __ generate_stack_overflow_check(0); ++ ++ // The bcp has already been incremented. Just need to dispatch to ++ // next instruction. ++ __ dispatch_next(vtos); ++} ++ ++void TemplateTable::monitorexit() { ++ transition(atos, vtos); ++ Register rax = FSR; ++ ++ // check for NULL object ++ __ null_check(rax); ++ ++ const Address monitor_block_top( ++ rfp, frame::interpreter_frame_monitor_block_top_offset * wordSize); ++ const Address monitor_block_bot( ++ rfp, frame::interpreter_frame_initial_sp_offset * wordSize); ++ const int entry_size = frame::interpreter_frame_monitor_size() * wordSize; ++ ++ Register rtop = c_rarg1; ++ Register rbot = c_rarg2; ++ ++ Label found; ++ ++ // find matching slot ++ { ++ Label entry, loop; ++ __ ldptr(rtop, monitor_block_top); // points to current entry, ++ // starting with top-most entry ++ __ lea(rbot, monitor_block_bot); // points to word before bottom ++ // of monitor block ++ __ jmp(entry); ++ ++ __ bind(loop); ++ // check if current entry is for same object ++ __ cmpptr(rax, Address(rtop, BasicObjectLock::obj_offset_in_bytes())); ++ // if same object then stop searching ++ __ jcc(Assembler::equal, found); ++ // otherwise advance to next entry ++ __ addptr(rtop, entry_size, rtop); ++ __ bind(entry); ++ // check if bottom reached ++ __ cmpptr(rtop, rbot); ++ // if not at bottom then check this entry ++ __ jcc(Assembler::notEqual, loop); ++ } ++ ++ // error handling. 
Unlocking was not block-structured ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, ++ InterpreterRuntime::throw_illegal_monitor_state_exception)); ++ __ should_not_reach_here("4101"); ++ ++ // call run-time routine ++ __ bind(found); ++ __ push_ptr(rax); // make sure object is on stack (contract with oopMaps) ++ __ unlock_object(rtop); ++ __ pop_ptr(rax); // discard object ++} ++ ++// Wide instructions ++void TemplateTable::wide() { //__ stop("TODO:check function right:wide jzy"); ++ transition(vtos, vtos); ++ __ load_unsigned_byte(rscratch1, at_bcp(1)); ++ ExternalAddress wtable((address)Interpreter::_wentry_point); ++ __ jump(ArrayAddress(wtable, Address(noreg, rscratch1, Address::times_ptr)), rscratch2, rcc); ++ // Note: the rbcp increment step is part of the individual wide bytecode implementations ++} ++ ++// Multi arrays ++void TemplateTable::multianewarray() { ++ transition(vtos, atos); ++ ++ Register rax = FSR; ++ const Register rbx = rscratch1; ++ Register rarg = c_rarg1; ++ ++ __ load_unsigned_byte(rax, at_bcp(3)); // get number of dimensions ++ // last dim is on top of stack; we want address of first one: ++ // first_addr = last_addr + (ndims - 1) * stackElementSize - 1*wordsize ++ // the latter wordSize to point to the beginning of the array. ++ __ lea(rarg, Address(esp, rax, Interpreter::stackElementScale(), -wordSize)); ++ call_VM(rax, CAST_FROM_FN_PTR(address, InterpreterRuntime::multianewarray), rarg); ++ __ load_unsigned_byte(rbx, at_bcp(3)); ++ __ lea(esp, Address(esp, rbx, Interpreter::stackElementScale())); // get rid of counts ++ __ memb();// add for sw64 ++} +diff --git a/src/hotspot/cpu/sw64/templateTable_sw64.hpp b/src/hotspot/cpu/sw64/templateTable_sw64.hpp +new file mode 100644 +index 0000000000..fe443f8e1b +--- /dev/null ++++ b/src/hotspot/cpu/sw64/templateTable_sw64.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_TEMPLATETABLE_SW64_64_HPP ++#define CPU_SW64_VM_TEMPLATETABLE_SW64_64_HPP ++ ++static void prepare_invoke(int byte_no, ++ Register method, // linked method (or i-klass) ++ Register index = noreg, // itable index, MethodType, etc. 
++ Register recv = noreg, // if caller wants to see it ++ Register flags = noreg // if caller wants to test it ++ ); ++ static void invokevirtual_helper(Register index, Register recv, ++ Register flags); ++ static void volatile_barrier(); ++ ++ // Helpers ++ static void index_check(Register array, Register index); ++ static void index_check_without_pop(Register array, Register index); ++ ++#endif // CPU_SW64_VM_TEMPLATETABLE_SW64_64_HPP +diff --git a/src/hotspot/cpu/sw64/vmStructs_sw64.hpp b/src/hotspot/cpu/sw64/vmStructs_sw64.hpp +new file mode 100644 +index 0000000000..8eb2c2a4e8 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/vmStructs_sw64.hpp +@@ -0,0 +1,42 @@ ++/* ++ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_VMSTRUCTS_SW64_HPP ++#define CPU_SW64_VM_VMSTRUCTS_SW64_HPP ++ ++// These are the CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ volatile_nonstatic_field(JavaFrameAnchor, _last_Java_fp, intptr_t*) ++ ++#define VM_TYPES_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) ++ ++#define VM_INT_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // CPU_SW64_VM_VMSTRUCTS_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/vm_version_ext_sw64.cpp b/src/hotspot/cpu/sw64/vm_version_ext_sw64.cpp +new file mode 100644 +index 0000000000..c4a7086860 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/vm_version_ext_sw64.cpp +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "memory/allocation.hpp" ++#include "memory/allocation.inline.hpp" ++#include "runtime/os.inline.hpp" ++#include "vm_version_ext_sw64.hpp" ++ ++// VM_Version_Ext statics ++int VM_Version_Ext::_no_of_threads = 0; ++int VM_Version_Ext::_no_of_cores = 0; ++int VM_Version_Ext::_no_of_sockets = 0; ++bool VM_Version_Ext::_initialized = false; ++char VM_Version_Ext::_cpu_name[CPU_TYPE_DESC_BUF_SIZE] = {0}; ++char VM_Version_Ext::_cpu_desc[CPU_DETAILED_DESC_BUF_SIZE] = {0}; ++ ++void VM_Version_Ext::initialize_cpu_information(void) { ++ // do nothing if cpu info has been initialized ++ if (_initialized) { ++ return; ++ } ++ ++ int core_id = -1; ++ int chip_id = -1; ++ int len = 0; ++ char* src_string = NULL; ++ ++ _no_of_cores = os::processor_count(); ++ _no_of_threads = _no_of_cores; ++ _no_of_sockets = _no_of_cores; ++ snprintf(_cpu_name, CPU_TYPE_DESC_BUF_SIZE - 1, "Sw64"); ++ snprintf(_cpu_desc, CPU_DETAILED_DESC_BUF_SIZE, "Sw64 %s", _features_string); ++ _initialized = true; ++} ++ ++int VM_Version_Ext::number_of_threads(void) { ++ initialize_cpu_information(); ++ return _no_of_threads; ++} ++ ++int VM_Version_Ext::number_of_cores(void) { ++ initialize_cpu_information(); ++ return _no_of_cores; ++} ++ ++int VM_Version_Ext::number_of_sockets(void) { ++ initialize_cpu_information(); ++ return _no_of_sockets; ++} ++ ++const char* VM_Version_Ext::cpu_name(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_TYPE_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_name, CPU_TYPE_DESC_BUF_SIZE); ++ return tmp; ++} ++ ++const char* VM_Version_Ext::cpu_description(void) { ++ initialize_cpu_information(); ++ char* tmp = NEW_C_HEAP_ARRAY_RETURN_NULL(char, CPU_DETAILED_DESC_BUF_SIZE, mtTracing); ++ if (NULL == tmp) { ++ return NULL; ++ } ++ strncpy(tmp, _cpu_desc, CPU_DETAILED_DESC_BUF_SIZE); ++ return tmp; ++} +diff --git a/src/hotspot/cpu/sw64/vm_version_ext_sw64.hpp b/src/hotspot/cpu/sw64/vm_version_ext_sw64.hpp +new file mode 100644 +index 0000000000..16ae7063e3 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/vm_version_ext_sw64.hpp +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2016, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_VM_VERSION_EXT_SW64_HPP ++#define CPU_SW64_VM_VM_VERSION_EXT_SW64_HPP ++ ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++class VM_Version_Ext : public VM_Version { ++ private: ++ static const size_t CPU_TYPE_DESC_BUF_SIZE = 256; ++ static const size_t CPU_DETAILED_DESC_BUF_SIZE = 4096; ++ ++ static int _no_of_threads; ++ static int _no_of_cores; ++ static int _no_of_sockets; ++ static bool _initialized; ++ static char _cpu_name[CPU_TYPE_DESC_BUF_SIZE]; ++ static char _cpu_desc[CPU_DETAILED_DESC_BUF_SIZE]; ++ ++ public: ++ static int number_of_threads(void); ++ static int number_of_cores(void); ++ static int number_of_sockets(void); ++ ++ static const char* cpu_name(void); ++ static const char* cpu_description(void); ++ static void initialize_cpu_information(void); ++ ++}; ++ ++#endif // CPU_SW64_VM_VM_VERSION_EXT_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/vm_version_sw64.cpp b/src/hotspot/cpu/sw64/vm_version_sw64.cpp +new file mode 100644 +index 0000000000..fb5077d55d +--- /dev/null ++++ b/src/hotspot/cpu/sw64/vm_version_sw64.cpp +@@ -0,0 +1,568 @@ ++/* ++ * Copyright (c) 1997, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "memory/resourceArea.hpp" ++#include "runtime/java.hpp" ++#include "runtime/stubCodeGenerator.hpp" ++#include "runtime/vm_version.hpp" ++#include "utilities/macros.hpp" ++ ++#include OS_HEADER_INLINE(os) ++ ++//ZHJ #include ++//ZHJ #include ++ ++#ifndef HWCAP_AES ++#define HWCAP_AES (1<<3) ++#endif ++ ++#ifndef HWCAP_PMULL ++#define HWCAP_PMULL (1<<4) ++#endif ++ ++#ifndef HWCAP_SHA1 ++#define HWCAP_SHA1 (1<<5) ++#endif ++ ++#ifndef HWCAP_SHA2 ++#define HWCAP_SHA2 (1<<6) ++#endif ++ ++#ifndef HWCAP_CRC32 ++#define HWCAP_CRC32 (1<<7) ++#endif ++ ++#ifndef HWCAP_ATOMICS ++#define HWCAP_ATOMICS (1<<8) ++#endif ++ ++#ifndef HWCAP_ASIMD ++#define HWCAP_ASIMD (1<<9) ++#endif ++ ++////int VM_Version::_cpu; ++////int VM_Version::_model; ++////int VM_Version::_model2; ++////int VM_Version::_variant; ++////int VM_Version::_revision; ++////int VM_Version::_stepping; ++////VM_Version::PsrInfo VM_Version::_psr_info = { 0, }; ++int VM_Version::_features = VM_Version::unknown_m; ++const char* VM_Version::_features_str = ""; ++ ++////static BufferBlob* stub_blob; ++////static const int stub_size = 550; ++ ++////extern "C" { ++//// typedef void (*getPsrInfo_stub_t)(void*); ++////} ++////static getPsrInfo_stub_t getPsrInfo_stub = NULL; ++ ++ ++////class VM_Version_StubGenerator: public StubCodeGenerator { ++//// public: ++//// ++//// VM_Version_StubGenerator(CodeBuffer *c) : StubCodeGenerator(c) {} ++//// ++//// address generate_getPsrInfo() { ++//// StubCodeMark mark(this, "VM_Version", "getPsrInfo_stub"); ++////# define __ _masm-> ++//// address start = __ pc(); ++//// ++//// // void getPsrInfo(VM_Version::PsrInfo* psr_info); ++//// ++//// address entry = __ pc(); ++//// ++//// __ enter(); ++//// ++//// __ get_dczid_el0(rscratch1); ++//// __ strw(rscratch1, Address(c_rarg0, in_bytes(VM_Version::dczid_el0_offset()))); ++//// ++//// __ get_ctr_el0(rscratch1); ++//// __ strw(rscratch1, Address(c_rarg0, in_bytes(VM_Version::ctr_el0_offset()))); ++//// ++//// __ leave(); ++//// __ ret(lr); ++//// ++////# undef __ ++//// ++//// return start; ++//// } ++////}; ++ ++ ++////void VM_Version::get_processor_features() { ++//// _supports_cx8 = true; ++//// _supports_atomic_getset4 = true; ++//// _supports_atomic_getadd4 = true; ++//// _supports_atomic_getset8 = true; ++//// _supports_atomic_getadd8 = true; ++//// ++//// getPsrInfo_stub(&_psr_info); ++//// ++//// int dcache_line = VM_Version::dcache_line_size(); ++//// ++//// // Limit AllocatePrefetchDistance so that it does not exceed the ++//// // constraint in AllocatePrefetchDistanceConstraintFunc. 
++//// if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) ++//// FLAG_SET_DEFAULT(AllocatePrefetchDistance, MIN2(512, 3*dcache_line)); ++//// ++//// if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) ++//// FLAG_SET_DEFAULT(AllocatePrefetchStepSize, dcache_line); ++//// if (FLAG_IS_DEFAULT(PrefetchScanIntervalInBytes)) ++//// FLAG_SET_DEFAULT(PrefetchScanIntervalInBytes, 3*dcache_line); ++//// if (FLAG_IS_DEFAULT(PrefetchCopyIntervalInBytes)) ++//// FLAG_SET_DEFAULT(PrefetchCopyIntervalInBytes, 3*dcache_line); ++//// if (FLAG_IS_DEFAULT(SoftwarePrefetchHintDistance)) ++//// FLAG_SET_DEFAULT(SoftwarePrefetchHintDistance, 3*dcache_line); ++//// ++//// if (PrefetchCopyIntervalInBytes != -1 && ++//// ((PrefetchCopyIntervalInBytes & 7) || (PrefetchCopyIntervalInBytes >= 32768))) { ++//// warning("PrefetchCopyIntervalInBytes must be -1, or a multiple of 8 and < 32768"); ++//// PrefetchCopyIntervalInBytes &= ~7; ++//// if (PrefetchCopyIntervalInBytes >= 32768) ++//// PrefetchCopyIntervalInBytes = 32760; ++//// } ++//// ++//// if (AllocatePrefetchDistance !=-1 && (AllocatePrefetchDistance & 7)) { ++//// warning("AllocatePrefetchDistance must be multiple of 8"); ++//// AllocatePrefetchDistance &= ~7; ++//// } ++//// ++//// if (AllocatePrefetchStepSize & 7) { ++//// warning("AllocatePrefetchStepSize must be multiple of 8"); ++//// AllocatePrefetchStepSize &= ~7; ++//// } ++//// ++//// if (SoftwarePrefetchHintDistance != -1 && ++//// (SoftwarePrefetchHintDistance & 7)) { ++//// warning("SoftwarePrefetchHintDistance must be -1, or a multiple of 8"); ++//// SoftwarePrefetchHintDistance &= ~7; ++//// } ++//// ++//// unsigned long auxv = 0; //ZHJ getauxval(AT_HWCAP); ++//// ++//// char buf[512]; ++//// ++//// _features = auxv; ++//// ++//// int cpu_lines = 0; ++//// if (FILE *f = fopen("/proc/cpuinfo", "r")) { ++//// char buf[128], *p; ++//// while (fgets(buf, sizeof (buf), f) != NULL) { ++//// if ((p = strchr(buf, ':')) != NULL) { ++//// long v = strtol(p+1, NULL, 0); ++//// if (strncmp(buf, "CPU implementer", sizeof "CPU implementer" - 1) == 0) { ++//// _cpu = v; ++//// cpu_lines++; ++//// } else if (strncmp(buf, "CPU variant", sizeof "CPU variant" - 1) == 0) { ++//// _variant = v; ++//// } else if (strncmp(buf, "CPU part", sizeof "CPU part" - 1) == 0) { ++//// if (_model != v) _model2 = _model; ++//// _model = v; ++//// } else if (strncmp(buf, "CPU revision", sizeof "CPU revision" - 1) == 0) { ++//// _revision = v; ++//// } ++//// } ++//// } ++//// fclose(f); ++//// } ++//// ++//// // Enable vendor specific features ++//// ++//// // ThunderX ++//// if (_cpu == CPU_CAVIUM && (_model == 0xA1)) { ++//// if (_variant == 0) _features |= CPU_DMB_ATOMICS; ++//// if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { ++//// FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); ++//// } ++//// if (FLAG_IS_DEFAULT(UseSIMDForMemoryOps)) { ++//// FLAG_SET_DEFAULT(UseSIMDForMemoryOps, (_variant > 0)); ++//// } ++//// if (FLAG_IS_DEFAULT(UseSIMDForArrayEquals)) { ++//// FLAG_SET_DEFAULT(UseSIMDForArrayEquals, false); ++//// } ++//// } ++//// ++//// // ThunderX2 ++//// if ((_cpu == CPU_CAVIUM && (_model == 0xAF)) || ++//// (_cpu == CPU_BROADCOM && (_model == 0x516))) { ++//// if (FLAG_IS_DEFAULT(AvoidUnalignedAccesses)) { ++//// FLAG_SET_DEFAULT(AvoidUnalignedAccesses, true); ++//// } ++//// if (FLAG_IS_DEFAULT(UseSIMDForMemoryOps)) { ++//// FLAG_SET_DEFAULT(UseSIMDForMemoryOps, true); ++//// } ++//// if (FLAG_IS_DEFAULT(UseFPUForSpilling)) { ++//// FLAG_SET_DEFAULT(UseFPUForSpilling, true); ++//// } ++//// } ++//// ++//// // Cortex A53 
++//// if (_cpu == CPU_ARM && (_model == 0xd03 || _model2 == 0xd03)) { ++//// _features |= CPU_A53MAC; ++//// if (FLAG_IS_DEFAULT(UseSIMDForArrayEquals)) { ++//// FLAG_SET_DEFAULT(UseSIMDForArrayEquals, false); ++//// } ++//// } ++//// ++//// // Cortex A73 ++//// if (_cpu == CPU_ARM && (_model == 0xd09 || _model2 == 0xd09)) { ++//// if (FLAG_IS_DEFAULT(SoftwarePrefetchHintDistance)) { ++//// FLAG_SET_DEFAULT(SoftwarePrefetchHintDistance, -1); ++//// } ++//// // A73 is faster with short-and-easy-for-speculative-execution-loop ++//// if (FLAG_IS_DEFAULT(UseSimpleArrayEquals)) { ++//// FLAG_SET_DEFAULT(UseSimpleArrayEquals, true); ++//// } ++//// } ++//// ++//// if (_cpu == CPU_ARM && (_model == 0xd07 || _model2 == 0xd07)) _features |= CPU_STXR_PREFETCH; ++//// // If an olde style /proc/cpuinfo (cpu_lines == 1) then if _model is an A57 (0xd07) ++//// // we assume the worst and assume we could be on a big little system and have ++//// // undisclosed A53 cores which we could be swapped to at any stage ++//// if (_cpu == CPU_ARM && cpu_lines == 1 && _model == 0xd07) _features |= CPU_A53MAC; ++//// ++//// sprintf(buf, "0x%02x:0x%x:0x%03x:%d", _cpu, _variant, _model, _revision); ++//// if (_model2) sprintf(buf+strlen(buf), "(0x%03x)", _model2); ++//// if (auxv & HWCAP_ASIMD) strcat(buf, ", simd"); ++//// if (auxv & HWCAP_CRC32) strcat(buf, ", crc"); ++//// if (auxv & HWCAP_AES) strcat(buf, ", aes"); ++//// if (auxv & HWCAP_SHA1) strcat(buf, ", sha1"); ++//// if (auxv & HWCAP_SHA2) strcat(buf, ", sha256"); ++//// if (auxv & HWCAP_ATOMICS) strcat(buf, ", lse"); ++//// ++//// _features_string = os::strdup(buf); ++//// ++//// if (FLAG_IS_DEFAULT(UseCRC32)) { ++//// UseCRC32 = (auxv & HWCAP_CRC32) != 0; ++//// } ++//// ++//// if (UseCRC32 && (auxv & HWCAP_CRC32) == 0) { ++//// warning("UseCRC32 specified, but not supported on this CPU"); ++//// FLAG_SET_DEFAULT(UseCRC32, false); ++//// } ++//// ++//// if (FLAG_IS_DEFAULT(UseAdler32Intrinsics)) { ++//// FLAG_SET_DEFAULT(UseAdler32Intrinsics, true); ++//// } ++//// ++//// if (UseVectorizedMismatchIntrinsic) { ++//// warning("UseVectorizedMismatchIntrinsic specified, but not available on this CPU."); ++//// FLAG_SET_DEFAULT(UseVectorizedMismatchIntrinsic, false); ++//// } ++//// ++//// if (auxv & HWCAP_ATOMICS) { ++//// if (FLAG_IS_DEFAULT(UseLSE)) ++//// FLAG_SET_DEFAULT(UseLSE, true); ++//// } else { ++//// if (UseLSE) { ++//// warning("UseLSE specified, but not supported on this CPU"); ++//// FLAG_SET_DEFAULT(UseLSE, false); ++//// } ++//// } ++//// ++//// if (auxv & HWCAP_AES) { ++//// UseAES = UseAES || FLAG_IS_DEFAULT(UseAES); ++//// UseAESIntrinsics = ++//// UseAESIntrinsics || (UseAES && FLAG_IS_DEFAULT(UseAESIntrinsics)); ++//// if (UseAESIntrinsics && !UseAES) { ++//// warning("UseAESIntrinsics enabled, but UseAES not, enabling"); ++//// UseAES = true; ++//// } ++//// } else { ++//// if (UseAES) { ++//// warning("UseAES specified, but not supported on this CPU"); ++//// FLAG_SET_DEFAULT(UseAES, false); ++//// } ++//// if (UseAESIntrinsics) { ++//// warning("UseAESIntrinsics specified, but not supported on this CPU"); ++//// FLAG_SET_DEFAULT(UseAESIntrinsics, false); ++//// } ++//// } ++//// ++//// if (UseAESCTRIntrinsics) { ++//// warning("AES/CTR intrinsics are not available on this CPU"); ++//// FLAG_SET_DEFAULT(UseAESCTRIntrinsics, false); ++//// } ++//// ++//// if (FLAG_IS_DEFAULT(UseCRC32Intrinsics)) { ++//// UseCRC32Intrinsics = true; ++//// } ++//// ++//// if (auxv & HWCAP_CRC32) { ++//// if (FLAG_IS_DEFAULT(UseCRC32CIntrinsics)) { ++//// 
FLAG_SET_DEFAULT(UseCRC32CIntrinsics, true); ++//// } ++//// } else if (UseCRC32CIntrinsics) { ++//// warning("CRC32C is not available on the CPU"); ++//// FLAG_SET_DEFAULT(UseCRC32CIntrinsics, false); ++//// } ++//// ++//// if (FLAG_IS_DEFAULT(UseFMA)) { ++//// FLAG_SET_DEFAULT(UseFMA, true); ++//// } ++//// ++//// if (auxv & (HWCAP_SHA1 | HWCAP_SHA2)) { ++//// if (FLAG_IS_DEFAULT(UseSHA)) { ++//// FLAG_SET_DEFAULT(UseSHA, true); ++//// } ++//// } else if (UseSHA) { ++//// warning("SHA instructions are not available on this CPU"); ++//// FLAG_SET_DEFAULT(UseSHA, false); ++//// } ++//// ++//// if (UseSHA && (auxv & HWCAP_SHA1)) { ++//// if (FLAG_IS_DEFAULT(UseSHA1Intrinsics)) { ++//// FLAG_SET_DEFAULT(UseSHA1Intrinsics, true); ++//// } ++//// } else if (UseSHA1Intrinsics) { ++//// warning("Intrinsics for SHA-1 crypto hash functions not available on this CPU."); ++//// FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++//// } ++//// ++//// if (UseSHA && (auxv & HWCAP_SHA2)) { ++//// if (FLAG_IS_DEFAULT(UseSHA256Intrinsics)) { ++//// FLAG_SET_DEFAULT(UseSHA256Intrinsics, true); ++//// } ++//// } else if (UseSHA256Intrinsics) { ++//// warning("Intrinsics for SHA-224 and SHA-256 crypto hash functions not available on this CPU."); ++//// FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++//// } ++//// ++//// if (UseSHA512Intrinsics) { ++//// warning("Intrinsics for SHA-384 and SHA-512 crypto hash functions not available on this CPU."); ++//// FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++//// } ++//// ++//// if (!(UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics)) { ++//// FLAG_SET_DEFAULT(UseSHA, false); ++//// } ++//// ++//// if (auxv & HWCAP_PMULL) { ++//// if (FLAG_IS_DEFAULT(UseGHASHIntrinsics)) { ++//// FLAG_SET_DEFAULT(UseGHASHIntrinsics, true); ++//// } ++//// } else if (UseGHASHIntrinsics) { ++//// warning("GHASH intrinsics are not available on this CPU"); ++//// FLAG_SET_DEFAULT(UseGHASHIntrinsics, false); ++//// } ++//// ++//// if (is_zva_enabled()) { ++//// if (FLAG_IS_DEFAULT(UseBlockZeroing)) { ++//// FLAG_SET_DEFAULT(UseBlockZeroing, true); ++//// } ++//// if (FLAG_IS_DEFAULT(BlockZeroingLowLimit)) { ++//// FLAG_SET_DEFAULT(BlockZeroingLowLimit, 4 * VM_Version::zva_length()); ++//// } ++//// } else if (UseBlockZeroing) { ++//// warning("DC ZVA is not available on this CPU"); ++//// FLAG_SET_DEFAULT(UseBlockZeroing, false); ++//// } ++//// ++//// // This machine allows unaligned memory accesses ++//// if (FLAG_IS_DEFAULT(UseUnalignedAccesses)) { ++//// FLAG_SET_DEFAULT(UseUnalignedAccesses, true); ++//// } ++//// ++//// if (FLAG_IS_DEFAULT(UseMultiplyToLenIntrinsic)) { ++//// UseMultiplyToLenIntrinsic = true; ++//// } ++//// ++//// if (FLAG_IS_DEFAULT(UseSquareToLenIntrinsic)) { ++//// UseSquareToLenIntrinsic = true; ++//// } ++//// ++//// if (FLAG_IS_DEFAULT(UseMulAddIntrinsic)) { ++//// UseMulAddIntrinsic = true; ++//// } ++//// ++//// if (FLAG_IS_DEFAULT(UseBarriersForVolatile)) { ++//// UseBarriersForVolatile = (_features & CPU_DMB_ATOMICS) != 0; ++//// } ++//// ++//// if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { ++//// UsePopCountInstruction = true; ++//// } ++//// ++//// if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++//// UseMontgomeryMultiplyIntrinsic = true; ++//// } ++//// if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++//// UseMontgomerySquareIntrinsic = true; ++//// } ++//// ++////#ifdef COMPILER2 ++//// if (FLAG_IS_DEFAULT(OptoScheduling)) { ++//// OptoScheduling = true; ++//// } ++////#endif ++////} ++ ++int VM_Version::determine_features() 
{ ++ //////////////////////add some other feature here////////////////// ++ int features = platform_features(unknown_m); ++ //spt_16k_page_m; ++ return features; ++} ++ ++void VM_Version::initialize() { ++ ++ _features = determine_features(); ++ //no need, Abstract_VM_Version already defines it as false ++ _supports_cx8 = true; ++ ++ //////////////////////add some other feature here////////////////// ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(MaxGCPauseMillis)) { ++ FLAG_SET_DEFAULT(MaxGCPauseMillis, 650*8); ++ } ++ ++ if (UseG1GC && FLAG_IS_DEFAULT(GCPauseIntervalMillis)) { ++ FLAG_SET_DEFAULT(GCPauseIntervalMillis, MaxGCPauseMillis + 1); ++ } ++ ++#ifdef COMPILER2 ++ if (MaxVectorSize > 0) { ++ if (!is_power_of_2(MaxVectorSize)) { ++ warning("MaxVectorSize must be a power of 2"); ++ MaxVectorSize = 8; ++ } ++ } ++ // Vector optimization is disabled by default. ++ if (FLAG_IS_DEFAULT(MaxVectorSize)) { ++ MaxVectorSize = 0; ++ } ++ ++ // Use ctlz/cttz/ctpop instructions if available. ++ if (is_shenwei()) { ++ if (FLAG_IS_DEFAULT(UseCountLeadingZerosInstruction)) { ++ FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, 1); ++ } ++ if (FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) { ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, 1); ++ } ++ if (FLAG_IS_DEFAULT(UsePopCountInstruction)) { ++ FLAG_SET_DEFAULT(UsePopCountInstruction, 1); ++ } ++ if (is_sw6b() && FLAG_IS_DEFAULT(UseSW6B)) { ++ //FLAG_SET_DEFAULT(UseSW6B, 1); ++ } ++ if (is_sw8a() && FLAG_IS_DEFAULT(UseSW8A)) { ++ FLAG_SET_DEFAULT(UseSW8A, 1); ++ FLAG_SET_DEFAULT(FRegisterConflict, 0); ++ FLAG_SET_DEFAULT(UseWmemb, 1); ++ FLAG_SET_DEFAULT(UseAddpi, 0); ++ } ++ } else if (UseCountLeadingZerosInstruction || UseCountTrailingZerosInstruction ++ || UsePopCountInstruction) { ++ if (!FLAG_IS_DEFAULT(UseCountTrailingZerosInstruction)) ++ warning("Only SW CPUs support UseCountTrailingZerosInstruction"); ++ FLAG_SET_DEFAULT(UseCountLeadingZerosInstruction, 0); ++ FLAG_SET_DEFAULT(UseCountTrailingZerosInstruction, 0); ++ FLAG_SET_DEFAULT(UsePopCountInstruction, 0); ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomeryMultiplyIntrinsic)) { ++ UseMontgomeryMultiplyIntrinsic = true; ++ } ++ if (FLAG_IS_DEFAULT(UseMontgomerySquareIntrinsic)) { ++ UseMontgomerySquareIntrinsic = true; ++ } ++#endif ++ ++ UseSSE = 0; // Only on x86 and x64 ++ ++ if (TieredCompilation) { ++ if (!FLAG_IS_DEFAULT(TieredCompilation)) ++ warning("TieredCompilation not supported"); ++ FLAG_SET_DEFAULT(TieredCompilation, false); ++ } ++// if (UseCRC32Intrinsics) { ++// if (!FLAG_IS_DEFAULT(UseCRC32Intrinsics)) ++// warning("CRC32 intrinsics are not available on this CPU"); ++// FLAG_SET_DEFAULT(UseCRC32Intrinsics, false); ++// } ++ char buf[512]; ++ jio_snprintf(buf, sizeof(buf), "%s%s%s%s%s%s%s%s%s%s", ++ (has_l2_cache() ? ", has_l2_cache" : ""), ++ (has_16k_page() ? ", has_16k_page" : ""), ++ (is_shenwei() ? ", on_shenwei_platform" : ""), ++ (is_sw2f() ? ", SW410(2F)" : ""), ++ (is_sw4a() ? ", SW411(4A)" : "" ), ++ (is_sw6a() ? ", SW421(6A)" : ""), ++ (is_sw6b() ? ", SW422(6B)" : ""), ++ (is_sw1621() ? ", SW1621" : ""), ++ (is_sw8a() ? ", SW8A" : ""), ++ (UseCountTrailingZerosInstruction ? ", UseCountTrailingZerosInstruction" : "")); ++ ++ // buf starts with ", " or is empty ++ _features_str = strdup(strlen(buf) > 2 ?
buf + 2 : buf); ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStyle)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStyle, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchLines, 1); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchStepSize)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchStepSize, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocatePrefetchDistance)) { ++ FLAG_SET_DEFAULT(AllocatePrefetchDistance, 64); ++ } ++ ++ if (FLAG_IS_DEFAULT(AllocateInstancePrefetchLines)) { ++ FLAG_SET_DEFAULT(AllocateInstancePrefetchLines, 1); ++ } ++ ++ if (UseSHA) { ++ warning("SHA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA, false); ++ } ++ ++ if (UseFMA) { ++ warning("FMA instructions are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseFMA, false); ++ } ++ ++ if (UseSHA1Intrinsics || UseSHA256Intrinsics || UseSHA512Intrinsics) { ++ warning("SHA intrinsics are not available on this CPU"); ++ FLAG_SET_DEFAULT(UseSHA1Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA256Intrinsics, false); ++ FLAG_SET_DEFAULT(UseSHA512Intrinsics, false); ++ } ++ ++ NOT_PRODUCT( if (PrintMiscellaneous && Verbose) print_features(); ); ++} ++ ++void VM_Version::print_features() { ++ tty->print_cr("Version:%s", cpu_features()); ++} +diff --git a/src/hotspot/cpu/sw64/vm_version_sw64.hpp b/src/hotspot/cpu/sw64/vm_version_sw64.hpp +new file mode 100644 +index 0000000000..e0f361bdfb +--- /dev/null ++++ b/src/hotspot/cpu/sw64/vm_version_sw64.hpp +@@ -0,0 +1,167 @@ ++/* ++ * Copyright (c) 1997, 2021, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef CPU_SW64_VM_VM_VERSION_SW64_HPP ++#define CPU_SW64_VM_VM_VERSION_SW64_HPP ++ ++#include "runtime/abstract_vm_version.hpp" ++#include "runtime/globals_extension.hpp" ++#include "utilities/sizes.hpp" ++ ++class VM_Version : public Abstract_VM_Version { ++//// friend class JVMCIVMStructs; ++ ++protected: ++//// static int _cpu; ++//// static int _model; ++//// static int _model2; ++//// static int _variant; ++//// static int _revision; ++//// static int _stepping; ++//// ++//// struct PsrInfo { ++//// uint32_t dczid_el0; ++//// uint32_t ctr_el0; ++//// }; ++//// static PsrInfo _psr_info; ++ static int _features; ++ static const char* _features_str; ++//// static void get_processor_features(); ++ static void print_features(); ++ static int determine_features(); ++ static int platform_features(int features); ++ ++public: ++ // Initialization ++ static void initialize(); ++ ++//// // Asserts ++//// static void assert_is_initialized() { ++//// } ++ ++ static bool has_l2_cache() { return _features & with_l2_cache_m; } ++ static bool has_16k_page() { return _features & spt_16k_page_m; } ++ static bool is_sw2f() { return _features & sw2f_m; } ++ static bool is_sw4a() { return _features & sw4a_m; } ++ static bool is_sw6a() { return _features & sw6a_m; } ++ static bool is_sw6b() { return _features & sw6b_m; } ++ static bool is_sw8a() { return _features & wx_h8000_m; }//TODO UseSW8A ++ static bool is_sw1621() { return _features & sw1621_m; } ++ static bool is_sw3231() { return _features & sw3231_m; } ++ static bool is_shenwei() { return _features & with_sw_support_m; } ++// static bool sw2only() { return is_sw2f() || is_sw4a() || is_sw6a(); } ++ static bool sw2only() { return true; } ++ static bool sw3only() { return is_sw6b(); } ++ static bool sw4only() { return is_sw8a(); } ++ static const char* cpu_features() { return _features_str; } ++ ++//// static bool expensive_load(int ld_size, int scale) { ++//// if (cpu_family() == CPU_ARM) { ++//// // Half-word load with index shift by 1 (aka scale is 2) has ++//// // extra cycle latency, e.g. ldrsh w0, [x1,w2,sxtw #1]. 
++//// if (ld_size == 2 && scale == 2) { ++//// return true; ++//// } ++//// } ++//// return false; ++//// } ++ ++ enum Family { ++ CPU_ARM = 'A', ++ CPU_BROADCOM = 'B', ++ CPU_CAVIUM = 'C', ++ CPU_DEC = 'D', ++ CPU_INFINEON = 'I', ++ CPU_MOTOROLA = 'M', ++ CPU_NVIDIA = 'N', ++ CPU_AMCC = 'P', ++ CPU_QUALCOM = 'Q', ++ CPU_MARVELL = 'V', ++ CPU_INTEL = 'i', ++ }; ++ ++ enum Feature_Flag { ++ with_l2_cache = 0, ++ spt_16k_page = 1, ++ sw2f = 2, ++ sw4a = 3, ++ sw6a = 4, ++ sw6b = 5, ++ sw1621 = 6, ++ sw3231 = 7, ++ wx_h8000 = 8, ++ with_sw_support = 9, ++// CPU_FP = (1<<0), ++// CPU_ASIMD = (1<<1), ++// CPU_EVTSTRM = (1<<2), ++// CPU_AES = (1<<3), ++// CPU_PMULL = (1<<4), ++// CPU_SHA1 = (1<<5), ++// CPU_SHA2 = (1<<6), ++// CPU_CRC32 = (1<<7), ++// CPU_LSE = (1<<8), ++// CPU_STXR_PREFETCH= (1 << 29), ++// CPU_A53MAC = (1 << 30), ++// CPU_DMB_ATOMICS = (1 << 31), ++ }; ++ ++ enum Feature_Flag_Set { ++ unknown_m = 0, ++ all_features_m = -1, ++ with_l2_cache_m = 1 << with_l2_cache, ++ spt_16k_page_m = 1 << spt_16k_page, ++ sw2f_m = 1 << sw2f, ++ sw4a_m = 1 << sw4a, ++ sw6a_m = 1 << sw6a, ++ sw6b_m = 1 << sw6b, ++ sw1621_m = 1 << sw1621, ++ sw3231_m = 1 << sw3231, ++ wx_h8000_m = 1 << wx_h8000, ++ with_sw_support_m = 1 << with_sw_support, ++ ++ //////////////////////add some other feature here////////////////// ++ }; ++ ++//// static int cpu_family() { return _cpu; } ++//// static int cpu_model() { return _model; } ++//// static int cpu_model2() { return _model2; } ++//// static int cpu_variant() { return _variant; } ++//// static int cpu_revision() { return _revision; } ++//// static ByteSize dczid_el0_offset() { return byte_offset_of(PsrInfo, dczid_el0); } ++//// static ByteSize ctr_el0_offset() { return byte_offset_of(PsrInfo, ctr_el0); } ++//// static bool is_zva_enabled() { ++//// // Check the DZP bit (bit 4) of dczid_el0 is zero ++//// // and block size (bit 0~3) is not zero. ++//// return ((_psr_info.dczid_el0 & 0x10) == 0 && ++//// (_psr_info.dczid_el0 & 0xf) != 0); ++//// } ++//// static int icache_line_size() { ++//// return (1 << (_psr_info.ctr_el0 & 0x0f)) * 4; ++//// } ++//// static int dcache_line_size() { ++//// return (1 << ((_psr_info.ctr_el0 >> 16) & 0x0f)) * 4; ++//// } ++}; ++ ++#endif // CPU_SW64_VM_VM_VERSION_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/vmreg_sw64.cpp b/src/hotspot/cpu/sw64/vmreg_sw64.cpp +new file mode 100644 +index 0000000000..9fd20be0f5 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/vmreg_sw64.cpp +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/assembler.hpp" ++#include "code/vmreg.hpp" ++ ++ ++ ++void VMRegImpl::set_regName() { ++ Register reg = ::as_Register(0); ++ int i; ++ for (i = 0; i < ConcreteRegisterImpl::max_gpr ; ) { ++ regName[i++] = reg->name(); ++ regName[i++] = reg->name(); ++ reg = reg->successor(); ++ } ++ ++ FloatRegister freg = ::as_FloatRegister(0); ++ for ( ; i < ConcreteRegisterImpl::max_fpr ; ) { ++ regName[i++] = freg->name(); ++ regName[i++] = freg->name(); ++ freg = freg->successor(); ++ } ++ ++ for ( ; i < ConcreteRegisterImpl::number_of_registers ; i ++ ) { ++ regName[i] = "NON-GPR-FPR"; ++ } ++} +diff --git a/src/hotspot/cpu/sw64/vmreg_sw64.hpp b/src/hotspot/cpu/sw64/vmreg_sw64.hpp +new file mode 100644 +index 0000000000..39e4feb116 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/vmreg_sw64.hpp +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++ #ifndef CPU_SW64_VM_VMREG_SW64_HPP ++ #define CPU_SW64_VM_VMREG_SW64_HPP ++ ++ inline bool is_Register() { ++ return (unsigned int) value() < (unsigned int) ConcreteRegisterImpl::max_gpr; ++ } ++ ++ inline bool is_FloatRegister() { ++ return value() >= ConcreteRegisterImpl::max_gpr && value() < ConcreteRegisterImpl::max_fpr; ++ } ++ ++ inline Register as_Register() { ++ ++ assert( is_Register(), "must be"); ++ // Yuk ++ return ::as_Register(value() >> 1); ++ } ++ ++ inline FloatRegister as_FloatRegister() { ++ assert( is_FloatRegister() && is_even(value()), "must be" ); ++ // Yuk ++ return ::as_FloatRegister((value() - ConcreteRegisterImpl::max_gpr) >> 1); ++ } ++ ++ inline bool is_concrete() { ++ assert(is_reg(), "must be"); ++ if(is_Register()) return true; ++ if(is_FloatRegister()) return true; ++ assert(false, "what register?"); ++ return false; ++ } ++ ++ #endif // CPU_SW64_VM_VMREG_SW64_HPP +diff --git a/src/hotspot/cpu/sw64/vmreg_sw64.inline.hpp b/src/hotspot/cpu/sw64/vmreg_sw64.inline.hpp +new file mode 100644 +index 0000000000..b21409dbd4 +--- /dev/null ++++ b/src/hotspot/cpu/sw64/vmreg_sw64.inline.hpp +@@ -0,0 +1,38 @@ ++/* ++ * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. 
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef CPU_SW64_VM_VMREG_SW64_INLINE_HPP ++#define CPU_SW64_VM_VMREG_SW64_INLINE_HPP ++ ++inline VMReg RegisterImpl::as_VMReg() { ++ if( this==noreg ) return VMRegImpl::Bad(); ++ return VMRegImpl::as_VMReg(encoding() << 1 ); ++} ++ ++inline VMReg FloatRegisterImpl::as_VMReg() { ++ return VMRegImpl::as_VMReg((encoding() << 1) + ConcreteRegisterImpl::max_gpr); ++} ++ ++#endif // CPU_SW64_VM_VMREG_SW64_INLINE_HPP +diff --git a/src/hotspot/cpu/sw64/vtableStubs_sw64.cpp b/src/hotspot/cpu/sw64/vtableStubs_sw64.cpp +new file mode 100644 +index 0000000000..2a00a5a26f +--- /dev/null ++++ b/src/hotspot/cpu/sw64/vtableStubs_sw64.cpp +@@ -0,0 +1,269 @@ ++/* ++ * Copyright (c) 2003, 2021, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
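A note on the shifts in the two as_VMReg() conversions shown a little earlier (and the matching value() >> 1 in vmreg_sw64.hpp): each CPU register is assigned two consecutive VMReg slots, which is also why set_regName() stores every register name twice. The round trip is sketched below as an illustration only; it assumes the standard HotSpot VMReg/Register API rather than quoting the patch, and the register number 3 is arbitrary.

// Sketch only: the slot packing implied by the conversions above.
//   GPR n -> VMReg slots 2n and 2n+1
//   FPR n -> VMReg slots max_gpr + 2n and max_gpr + 2n + 1
Register r    = ::as_Register(3);
VMReg    slot = r->as_VMReg();        // value() == 3 << 1 == 6
Register back = slot->as_Register();  // 6 >> 1 == 3, the same register
assert(back == r, "round trip through VMReg must be lossless");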
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "asm/macroAssembler.inline.hpp" ++#include "assembler_sw64.inline.hpp" ++#include "code/vtableStubs.hpp" ++#include "interp_masm_sw64.hpp" ++#include "memory/resourceArea.hpp" ++#include "oops/compiledICHolder.hpp" ++#include "oops/instanceKlass.hpp" ++#include "oops/klassVtable.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "vmreg_sw64.inline.hpp" ++#ifdef COMPILER2 ++#include "opto/runtime.hpp" ++#endif ++ ++// machine-dependent part of VtableStubs: create VtableStub of correct size and ++// initialize its code ++ ++#define __ masm-> ++ ++#ifndef PRODUCT ++extern "C" void bad_compiled_vtable_index(JavaThread* thread, oop receiver, int index); ++#endif ++ ++VtableStub* VtableStubs::create_vtable_stub(int vtable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(true); ++ VtableStub* s = new(stub_code_length) VtableStub(true, vtable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ // No variance was detected in vtable stub sizes. Setting index_dependent_slop == 0 will unveil any deviation from this observation. ++ const int index_dependent_slop = 0; ++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler* masm = new MacroAssembler(&cb); ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ __ incrementw(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); ++ } ++#endif ++ ++ // get receiver (need to skip return address on top of stack) ++ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); ++ ++ // get receiver klass ++ Register rax = V0; ++// Register rbx = rscratch1; ++ address npe_addr = __ pc(); ++ __ load_klass(rax, j_rarg0); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ start_pc = __ pc(); ++ // check offset vs vtable length ++ __ cmpw(Address(rax, Klass::vtable_length_offset()), vtable_index*vtableEntry::size()); ++ slop_delta = 12 - (__ pc() - start_pc); // cmpl varies in length, depending on data ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++ __ jcc(Assembler::greater, L); ++ __ movw(c_rarg2, vtable_index); ++ // VTABLE TODO: find upper bound for call_VM length. 
++ start_pc = __ pc(); ++ __ call_VM(noreg, CAST_FROM_FN_PTR(address, bad_compiled_vtable_index), j_rarg0, c_rarg2); ++ slop_delta = 550 - (__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ const Register method = rmethod; ++ ++ // load Method* and target address ++ start_pc = __ pc(); ++ __ lookup_virtual_method(rax, vtable_index, method); ++ slop_delta = 8 - (int)(__ pc() - start_pc); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "negative slop(%d) encountered, adjust code size estimate!", slop_delta); ++ ++#ifndef PRODUCT ++ if (DebugVtables) { ++ Label L; ++ __ cmpptr(method, R0); ++ __ jcc(Assembler::equal, L); ++ __ cmpptr(Address(method, Method::from_compiled_offset()), R0); ++ __ jcc(Assembler::notZero, L); ++ __ stop("Vtable entry is NULL"); ++ __ bind(L); ++ } ++#endif // PRODUCT ++ ++ // rax: receiver klass ++ // method (rbx): Method* ++ // rcx: receiver ++ address ame_addr = __ pc(); ++ __ jmp( Address(method, Method::from_compiled_offset())); ++ ++ masm->flush(); ++ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, true, vtable_index, slop_bytes, index_dependent_slop); ++ ++ return s; ++} ++ ++ ++VtableStub* VtableStubs::create_itable_stub(int itable_index) { ++ // Read "A word on VtableStub sizing" in share/code/vtableStubs.hpp for details on stub sizing. ++ const int stub_code_length = code_size_limit(false); ++ VtableStub* s = new(stub_code_length) VtableStub(false, itable_index); ++ // Can be NULL if there is no free space in the code cache. ++ if (s == NULL) { ++ return NULL; ++ } ++ ++ // Count unused bytes in instruction sequences of variable size. ++ // We add them to the computed buffer size in order to avoid ++ // overflow in subsequently generated stubs. ++ address start_pc; ++ int slop_bytes = 0; ++ int slop_delta = 0; ++ const int index_dependent_slop = (itable_index == 0) ? 4 : // code size change with transition from 8-bit to 32-bit constant (@index == 16). ++ (itable_index < 16) ? 3 : 0; // index == 0 generates even shorter code. 
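Before the stub body is emitted, it is worth spelling out the slop-accounting idiom that both stub generators above rely on: every variable-length instruction sequence gets a fixed byte budget, the bytes actually emitted are measured with __ pc(), and any unused budget is accumulated so bookkeeping() can flag stubs that are about to outgrow their fixed-size buffers. A schematic sketch of the pattern follows; it is illustrative only, the 12-byte budget is made up, and it assumes the enclosing MacroAssembler context and the '#define __ masm->' from this file.

// Sketch of the measurement idiom, not code from the patch.
int     slop_bytes = 0;
address start_pc   = __ pc();                      // remember where the sequence starts
// ... emit one variable-length instruction sequence ...
int slop_delta = 12 - (int)(__ pc() - start_pc);   // 12 = assumed worst-case budget
slop_bytes    += slop_delta;                       // unused budget, later passed to bookkeeping()
assert(slop_delta >= 0, "code size estimate too small, grow the budget");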
++ ++ ResourceMark rm; ++ CodeBuffer cb(s->entry_point(), stub_code_length); ++ MacroAssembler *masm = new MacroAssembler(&cb); ++ ++#if (!defined(PRODUCT) && defined(COMPILER2)) ++ if (CountCompiledCalls) { ++ __ incrementw(ExternalAddress((address) SharedRuntime::nof_megamorphic_calls_addr())); ++ } ++#endif // PRODUCT ++ ++ // Entry arguments: ++ // rax: CompiledICHolder ++ // j_rarg0: Receiver ++ ++ // Most registers are in use; we'll use rax, rbx, r10, r11 ++ // (various calling sequences use r[cd]x, r[sd]i, r[89]; stay away from them) ++ const Register recv_klass_reg = r10; ++ const Register holder_klass_reg = rax; // declaring interface klass (DECC) ++ const Register resolved_klass_reg = rbx; // resolved interface klass (REFC) ++ const Register temp_reg = r11; ++ ++ const Register icholder_reg = rax; ++ __ ldptr(resolved_klass_reg, Address(icholder_reg, CompiledICHolder::holder_klass_offset())); ++ __ ldptr(holder_klass_reg, Address(icholder_reg, CompiledICHolder::holder_metadata_offset())); ++ ++ Label L_no_such_interface; ++ ++ // get receiver klass (also an implicit null-check) ++ assert(VtableStub::receiver_location() == j_rarg0->as_VMReg(), "receiver expected in j_rarg0"); ++ address npe_addr = __ pc(); ++ __ load_klass(recv_klass_reg, j_rarg0); ++ ++ start_pc = __ pc(); ++ ++ // Receiver subtype check against REFC. ++ // Destroys recv_klass_reg value. ++ __ lookup_interface_method(// inputs: rec. class, interface ++ recv_klass_reg, resolved_klass_reg, noreg, ++ // outputs: scan temp. reg1, scan temp. reg2 ++ recv_klass_reg, temp_reg, ++ L_no_such_interface, ++ /*return_method=*/false); ++ ++ const ptrdiff_t typecheckSize = __ pc() - start_pc; ++ start_pc = __ pc(); ++ ++ // Get selected method from declaring class and itable index ++ const Register method = rbx; ++ __ load_klass(recv_klass_reg, j_rarg0); // restore recv_klass_reg ++ __ lookup_interface_method(// inputs: rec. class, interface, itable index ++ recv_klass_reg, holder_klass_reg, itable_index, ++ // outputs: method, scan temp. reg ++ method, temp_reg, ++ L_no_such_interface); ++ ++ const ptrdiff_t lookupSize = __ pc() - start_pc; ++ ++ // We expect we need index_dependent_slop extra bytes. Reason: ++ // The emitted code in lookup_interface_method changes when itable_index exceeds 15. ++ // For linux, a very narrow estimate would be 112, but Solaris requires some more space (130). ++ const ptrdiff_t estimate = 144; ++ const ptrdiff_t codesize = typecheckSize + lookupSize + index_dependent_slop; ++ slop_delta = (int)(estimate - codesize); ++ slop_bytes += slop_delta; ++ assert(slop_delta >= 0, "itable #%d: Code size estimate (%d) for lookup_interface_method too small, required: %d", itable_index, (int)estimate, (int)codesize); ++ ++ // If we take a trap while this arg is on the stack we will not ++ // be able to walk the stack properly. This is not an issue except ++ // when there are mistakes in this assembly code that could generate ++ // a spurious fault. Ask me how I know... ++ ++ // method (rbx): Method* ++ // j_rarg0: receiver ++ ++#ifdef ASSERT ++ if (DebugVtables) { ++ Label L2; ++ __ cmpptr(method, R0); ++ __ jcc(Assembler::equal, L2); ++ __ cmpptr(Address(method, Method::from_compiled_offset()), R0); ++ __ jcc(Assembler::notZero, L2); ++ __ stop("compiler entrypoint is null"); ++ __ bind(L2); ++ } ++#endif // ASSERT ++ ++ address ame_addr = __ pc(); ++ __ jmp(Address(method, Method::from_compiled_offset())); ++ ++ __ bind(L_no_such_interface); ++ // Handle IncompatibleClassChangeError in itable stubs. 
++ // More detailed error message. ++ // We force resolving of the call site by jumping to the "handle ++ // wrong method" stub, and so let the interpreter runtime do all the ++ // dirty work. ++ __ jump(RuntimeAddress(SharedRuntime::get_handle_wrong_method_stub())); ++ ++ masm->flush(); ++ slop_bytes += index_dependent_slop; // add'l slop for size variance due to large itable offsets ++ bookkeeping(masm, tty, s, npe_addr, ame_addr, false, itable_index, slop_bytes, index_dependent_slop); ++ ++ return s; ++} ++ ++int VtableStub::pd_code_alignment() { ++ // cache line size is 64 bytes, but we want to limit alignment loss. ++ const unsigned int icache_line_size = wordSize; ++ return icache_line_size; ++} +diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index 2842a11f92..5698ae88d9 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -430,7 +430,7 @@ void os::init_system_properties_values() { + // 1: ... + // ... + // 7: The default directories, normally /lib and /usr/lib. +-#if defined(AMD64) || (defined(_LP64) && defined(SPARC)) || defined(PPC64) || defined(S390) ++#if defined(AMD64) || (defined(_LP64) && defined(SPARC)) || defined(PPC64) || defined(S390) || defined(SW64) + #define DEFAULT_LIBPATH "/usr/lib64:/lib64:/lib:/usr/lib" + #else + #if defined(AARCH64) +@@ -1509,6 +1509,9 @@ void os::Linux::clock_init() { + #if defined(X86) || defined(PPC64) || defined(S390) + #define SYS_clock_getres AMD64_ONLY(229) IA32_ONLY(266) PPC64_ONLY(247) S390_ONLY(261) + #define sys_clock_getres(x,y) ::syscall(SYS_clock_getres, x, y) ++ #elif defined(SW64) ++ #define SYS_clock_getres SW64_ONLY(421) ++ #define sys_clock_getres(x,y) ::syscall(SYS_clock_getres, x, y) + #else + #warning "SYS_clock_getres not defined for this platform, disabling fast_thread_cpu_time" + #define sys_clock_getres(x,y) -1 +@@ -1991,7 +1994,11 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { + #endif + {EM_ARM, EM_ARM, ELFCLASS32, ELFDATA2LSB, (char*)"ARM"}, + {EM_S390, EM_S390, ELFCLASSNONE, ELFDATA2MSB, (char*)"IBM System/390"}, ++#if defined(__sw_64) ++ {EM_SW_64, EM_SW_64, ELFCLASS64, ELFDATA2LSB, (char*)"Sw64"}, ++#else + {EM_ALPHA, EM_ALPHA, ELFCLASS64, ELFDATA2LSB, (char*)"Alpha"}, ++#endif + {EM_MIPS_RS3_LE, EM_MIPS_RS3_LE, ELFCLASS32, ELFDATA2LSB, (char*)"MIPSel"}, + {EM_MIPS, EM_MIPS, ELFCLASS32, ELFDATA2MSB, (char*)"MIPS"}, + {EM_PARISC, EM_PARISC, ELFCLASS32, ELFDATA2MSB, (char*)"PARISC"}, +@@ -2023,6 +2030,10 @@ void * os::dll_load(const char *filename, char *ebuf, int ebuflen) { + static Elf32_Half running_arch_code=EM_S390; + #elif (defined ALPHA) + static Elf32_Half running_arch_code=EM_ALPHA; ++#elif (defined __sw_64) && (defined SW64) ++ static Elf32_Half running_arch_code=EM_SW_64; ++#elif (defined __alpha) && (defined SW64) ++ static Elf32_Half running_arch_code=EM_ALPHA; + #elif (defined MIPSEL) + static Elf32_Half running_arch_code=EM_MIPS_RS3_LE; + #elif (defined PARISC) +@@ -3232,9 +3243,13 @@ void os::pd_commit_memory_or_exit(char* addr, size_t size, bool exec, + } + + // Define MAP_HUGETLB here so we can build HotSpot on old systems. ++#ifdef SW64 //ZHJ20170828 ++#define MAP_HUGETLB 0x100000 ++#else + #ifndef MAP_HUGETLB + #define MAP_HUGETLB 0x40000 + #endif ++#endif + + // Define MADV_HUGEPAGE here so we can build HotSpot on old systems. 
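Returning for a moment to the SYS_clock_getres definition earlier in this hunk: os_linux.cpp probes the kernel directly, bypassing libc, to decide whether per-thread CPU-time clocks are usable, and without a syscall number the #warning branch disables fast_thread_cpu_time entirely. The probe amounts to roughly the following; this is an illustrative sketch, the helper name is hypothetical, and the number 421 is taken only from the hunk above rather than verified against the kernel's syscall table.

// Sketch: how the sys_clock_getres() macro defined above is typically used.
// A successful probe of CLOCK_THREAD_CPUTIME_ID means the kernel can report
// per-thread CPU time, so the fast thread-CPU-time path can be enabled.
#include <time.h>
#include <unistd.h>
#include <sys/syscall.h>

static bool thread_cpu_clock_available() {
  struct timespec res;
  return ::syscall(421 /* SYS_clock_getres on SW64, per the hunk above */,
                   CLOCK_THREAD_CPUTIME_ID, &res) == 0;
}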
+ #ifndef MADV_HUGEPAGE +@@ -3243,6 +3258,22 @@ void os::pd_commit_memory_or_exit(char* addr, size_t size, bool exec, + + int os::Linux::commit_memory_impl(char* addr, size_t size, + size_t alignment_hint, bool exec) { ++#ifdef SW64 ++ if (UseHugeTLBFS && alignment_hint > (size_t)vm_page_size()) { ++ int prot = exec ? PROT_READ|PROT_WRITE|PROT_EXEC : PROT_READ|PROT_WRITE; ++ uintptr_t res = ++ (uintptr_t) ::mmap(addr, size, prot, ++ MAP_PRIVATE|MAP_FIXED|MAP_ANONYMOUS|MAP_HUGETLB, ++ -1, 0); ++ if (res != (uintptr_t) MAP_FAILED) { ++ if (UseNUMAInterleaving) { ++ numa_make_global(addr, size); ++ } ++ return 0; ++ } ++ // Fall through and try to use small pages ++ } ++#endif + int err = os::Linux::commit_memory_impl(addr, size, exec); + if (err == 0) { + realign_memory(addr, size, alignment_hint); +@@ -4058,6 +4089,7 @@ size_t os::Linux::find_large_page_size() { + ARM32_ONLY(2 * M) + IA32_ONLY(4 * M) + IA64_ONLY(256 * M) ++ SW64_ONLY(8 * M) + PPC_ONLY(4 * M) + S390_ONLY(1 * M) + SPARC_ONLY(4 * M); +diff --git a/src/hotspot/os_cpu/linux_sw64/assembler_linux_sw64.cpp b/src/hotspot/os_cpu/linux_sw64/assembler_linux_sw64.cpp +new file mode 100644 +index 0000000000..920d94da7f +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_sw64/assembler_linux_sw64.cpp +@@ -0,0 +1,28 @@ ++/* ++ * Copyright (c) 1999, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++// nothing required here ++ ++ +diff --git a/src/hotspot/os_cpu/linux_sw64/atomic_linux_sw64.hpp b/src/hotspot/os_cpu/linux_sw64/atomic_linux_sw64.hpp +new file mode 100644 +index 0000000000..3a22bd7012 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_sw64/atomic_linux_sw64.hpp +@@ -0,0 +1,260 @@ ++/* ++ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_CPU_LINUX_SW64_VM_ATOMIC_LINUX_SW64_HPP
++#define OS_CPU_LINUX_SW64_VM_ATOMIC_LINUX_SW64_HPP
++
++#include "vm_version_sw64.hpp"
++
++// Implementation of class atomic
++
++#define FULL_MEM_BARRIER __sync_synchronize()
++#define READ_MEM_BARRIER __atomic_thread_fence(__ATOMIC_ACQUIRE);
++#define WRITE_MEM_BARRIER __atomic_thread_fence(__ATOMIC_RELEASE);
++
++template<size_t byte_size>
++struct Atomic::PlatformAdd
++ : Atomic::FetchAndAdd<Atomic::PlatformAdd<byte_size> >
++{
++ template<typename I, typename D>
++ D fetch_and_add(I add_value, D volatile* dest, atomic_memory_order order) const;
++};
++
++template<>
++template<typename I, typename D>
++inline D Atomic::PlatformAdd<4>::fetch_and_add(I add_value, D volatile* dest,
++ atomic_memory_order order) const {
++ STATIC_ASSERT(4 == sizeof(I));
++ STATIC_ASSERT(4 == sizeof(D));
++ D __ret;
++ I __tmp;
++ D* __addr;
++ __asm__ __volatile__ (
++ "1: ldi %[__addr],%[__dest]\n\t"
++ " lldw %[__ret],0(%[__addr])\n\t"
++ " ldi %[__tmp],1\n\t"
++ " wr_f %[__tmp]\n\t"
++ " addw %[__ret],%[__val],%[__tmp]\n\t"
++ " .align 3\n\t"
++ " lstw %[__tmp],0(%[__addr])\n\t"
++ " rd_f %[__tmp]\n\t"
++ " beq %[__tmp],1b\n\t"
++ " zapnot %[__ret],0xf,%[__ret]\n\t"
++ " \n\t"
++ : [__ret]"=&r" (__ret), [__addr]"=&r"(__addr), [__tmp]"=&r"(__tmp)
++ : [__dest] "m" (*(volatile jint*)dest), [__val] "Ir" (add_value)
++ : "memory" );
++
++ return __ret;
++}
++
++template<>
++template<typename T>
++inline T Atomic::PlatformXchg<4>::operator()(T exchange_value,
++ T volatile* dest,
++ atomic_memory_order order) const {
++ STATIC_ASSERT(4 == sizeof(T));
++
++ //warning("Atomic::PlatformXchg<4>");
++ T __ret, __tmp;
++ T* __addr;
++ __asm__ __volatile__ (
++ "1: ldi %[__addr],%[__dest]\n\t"
++ " lldw %[__ret],0(%[__addr])\n\t"
++ " ldi %[__tmp],1\n\t"
++ " wr_f %[__tmp]\n\t"
++ " mov %[__val],%[__tmp]\n\t"
++ " .align 3\n\t"
++ " lstw %[__tmp],0(%[__addr])\n\t"
++ " rd_f %[__tmp]\n\t"
++ " beq %[__tmp],1b\n\t"
++ " zapnot %[__ret],0xf,%[__ret]\n\t"
++ " \n\t"
++ : [__ret]"=&r" (__ret), [__addr]"=&r"(__addr), [__tmp]"=&r"(__tmp)
++ : [__dest] "m" (*(T volatile *)dest), [__val] "Ir"(exchange_value) /* _val can not be constant in stl */
++ : "memory" );
++ return __ret;
++}
++
++
++// No direct support for cmpxchg of bytes; emulate using int.
++template<>
++struct Atomic::PlatformCmpxchg<1> : Atomic::CmpxchgByteUsingInt {};
++
++/*template<>
++template<typename T>
++inline T Atomic::PlatformCmpxchg<1>::operator()(T exchange_value,
++ T volatile* dest,
++ T compare_value,
++ atomic_memory_order ) const {
++ STATIC_ASSERT(1 == sizeof(T));
++ T __prev, __cmp;
++ T __tmp;
++ T* __addr;
++ __asm__ __volatile__ (
++ "1: ldi %[__addr],%[__dest]\n\t"
++ " lldw %[__prev],0(%[__addr])\n\t"
++ " zap %[__prev], 0x1, %[__tmp]\n\t"
++ " bis %[__val], %[__tmp], %[__val]\n\t"
++ " mov %[__old],%[__tmp]\n\t"
++ " zapnot %[__prev], 0x1, %[__prev]\n\t"
++ " cmpeq %[__prev],%[__tmp],%[__cmp]\n\t"
++ " wr_f %[__cmp]\n\t"
++ " mov %[__val],%[__tmp]\n\t"
++ " .align 3\n\t"
++ " lstw %[__tmp],0(%[__addr])\n\t"
++ " rd_f %[__tmp]\n\t"
++ " beq %[__cmp],2f\n\t"
++ " beq %[__tmp],1b\n\t"
++ "2: \n\t"
++ " zapnot %[__prev],0xf,%[__prev]\n\t"
++ : [__prev]"=&r" (__prev), [__addr]"=&r" (__addr), [__cmp] "=&r" (__cmp), [__tmp] "=&r" (__tmp)
++ : [__dest] "m" (*(T volatile *)dest), [__old]"Ir" (compare_value), [__val]"Ir" (exchange_value)
++ : "memory" );
++
++ return __prev;
++}*/
++
++template<>
++template<typename T>
++inline T Atomic::PlatformCmpxchg<4>::operator()(T exchange_value,
++ T volatile* dest,
++ T compare_value,
++ atomic_memory_order /* order */) const {
++ STATIC_ASSERT(4 == sizeof(T));
++
++ //warning("Atomic::PlatformCmpxchg<4_1> exchange_value=%d dest=%d compare_value=%d\n", exchange_value, *dest, compare_value);
++ T __prev, __cmp;
++ T __tmp;
++ T* __addr;
++ __asm__ __volatile__ (
++ "1: ldi %[__addr],%[__dest]\n\t"
++ " lldw %[__prev],0(%[__addr])\n\t"
++ " mov %[__old],%[__tmp]\n\t"
++ " addw %[__tmp], 0x0, %[__tmp]\n\t"
++ " cmpeq %[__prev],%[__tmp],%[__cmp]\n\t"
++ " wr_f %[__cmp]\n\t"
++ " mov %[__val],%[__tmp]\n\t"
++ " .align 3\n\t"
++ " lstw %[__tmp],0(%[__addr])\n\t"
++ " rd_f %[__tmp]\n\t"
++ " beq %[__cmp],2f\n\t"
++ " beq %[__tmp],1b\n\t"
++ "2: \n\t"
++ " zapnot %[__prev],0xf,%[__prev]\n\t"
++ : [__prev]"=&r" (__prev), [__addr]"=&r" (__addr), [__cmp] "=&r" (__cmp), [__tmp] "=&r" (__tmp)
++ : [__dest] "m" (*(T volatile *)dest), [__old]"Ir" (compare_value), [__val]"Ir" (exchange_value) /* _val can not be constant in stl */
++ : "memory" );
++ //warning("Atomic::PlatformCmpxchg<4_2> exchange_value=%d dest=%d compare_value=%d\n", exchange_value, *dest, compare_value);
++ return __prev;
++}
++
++
++template<>
++template<typename I, typename D>
++inline D Atomic::PlatformAdd<8>::fetch_and_add(I add_value, D volatile* dest,
++ atomic_memory_order order) const {
++ STATIC_ASSERT(8 == sizeof(I));
++ STATIC_ASSERT(8 == sizeof(D));
++ //warning("Atomic::PlatformAdd<8>::fetch_and_add");
++ D __ret;
++ I __tmp;
++ D* __addr;
++ __asm__ __volatile__ (
++ "1: ldi %[__addr],%[__dest]\n\t"
++ " lldl %[__ret],0(%[__addr])\n\t"
++ " ldi %[__tmp],1\n\t"
++ " wr_f %[__tmp]\n\t"
++ " addl %[__ret],%[__val],%[__tmp]\n\t"
++ " .align 3\n\t"
++ " lstl %[__tmp],0(%[__addr])\n\t"
++ " rd_f %[__tmp]\n\t"
++ " beq %[__tmp],1b\n\t"
++ " \n\t"
++ : [__ret]"=&r" (__ret), [__addr]"=&r"(__addr), [__tmp]"=&r"(__tmp)
++ : [__dest] "m" (*(D volatile *)dest), [__val] "Ir"(add_value)
++ : "memory" );
++
++ return __ret;
++}
++
++template<>
++template<typename T>
++inline T Atomic::PlatformXchg<8>::operator()(T exchange_value, T volatile* dest,
++ atomic_memory_order order) const {
++ STATIC_ASSERT(8 == sizeof(T));
++
++ //warning("Atomic::PlatformXchg<8>");
++ T __ret, __tmp;
++ T __addr;
++ __asm__ __volatile__ (
++ "1: ldi %[__addr],%[__dest]\n\t"
++ " lldl %[__ret],0(%[__addr])\n\t"
++ " ldi %[__tmp],1\n\t"
++ " wr_f %[__tmp]\n\t"
++ " mov %[__val],%[__tmp]\n\t"
++ " .align 3\n\t"
++ " lstl %[__tmp],0(%[__addr])\n\t"
++ " rd_f %[__tmp]\n\t"
++ " beq %[__tmp],1b\n\t"
++ " \n\t"
++ : [__ret]"=&r" (__ret), [__addr]"=&r"(__addr), [__tmp]"=&r"(__tmp)
++ : [__dest] "m" (*(T volatile *)dest), [__val] "Ir"(exchange_value) /* _val can not be constant in stl */
++ : "memory" );
++
++ return __ret;
++}
++
++template<>
++template<typename T>
++inline T Atomic::PlatformCmpxchg<8>::operator()(T exchange_value,
++ T volatile* dest,
++ T compare_value,
++ atomic_memory_order /* order */) const {
++ STATIC_ASSERT(8 == sizeof(T));
++ //warning("Atomic::PlatformCmpxchg<8>");
++ T __prev, __cmp;
++ T __tmp, __addr;
++
++ __asm__ __volatile__ (
++ "1: ldi %[__addr],%[__dest]\n\t"
++ " lldl %[__prev],0(%[__addr])\n\t"
++ " cmpeq %[__prev],%[__old],%[__cmp]\n\t"
++ " wr_f %[__cmp]\n\t"
++ " mov %[__val],%[__tmp]\n\t"
++ " .align 3\n\t"
++ " lstl %[__tmp],0(%[__addr])\n\t"
++ " rd_f %[__tmp]\n\t"
++ " beq %[__cmp],2f\n\t"
++ " beq %[__tmp],1b\n\t"
++ "2: \n\t"
++ : [__prev]"=&r" (__prev), [__addr]"=&r" (__addr), [__cmp] "=&r" (__cmp), [__tmp] "=&r" (__tmp)
++ : [__dest] "m" (*(T volatile *)dest), [__old]"Ir" (compare_value), [__val]"Ir" (exchange_value) /* _val can not be constant in stl */
++ : "memory" );
++
++ return __prev;
++}
++
++#endif // OS_CPU_LINUX_SW64_VM_ATOMIC_LINUX_SW64_HPP
+diff --git a/src/hotspot/os_cpu/linux_sw64/bytes_linux_sw64.inline.hpp b/src/hotspot/os_cpu/linux_sw64/bytes_linux_sw64.inline.hpp
+new file mode 100644
+index 0000000000..5dcd03c800
+--- /dev/null
++++ b/src/hotspot/os_cpu/linux_sw64/bytes_linux_sw64.inline.hpp
+@@ -0,0 +1,37 @@
++/*
++ * Copyright (c) 1999, 2010, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2014, Red Hat Inc. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef OS_CPU_LINUX_SW64_VM_BYTES_LINUX_SW64_INLINE_HPP
++#define OS_CPU_LINUX_SW64_VM_BYTES_LINUX_SW64_INLINE_HPP
++
++#include <byteswap.h>
++
++// Efficient swapping of data bytes from Java byte
++// ordering to native byte ordering and vice versa.
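
For reference on the swap helpers that follow: bswap_16/32/64 come from <byteswap.h> and perform plain byte reversal; on this toolchain they typically reduce to the GCC __builtin_bswap builtins used in this standalone sanity check (illustrative only):

    #include <cstdint>
    #include <cassert>

    int main() {
      // Byte reversal as performed by bswap_16/32/64.
      assert(__builtin_bswap16(0x1234u)             == 0x3412u);
      assert(__builtin_bswap32(0x11223344u)         == 0x44332211u);
      assert(__builtin_bswap64(0x1122334455667788u) == 0x8877665544332211u);
      return 0;
    }
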
++inline u2 Bytes::swap_u2(u2 x) { return bswap_16(x); } ++inline u4 Bytes::swap_u4(u4 x) { return bswap_32(x); } ++inline u8 Bytes::swap_u8(u8 x) { return bswap_64(x); } ++ ++#endif // OS_CPU_LINUX_SW64_VM_BYTES_LINUX_SW64_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_sw64/copy_linux_sw64.inline.hpp b/src/hotspot/os_cpu/linux_sw64/copy_linux_sw64.inline.hpp +new file mode 100644 +index 0000000000..58711314c3 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_sw64/copy_linux_sw64.inline.hpp +@@ -0,0 +1,142 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_SW64_VM_COPY_LINUX_SW64_INLINE_HPP ++#define OS_CPU_LINUX_SW64_VM_COPY_LINUX_SW64_INLINE_HPP ++ ++static void pd_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ (void)memmove(to, from, count * HeapWordSize); ++} ++ ++static void pd_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ (void)memcpy(to, from, count * HeapWordSize); ++ break; ++ } ++} ++ ++static void pd_disjoint_words_atomic(const HeapWord* from, HeapWord* to, size_t count) { ++ switch (count) { ++ case 8: to[7] = from[7]; ++ case 7: to[6] = from[6]; ++ case 6: to[5] = from[5]; ++ case 5: to[4] = from[4]; ++ case 4: to[3] = from[3]; ++ case 3: to[2] = from[2]; ++ case 2: to[1] = from[1]; ++ case 1: to[0] = from[0]; ++ case 0: break; ++ default: ++ while (count-- > 0) { ++ *to++ = *from++; ++ } ++ break; ++ } ++} ++ ++static void pd_aligned_conjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_words(from, to, count); ++} ++ ++static void pd_aligned_disjoint_words(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_disjoint_words(from, to, count); ++} ++ ++static void pd_conjoint_bytes(const void* from, void* to, size_t count) { ++ (void)memmove(to, from, count); ++} ++ ++static void pd_conjoint_bytes_atomic(const void* from, void* to, size_t count) { ++ pd_conjoint_bytes(from, to, count); ++} ++ ++template ++static void copy_conjoint_atomic(const T* from, T* to, size_t count) { ++ if (from > to) { ++ while (count-- > 0) { ++ // Copy forwards ++ *to++ = *from++; ++ } ++ } else { ++ from += count - 1; ++ to += count 
- 1; ++ while (count-- > 0) { ++ // Copy backwards ++ *to-- = *from--; ++ } ++ } ++} ++ ++static void pd_conjoint_jshorts_atomic(const jshort* from, jshort* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jints_atomic(const jint* from, jint* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_jlongs_atomic(const jlong* from, jlong* to, size_t count) { ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_conjoint_oops_atomic(const oop* from, oop* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(HeapWordSize == BytesPerOop, "heapwords and oops must be the same size"); ++ copy_conjoint_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_bytes(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_bytes_atomic(from, to, count); ++} ++ ++static void pd_arrayof_conjoint_jshorts(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jshorts_atomic((jshort*)from, (jshort*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jints(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jints_atomic((jint*)from, (jint*)to, count); ++} ++ ++static void pd_arrayof_conjoint_jlongs(const HeapWord* from, HeapWord* to, size_t count) { ++ pd_conjoint_jlongs_atomic((jlong*)from, (jlong*)to, count); ++} ++ ++static void pd_arrayof_conjoint_oops(const HeapWord* from, HeapWord* to, size_t count) { ++ //assert(!UseCompressedOops, "foo!"); ++ assert(BytesPerLong == BytesPerOop, "jlongs and oops must be the same size"); ++ pd_conjoint_oops_atomic((oop*)from, (oop*)to, count); ++} ++ ++#endif // OS_CPU_LINUX_SW64_VM_COPY_LINUX_SW64_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_sw64/globals_linux_sw64.hpp b/src/hotspot/os_cpu/linux_sw64/globals_linux_sw64.hpp +new file mode 100644 +index 0000000000..6d834b7169 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_sw64/globals_linux_sw64.hpp +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2000, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_SW64_VM_GLOBALS_LINUX_SW64_HPP ++#define OS_CPU_LINUX_SW64_VM_GLOBALS_LINUX_SW64_HPP ++ ++// Sets the default values for platform dependent flags used by the runtime system. 
++// (see globals.hpp) ++ ++define_pd_global(bool, DontYieldALot, false); ++define_pd_global(intx, ThreadStackSize, 1024); // 0 => use system default ++define_pd_global(intx, VMThreadStackSize, 1024); ++ ++define_pd_global(intx, CompilerThreadStackSize, 0); ++ ++define_pd_global(uintx,JVMInvokeMethodSlack, 8192); ++ ++// Used on 64 bit platforms for UseCompressedOops base address ++define_pd_global(uintx,HeapBaseMinAddress, 2*G); ++ ++#endif // OS_CPU_LINUX_SW64_VM_GLOBALS_LINUX_SW64_HPP +diff --git a/src/hotspot/os_cpu/linux_sw64/linux_sw64.ad b/src/hotspot/os_cpu/linux_sw64/linux_sw64.ad +new file mode 100644 +index 0000000000..c3b8cd2c45 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_sw64/linux_sw64.ad +@@ -0,0 +1,69 @@ ++// ++// Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++// Copyright (c) 2014, Red Hat Inc. All rights reserved. ++// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++// ++// This code is free software; you can redistribute it and/or modify it ++// under the terms of the GNU General Public License version 2 only, as ++// published by the Free Software Foundation. ++// ++// This code is distributed in the hope that it will be useful, but WITHOUT ++// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++// version 2 for more details (a copy is included in the LICENSE file that ++// accompanied this code). ++// ++// You should have received a copy of the GNU General Public License version ++// 2 along with this work; if not, write to the Free Software Foundation, ++// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++// ++// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++// or visit www.oracle.com if you need additional information or have any ++// questions. ++// ++// ++ ++// AArch64 Linux Architecture Description File ++ ++//----------OS-DEPENDENT ENCODING BLOCK---------------------------------------- ++// This block specifies the encoding classes used by the compiler to ++// output byte streams. Encoding classes generate functions which are ++// called by Machine Instruction Nodes in order to generate the bit ++// encoding of the instruction. Operands specify their base encoding ++// interface with the interface keyword. There are currently ++// supported four interfaces, REG_INTER, CONST_INTER, MEMORY_INTER, & ++// COND_INTER. REG_INTER causes an operand to generate a function ++// which returns its register number when queried. CONST_INTER causes ++// an operand to generate a function which returns the value of the ++// constant when queried. MEMORY_INTER causes an operand to generate ++// four functions which return the Base Register, the Index Register, ++// the Scale Value, and the Offset Value of the operand when queried. ++// COND_INTER causes an operand to generate six functions which return ++// the encoding code (ie - encoding bits for the instruction) ++// associated with each basic boolean condition for a conditional ++// instruction. Instructions specify two basic values for encoding. ++// They use the ins_encode keyword to specify their encoding class ++// (which must be one of the class names specified in the encoding ++// block), and they use the opcode keyword to specify, in order, their ++// primary, secondary, and tertiary opcode. Only the opcode sections ++// which a particular instruction needs for encoding need to be ++// specified. 
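
For the enc_class part of the note above: adlc turns each encoding class into C++ emit code that the matched instruction nodes call while the method's code buffer is being filled. A rough sketch of that shape, with invented names (SketchCodeBuffer, emit_java_to_runtime) purely for illustration:

    #include <cstdint>
    #include <vector>

    // Stand-in for a code buffer of fixed-width 32-bit instruction words (little-endian).
    struct SketchCodeBuffer {
      std::vector<uint8_t> bytes;
      void emit_int32(uint32_t insn) {
        for (int i = 0; i < 4; i++) {
          bytes.push_back(uint8_t(insn >> (8 * i)));
        }
      }
    };

    // Roughly the shape adlc generates for a non-empty encoding class: a routine
    // the matched instruction node invokes to emit its bytes.
    static void emit_java_to_runtime(SketchCodeBuffer& cbuf, uint32_t call_insn) {
      cbuf.emit_int32(call_insn);
    }

    int main() {
      SketchCodeBuffer cbuf;
      emit_java_to_runtime(cbuf, 0u);   // placeholder instruction word
      return cbuf.bytes.size() == 4 ? 0 : 1;
    }
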
++encode %{ ++ // Build emit functions for each basic byte or larger field in the intel ++ // encoding scheme (opcode, rm, sib, immediate), and call them from C++ ++ // code in the enc_class source block. Emit functions will live in the ++ // main source block for now. In future, we can generalize this by ++ // adding a syntax that specifies the sizes of fields in an order, ++ // so that the adlc can build the emit functions automagically ++ ++ enc_class Java_To_Runtime(method meth) %{ ++ %} ++ ++%} ++ ++ ++// Platform dependent source ++ ++source %{ ++ ++%} +diff --git a/src/hotspot/os_cpu/linux_sw64/linux_sw64.s b/src/hotspot/os_cpu/linux_sw64/linux_sw64.s +new file mode 100644 +index 0000000000..dd28925d19 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_sw64/linux_sw64.s +@@ -0,0 +1,380 @@ ++# ++# Copyright (c) 2004, 2013, Oracle and/or its affiliates. All rights reserved. ++# DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++# ++# This code is free software; you can redistribute it and/or modify it ++# under the terms of the GNU General Public License version 2 only, as ++# published by the Free Software Foundation. ++# ++# This code is distributed in the hope that it will be useful, but WITHOUT ++# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++# version 2 for more details (a copy is included in the LICENSE file that ++# accompanied this code). ++# ++# You should have received a copy of the GNU General Public License version ++# 2 along with this work; if not, write to the Free Software Foundation, ++# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++# ++# Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++# or visit www.oracle.com if you need additional information or have any ++# questions. ++# ++ ++ ++# # NOTE WELL! The _Copy functions are called directly ++ # from server-compiler-generated code via CallLeafNoFP, ++ # which means that they *must* either not use floating ++ # point or use it in the same manner as does the server ++ # compiler. 
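
The stub bodies below appear to be commented out on this port, leaving the element-wise C++ routines in copy_linux_sw64.inline.hpp (earlier in this patch) to do the work. The contract behind both is that source and destination may overlap, so the copy direction has to follow their relative order; a minimal sketch of that rule (illustrative only):

    #include <cstddef>

    // Overlap-safe element-wise copy: choose the direction from the operand order,
    // mirroring copy_conjoint_atomic() in copy_linux_sw64.inline.hpp.
    template <typename T>
    static void conjoint_copy(const T* from, T* to, size_t count) {
      if (from > to) {
        for (size_t i = 0; i < count; i++) {
          to[i] = from[i];               // destination below source: copy forwards
        }
      } else if (from < to) {
        for (size_t i = count; i > 0; i--) {
          to[i - 1] = from[i - 1];       // destination above source: copy backwards
        }
      }                                  // from == to: nothing to do
    }

    int main() {
      int buf[5] = {1, 2, 3, 4, 5};
      conjoint_copy(buf, buf + 1, 4);    // overlapping shift right -> {1, 1, 2, 3, 4}
      return (buf[1] == 1 && buf[4] == 4) ? 0 : 1;
    }
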
++ ++# .globl _Copy_arrayof_conjoint_bytes ++ .globl _Copy_arrayof_conjoint_jshorts ++# .globl _Copy_conjoint_jshorts_atomic ++# .globl _Copy_arrayof_conjoint_jints ++# .globl _Copy_conjoint_jints_atomic ++# .globl _Copy_arrayof_conjoint_jlongs ++# .globl _Copy_conjoint_jlongs_atomic ++ ++ .text ++ ++# .globl SpinPause ++# .align 16 ++# .type SpinPause,@function ++SpinPause: ++# rep ++# nop ++# movq $1, %rax ++# ret ++ ++# # Support for void Copy::arrayof_conjoint_bytes(void* from, ++# # void* to, ++# # size_t count) ++# # rdi - from ++# # rsi - to ++# # rdx - count, treated as ssize_t ++# # ++# .p2align 4,,15 ++ .type _Copy_arrayof_conjoint_bytes,@function ++_Copy_arrayof_conjoint_bytes: ++# movq %rdx,%r8 # byte count ++# shrq $3,%rdx # qword count ++# cmpq %rdi,%rsi ++# leaq -1(%rdi,%r8,1),%rax # from + bcount*1 - 1 ++# jbe acb_CopyRight ++# cmpq %rax,%rsi ++# jbe acb_CopyLeft ++acb_CopyRight: ++# leaq -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8 ++# leaq -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8 ++# negq %rdx ++# jmp 7f ++# .p2align 4,,15 ++#1: movq 8(%rax,%rdx,8),%rsi ++# movq %rsi,8(%rcx,%rdx,8) ++# addq $1,%rdx ++# jnz 1b ++#2: testq $4,%r8 # check for trailing dword ++# jz 3f ++# movl 8(%rax),%esi # copy trailing dword ++# movl %esi,8(%rcx) ++# addq $4,%rax ++# addq $4,%rcx # original %rsi is trashed, so we ++# # can't use it as a base register ++#3: testq $2,%r8 # check for trailing word ++# jz 4f ++# movw 8(%rax),%si # copy trailing word ++# movw %si,8(%rcx) ++# addq $2,%rcx ++#4: testq $1,%r8 # check for trailing byte ++# jz 5f ++# movb -1(%rdi,%r8,1),%al # copy trailing byte ++# movb %al,8(%rcx) ++#5: ret ++# .p2align 4,,15 ++#6: movq -24(%rax,%rdx,8),%rsi ++# movq %rsi,-24(%rcx,%rdx,8) ++# movq -16(%rax,%rdx,8),%rsi ++# movq %rsi,-16(%rcx,%rdx,8) ++# movq -8(%rax,%rdx,8),%rsi ++# movq %rsi,-8(%rcx,%rdx,8) ++# movq (%rax,%rdx,8),%rsi ++# movq %rsi,(%rcx,%rdx,8) ++#7: addq $4,%rdx ++# jle 6b ++# subq $4,%rdx ++# jl 1b ++# jmp 2b ++acb_CopyLeft: ++# testq $1,%r8 # check for trailing byte ++# jz 1f ++# movb -1(%rdi,%r8,1),%cl # copy trailing byte ++# movb %cl,-1(%rsi,%r8,1) ++# subq $1,%r8 # adjust for possible trailing word ++#1: testq $2,%r8 # check for trailing word ++# jz 2f ++# movw -2(%rdi,%r8,1),%cx # copy trailing word ++# movw %cx,-2(%rsi,%r8,1) ++#2: testq $4,%r8 # check for trailing dword ++# jz 5f ++# movl (%rdi,%rdx,8),%ecx # copy trailing dword ++# movl %ecx,(%rsi,%rdx,8) ++# jmp 5f ++# .p2align 4,,15 ++#3: movq -8(%rdi,%rdx,8),%rcx ++# movq %rcx,-8(%rsi,%rdx,8) ++# subq $1,%rdx ++# jnz 3b ++# ret ++# .p2align 4,,15 ++#4: movq 24(%rdi,%rdx,8),%rcx ++# movq %rcx,24(%rsi,%rdx,8) ++# movq 16(%rdi,%rdx,8),%rcx ++# movq %rcx,16(%rsi,%rdx,8) ++# movq 8(%rdi,%rdx,8),%rcx ++# movq %rcx,8(%rsi,%rdx,8) ++# movq (%rdi,%rdx,8),%rcx ++# movq %rcx,(%rsi,%rdx,8) ++#5: subq $4,%rdx ++# jge 4b ++# addq $4,%rdx ++# jg 3b ++# ret ++ ++# # Support for void Copy::arrayof_conjoint_jshorts(void* from, ++# # void* to, ++# # size_t count) ++# # Equivalent to ++# # conjoint_jshorts_atomic ++# # ++# # If 'from' and/or 'to' are aligned on 4- or 2-byte boundaries, we ++# # let the hardware handle it. The tow or four words within dwords ++# # or qwords that span cache line boundaries will still be loaded ++# # and stored atomically. 
++# # ++# # rdi - from ++# # rsi - to ++# # rdx - count, treated as ssize_t ++# # ++# .p2align 4,,15 ++ .type _Copy_arrayof_conjoint_jshorts,@function ++ .type _Copy_conjoint_jshorts_atomic,@function ++_Copy_arrayof_conjoint_jshorts: ++_Copy_conjoint_jshorts_atomic: ++# movq %rdx,%r8 # word count ++# shrq $2,%rdx # qword count ++# cmpq %rdi,%rsi ++# leaq -2(%rdi,%r8,2),%rax # from + wcount*2 - 2 ++# jbe acs_CopyRight ++# cmpq %rax,%rsi ++# jbe acs_CopyLeft ++acs_CopyRight: ++# leaq -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8 ++# leaq -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8 ++# negq %rdx ++# jmp 6f ++#1: movq 8(%rax,%rdx,8),%rsi ++# movq %rsi,8(%rcx,%rdx,8) ++# addq $1,%rdx ++# jnz 1b ++#2: testq $2,%r8 # check for trailing dword ++# jz 3f ++# movl 8(%rax),%esi # copy trailing dword ++# movl %esi,8(%rcx) ++# addq $4,%rcx # original %rsi is trashed, so we ++# # can't use it as a base register ++#3: testq $1,%r8 # check for trailing word ++# jz 4f ++# movw -2(%rdi,%r8,2),%si # copy trailing word ++# movw %si,8(%rcx) ++#4: ret ++# .p2align 4,,15 ++#5: movq -24(%rax,%rdx,8),%rsi ++# movq %rsi,-24(%rcx,%rdx,8) ++# movq -16(%rax,%rdx,8),%rsi ++# movq %rsi,-16(%rcx,%rdx,8) ++# movq -8(%rax,%rdx,8),%rsi ++# movq %rsi,-8(%rcx,%rdx,8) ++# movq (%rax,%rdx,8),%rsi ++# movq %rsi,(%rcx,%rdx,8) ++#6: addq $4,%rdx ++# jle 5b ++# subq $4,%rdx ++# jl 1b ++# jmp 2b ++acs_CopyLeft: ++# testq $1,%r8 # check for trailing word ++# jz 1f ++# movw -2(%rdi,%r8,2),%cx # copy trailing word ++# movw %cx,-2(%rsi,%r8,2) ++#1: testq $2,%r8 # check for trailing dword ++# jz 4f ++# movl (%rdi,%rdx,8),%ecx # copy trailing dword ++# movl %ecx,(%rsi,%rdx,8) ++# jmp 4f ++#2: movq -8(%rdi,%rdx,8),%rcx ++# movq %rcx,-8(%rsi,%rdx,8) ++# subq $1,%rdx ++# jnz 2b ++# ret ++# .p2align 4,,15 ++#3: movq 24(%rdi,%rdx,8),%rcx ++# movq %rcx,24(%rsi,%rdx,8) ++# movq 16(%rdi,%rdx,8),%rcx ++# movq %rcx,16(%rsi,%rdx,8) ++# movq 8(%rdi,%rdx,8),%rcx ++# movq %rcx,8(%rsi,%rdx,8) ++# movq (%rdi,%rdx,8),%rcx ++# movq %rcx,(%rsi,%rdx,8) ++#4: subq $4,%rdx ++# jge 3b ++# addq $4,%rdx ++# jg 2b ++# ret ++ ++# # Support for void Copy::arrayof_conjoint_jints(jint* from, ++# # jint* to, ++# # size_t count) ++# # Equivalent to ++# # conjoint_jints_atomic ++# # ++# # If 'from' and/or 'to' are aligned on 4-byte boundaries, we let ++# # the hardware handle it. The two dwords within qwords that span ++# # cache line boundaries will still be loaded and stored atomically. 
++# # ++# # rdi - from ++# # rsi - to ++# # rdx - count, treated as ssize_t ++# # ++# .p2align 4,,15 ++ .type _Copy_arrayof_conjoint_jints,@function ++ .type _Copy_conjoint_jints_atomic,@function ++_Copy_arrayof_conjoint_jints: ++_Copy_conjoint_jints_atomic: ++# movq %rdx,%r8 # dword count ++# shrq %rdx # qword count ++# cmpq %rdi,%rsi ++# leaq -4(%rdi,%r8,4),%rax # from + dcount*4 - 4 ++# jbe aci_CopyRight ++# cmpq %rax,%rsi ++# jbe aci_CopyLeft ++aci_CopyRight: ++# leaq -8(%rdi,%rdx,8),%rax # from + qcount*8 - 8 ++# leaq -8(%rsi,%rdx,8),%rcx # to + qcount*8 - 8 ++# negq %rdx ++# jmp 5f ++# .p2align 4,,15 ++#1: movq 8(%rax,%rdx,8),%rsi ++# movq %rsi,8(%rcx,%rdx,8) ++# addq $1,%rdx ++# jnz 1b ++#2: testq $1,%r8 # check for trailing dword ++# jz 3f ++# movl 8(%rax),%esi # copy trailing dword ++# movl %esi,8(%rcx) ++#3: ret ++# .p2align 4,,15 ++#4: movq -24(%rax,%rdx,8),%rsi ++# movq %rsi,-24(%rcx,%rdx,8) ++# movq -16(%rax,%rdx,8),%rsi ++# movq %rsi,-16(%rcx,%rdx,8) ++# movq -8(%rax,%rdx,8),%rsi ++# movq %rsi,-8(%rcx,%rdx,8) ++# movq (%rax,%rdx,8),%rsi ++# movq %rsi,(%rcx,%rdx,8) ++#5: addq $4,%rdx ++# jle 4b ++# subq $4,%rdx ++# jl 1b ++# jmp 2b ++aci_CopyLeft: ++# testq $1,%r8 # check for trailing dword ++# jz 3f ++# movl -4(%rdi,%r8,4),%ecx # copy trailing dword ++# movl %ecx,-4(%rsi,%r8,4) ++# jmp 3f ++#1: movq -8(%rdi,%rdx,8),%rcx ++# movq %rcx,-8(%rsi,%rdx,8) ++# subq $1,%rdx ++# jnz 1b ++# ret ++# .p2align 4,,15 ++#2: movq 24(%rdi,%rdx,8),%rcx ++# movq %rcx,24(%rsi,%rdx,8) ++# movq 16(%rdi,%rdx,8),%rcx ++# movq %rcx,16(%rsi,%rdx,8) ++# movq 8(%rdi,%rdx,8),%rcx ++# movq %rcx,8(%rsi,%rdx,8) ++# movq (%rdi,%rdx,8),%rcx ++# movq %rcx,(%rsi,%rdx,8) ++#3: subq $4,%rdx ++# jge 2b ++# addq $4,%rdx ++# jg 1b ++# ret ++ ++# # Support for void Copy::arrayof_conjoint_jlongs(jlong* from, ++# # jlong* to, ++# # size_t count) ++# # Equivalent to ++# # conjoint_jlongs_atomic ++# # arrayof_conjoint_oops ++# # conjoint_oops_atomic ++# # ++# # rdi - from ++# # rsi - to ++# # rdx - count, treated as ssize_t ++# # ++# .p2align 4,,15 ++ .type _Copy_arrayof_conjoint_jlongs,@function ++ .type _Copy_conjoint_jlongs_atomic,@function ++_Copy_arrayof_conjoint_jlongs: ++_Copy_conjoint_jlongs_atomic: ++# cmpq %rdi,%rsi ++# leaq -8(%rdi,%rdx,8),%rax # from + count*8 - 8 ++# jbe acl_CopyRight ++# cmpq %rax,%rsi ++# jbe acl_CopyLeft ++acl_CopyRight: ++# leaq -8(%rsi,%rdx,8),%rcx # to + count*8 - 8 ++# negq %rdx ++# jmp 3f ++#1: movq 8(%rax,%rdx,8),%rsi ++# movq %rsi,8(%rcx,%rdx,8) ++# addq $1,%rdx ++# jnz 1b ++# ret ++# .p2align 4,,15 ++#2: movq -24(%rax,%rdx,8),%rsi ++# movq %rsi,-24(%rcx,%rdx,8) ++# movq -16(%rax,%rdx,8),%rsi ++# movq %rsi,-16(%rcx,%rdx,8) ++# movq -8(%rax,%rdx,8),%rsi ++# movq %rsi,-8(%rcx,%rdx,8) ++# movq (%rax,%rdx,8),%rsi ++# movq %rsi,(%rcx,%rdx,8) ++#3: addq $4,%rdx ++# jle 2b ++# subq $4,%rdx ++# jl 1b ++# ret ++#4: movq -8(%rdi,%rdx,8),%rcx ++# movq %rcx,-8(%rsi,%rdx,8) ++# subq $1,%rdx ++# jnz 4b ++# ret ++# .p2align 4,,15 ++#5: movq 24(%rdi,%rdx,8),%rcx ++# movq %rcx,24(%rsi,%rdx,8) ++# movq 16(%rdi,%rdx,8),%rcx ++# movq %rcx,16(%rsi,%rdx,8) ++# movq 8(%rdi,%rdx,8),%rcx ++# movq %rcx,8(%rsi,%rdx,8) ++# movq (%rdi,%rdx,8),%rcx ++# movq %rcx,(%rsi,%rdx,8) ++acl_CopyLeft: ++# subq $4,%rdx ++# jge 5b ++# addq $4,%rdx ++# jg 4b ++# ret +diff --git a/src/hotspot/os_cpu/linux_sw64/orderAccess_linux_sw64.hpp b/src/hotspot/os_cpu/linux_sw64/orderAccess_linux_sw64.hpp +new file mode 100644 +index 0000000000..c6cbd19d61 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_sw64/orderAccess_linux_sw64.hpp 
+@@ -0,0 +1,147 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_SW64_VM_ORDERACCESS_LINUX_SW64_HPP ++#define OS_CPU_LINUX_SW64_VM_ORDERACCESS_LINUX_SW64_HPP ++ ++// Included in orderAccess.hpp header file. ++ ++#include "vm_version_sw64.hpp" ++ ++// Implementation of class OrderAccess. ++#define inlasm_sync() __asm__ __volatile__ ("memb" : : : "memory"); ++ ++inline void OrderAccess::loadload() { acquire(); } ++inline void OrderAccess::storestore() { release(); } ++inline void OrderAccess::loadstore() { acquire(); } ++inline void OrderAccess::storeload() { fence(); } ++ ++inline void OrderAccess::acquire() { ++ inlasm_sync(); ++} ++ ++inline void OrderAccess::release() { ++ inlasm_sync(); ++} ++ ++inline void OrderAccess::fence() { ++ inlasm_sync(); ++} ++ ++/* ++template<> ++struct OrderAccess::PlatformOrderedStore<1, RELEASE_X_FENCE> ++{ ++ template ++ void operator()(T v, volatile T* p) const { ++ __asm__ volatile ( "xchgb (%2),%0" ++ : "=q" (v) ++ : "0" (v), "r" (p) ++ : "memory"); ++ } ++}; ++ ++template<> ++struct OrderAccess::PlatformOrderedStore<2, RELEASE_X_FENCE> ++{ ++ template ++ void operator()(T v, volatile T* p) const { ++ __asm__ volatile ( "xchgw (%2),%0" ++ : "=r" (v) ++ : "0" (v), "r" (p) ++ : "memory"); ++ } ++};*/ ++/* ++template<> ++struct OrderAccess::PlatformOrderedStore<4, RELEASE_X_FENCE> ++{ ++ template ++ void operator()(T v, volatile T* p) const { ++ T __ret, __tmp; ++ T* __addr; ++ __asm__ __volatile__ ( ++ "1: ldi %[__addr],%[__dest]\n\t" ++ " lldw %[__ret],0(%[__addr])\n\t" ++ " ldi %[__tmp],1\n\t" ++ " wr_f %[__tmp]\n\t" ++ " mov %[__val],%[__tmp]\n\t" ++ " .align 3\n\t" ++ " lstw %[__tmp],0(%[__addr])\n\t" ++ " rd_f %[__tmp]\n\t" ++ " beq %[__tmp],1b\n\t" ++ " zapnot %[__ret],0xf,%[__ret]\n\t" ++ " \n\t" ++ : [__ret]"=&r" (__ret), [__addr]"=&r"(__addr), [__tmp]"=&r"(__tmp) ++ : [__dest] "m" (*(T volatile *)p), [__val] "Ir"(v) ++ : "memory" ); ++ } ++}; ++ ++template<> ++struct OrderAccess::PlatformOrderedStore<8, RELEASE_X_FENCE> ++{ ++ template ++ void operator()(T v, volatile T* p) const { ++ T __ret, __tmp; ++ T __addr; ++ __asm__ __volatile__ ( ++ "1: ldi %[__addr],%[__dest]\n\t" ++ " lldl %[__ret],0(%[__addr])\n\t" ++ " ldi %[__tmp],1\n\t" ++ " wr_f %[__tmp]\n\t" ++ " mov %[__val],%[__tmp]\n\t" ++ " .align 3\n\t" ++ " lstl %[__tmp],0(%[__addr])\n\t" ++ " rd_f %[__tmp]\n\t" ++ " beq %[__tmp],1b\n\t" ++ " \n\t" ++ : [__ret]"=&r" 
(__ret), [__addr]"=&r"(__addr), [__tmp]"=&r"(__tmp) ++ : [__dest] "m" (*(T volatile *)p), [__val] "Ir"(v) ++ : "memory" ); ++ } ++}; ++*/ ++template ++struct OrderAccess::PlatformOrderedLoad ++{ ++ template ++ T operator()(const volatile T* p) const { T data; __atomic_load(p, &data, __ATOMIC_ACQUIRE); return data; } ++}; ++ ++template ++struct OrderAccess::PlatformOrderedStore ++{ ++ template ++ void operator()(T v, volatile T* p) const { __atomic_store(p, &v, __ATOMIC_RELEASE); } ++}; ++ ++template ++struct OrderAccess::PlatformOrderedStore ++{ ++ template ++ void operator()(T v, volatile T* p) const { release_store(p, v); fence(); } ++}; ++#endif // OS_CPU_LINUX_SW64_VM_ORDERACCESS_LINUX_SW64_HPP +diff --git a/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.cpp b/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.cpp +new file mode 100755 +index 0000000000..ec6d567a3a +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.cpp +@@ -0,0 +1,803 @@ ++/* ++ * Copyright (c) 1999, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++// no precompiled headers ++#include "jvm.h" ++#include "asm/macroAssembler.hpp" ++#include "classfile/classLoader.hpp" ++#include "classfile/systemDictionary.hpp" ++#include "classfile/vmSymbols.hpp" ++#include "code/codeCache.hpp" ++#include "code/icBuffer.hpp" ++#include "code/vtableStubs.hpp" ++#include "code/nativeInst.hpp" ++#include "interpreter/interpreter.hpp" ++#include "logging/log.hpp" ++#include "memory/allocation.inline.hpp" ++#include "os_share_linux.hpp" ++#include "prims/jniFastGetField.hpp" ++#include "prims/jvm_misc.hpp" ++#include "runtime/arguments.hpp" ++#include "runtime/extendedPC.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/interfaceSupport.inline.hpp" ++#include "runtime/java.hpp" ++#include "runtime/javaCalls.hpp" ++#include "runtime/mutexLocker.hpp" ++#include "runtime/osThread.hpp" ++#include "runtime/sharedRuntime.hpp" ++#include "runtime/stubRoutines.hpp" ++#include "runtime/thread.inline.hpp" ++#include "runtime/timer.hpp" ++#include "utilities/debug.hpp" ++#include "utilities/events.hpp" ++#include "utilities/vmError.hpp" ++ ++// put OS-includes here ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++# include ++ ++//not sure ++#define REG_SP 30 //// #define REG_SP 29 ++#define REG_FP 15 //// #define REG_FP 30 ++#define REG_RA 26 //// #define REG_FP 30 ++//#define PRINT_SIGNAL_HANDLE ++ ++address __attribute__((always_inline)) os::current_stack_pointer() { ++//ZHJ return (address)__builtin_frame_address(0); ++ register void *ssp; ++ __asm__ (" mov $sp,%0\n":"=r"(ssp)); ++ ++ return (address) (char *)ssp; ++} ++ ++char* os::non_memory_address_word() { ++ // Must never look like an address returned by reserve_memory, ++ // even in its subfields (as defined by the CPU immediate fields, ++ // if the CPU splits constants across multiple instructions). ++ ++ return (char*) -1; ++} ++ ++address os::Linux::ucontext_get_pc(const ucontext_t * uc) { ++ //return (address)uc->uc_mcontext.gregs[REG_PC]; ++ return (address)uc->uc_mcontext.sc_pc; ++} ++ ++void os::Linux::ucontext_set_pc(ucontext_t * uc, address pc) { ++//ZHJ uc->uc_mcontext.pc = (intptr_t)pc; ++ uc->uc_mcontext.sc_pc = (intptr_t)pc; ++} ++ ++intptr_t* os::Linux::ucontext_get_sp(const ucontext_t * uc) { ++//ZHJ return (intptr_t*)uc->uc_mcontext.sp; ++ return (intptr_t*)uc->uc_mcontext.sc_regs[REG_SP]; ++} ++ ++intptr_t* os::Linux::ucontext_get_fp(const ucontext_t * uc) { ++//ZHJ return (intptr_t*)uc->uc_mcontext.regs[REG_FP]; ++ return (intptr_t*)uc->uc_mcontext.sc_regs[REG_FP]; ++} ++ ++address os::ucontext_get_ra(const ucontext_t * uc) { ++ return (address)uc->uc_mcontext.sc_regs[REG_RA]; ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread ++// is currently interrupted by SIGPROF. ++// os::Solaris::fetch_frame_from_ucontext() tries to skip nested signal ++// frames. Currently we don't do that on Linux, so it's the same as ++// os::fetch_frame_from_context(). ++// This method is also used for stack overflow signal handling. 
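
The accessors above pull pc, sp and fp out of a ucontext_t; the ucontext itself reaches the VM as the third argument of an SA_SIGINFO signal handler, which is how JVM_handle_linux_signal further down in this file receives it. A minimal sketch of that plumbing (the handler here only prints; the pc field is architecture-specific, e.g. uc_mcontext.sc_pc on this port, and printf is used only because this is a demo):

    #include <signal.h>
    #include <ucontext.h>
    #include <cstdio>

    static void demo_handler(int sig, siginfo_t* info, void* ucVoid) {
      ucontext_t* uc = static_cast<ucontext_t*>(ucVoid);
      // A real handler would read the faulting pc/sp/fp from uc->uc_mcontext here.
      std::printf("signal %d (si_code %d), ucontext at %p\n",
                  sig, info->si_code, static_cast<void*>(uc));
    }

    int main() {
      struct sigaction sa = {};
      sa.sa_sigaction = demo_handler;   // three-argument form
      sa.sa_flags     = SA_SIGINFO;
      sigemptyset(&sa.sa_mask);
      sigaction(SIGUSR1, &sa, nullptr);
      raise(SIGUSR1);                   // drive the handler once
      return 0;
    }
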
++ExtendedPC os::Linux::fetch_frame_from_ucontext(Thread* thread, ++ const ucontext_t* uc, intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ assert(thread != NULL, "just checking"); ++ assert(ret_sp != NULL, "just checking"); ++ assert(ret_fp != NULL, "just checking"); ++ ++ return os::fetch_frame_from_context(uc, ret_sp, ret_fp); ++} ++ ++ExtendedPC os::fetch_frame_from_context(const void* ucVoid, ++ intptr_t** ret_sp, intptr_t** ret_fp) { ++ ++ ExtendedPC epc; ++ const ucontext_t* uc = (const ucontext_t*)ucVoid; ++ ++ if (uc != NULL) { ++ epc = ExtendedPC(os::Linux::ucontext_get_pc(uc)); ++ if (ret_sp) *ret_sp = os::Linux::ucontext_get_sp(uc); ++ if (ret_fp) *ret_fp = os::Linux::ucontext_get_fp(uc); ++ } else { ++ // construct empty ExtendedPC for return value checking ++ epc = ExtendedPC(NULL); ++ if (ret_sp) *ret_sp = (intptr_t *)NULL; ++ if (ret_fp) *ret_fp = (intptr_t *)NULL; ++ } ++ ++ return epc; ++} ++ ++frame os::fetch_frame_from_context(const void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ ExtendedPC epc = fetch_frame_from_context(ucVoid, &sp, &fp); ++ frame ret_frame(sp, fp, epc.pc()); ++ ret_frame.fixRa(ucVoid); ++ return ret_frame; ++} ++ ++frame os::fetch_frame_from_ucontext(Thread* thread, void* ucVoid) { ++ intptr_t* sp; ++ intptr_t* fp; ++ ExtendedPC epc = os::Linux::fetch_frame_from_ucontext(thread, (ucontext_t*)ucVoid, &sp, &fp); ++ frame ret_frame(sp, fp, epc.pc()); ++ ret_frame.fixRa(ucVoid); ++ return ret_frame; ++} ++ ++bool os::Linux::get_frame_at_stack_banging_point(JavaThread* thread, ucontext_t* uc, frame* fr) { ++ address pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (Interpreter::contains(pc)) { ++ // interpreter performs stack banging after the fixed frame header has ++ // been generated while the compilers perform it before. To maintain ++ // semantic consistency between interpreted and compiled frames, the ++ // method returns the Java sender of the current frame. ++ *fr = os::fetch_frame_from_ucontext(thread, uc); ++ if (!fr->is_first_java_frame()) { ++ // get_frame_at_stack_banging_point() is only called when we ++ // have well defined stacks so java_sender() calls do not need ++ // to assert safe_for_sender() first. ++ *fr = fr->java_sender(); ++ } ++ } else { ++ // more complex code with compiled code ++ assert(!Interpreter::contains(pc), "Interpreted methods should have been handled above"); ++ CodeBlob* cb = CodeCache::find_blob(pc); ++ if (cb == NULL || !cb->is_nmethod() || cb->is_frame_complete_at(pc)) { ++ // Not sure where the pc points to, fallback to default ++ // stack overflow handling ++ return false; ++ } else { ++ // in compiled code, the stack banging is performed just after the return pc ++ // has been pushed on the stack ++ intptr_t* fp = os::Linux::ucontext_get_fp(uc); ++ intptr_t* sp = os::Linux::ucontext_get_sp(uc); ++ address pc = (address)uc->uc_mcontext.sc_regs[REG_RA]; ++ *fr = frame(sp, fp, pc); ++ if (!fr->is_java_frame()) { ++ assert(!fr->is_first_frame(), "Safety check"); ++ // See java_sender() comment above. ++ *fr = fr->java_sender(); ++ } ++ } ++ } ++ assert(fr->is_java_frame(), "Safety check"); ++ return true; ++} ++ ++// By default, gcc always saves frame pointer rfp on this stack. This ++// may get turned off by -fomit-frame-pointer. 
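
The inline asm in os::current_stack_pointer() above, and in get_previous_fp() just below, reads $sp/$fp directly; the commented-out line in current_stack_pointer() points at the portable GCC alternative, __builtin_frame_address(). A small sketch of those builtins (whether a frame pointer exists at all depends on -fomit-frame-pointer, as the comment above notes):

    #include <cstdio>

    __attribute__((noinline)) static void show_frame_info() {
      void* fp = __builtin_frame_address(0);    // this function's frame base
      void* ra = __builtin_return_address(0);   // address this call returns to
      std::printf("fp=%p ra=%p\n", fp, ra);
    }

    int main() {
      show_frame_info();
      return 0;
    }
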
++frame os::get_sender_for_C_frame(frame* fr) { ++ return frame(NULL, fr->link(), fr->sender_pc()); ++} ++ ++intptr_t* __attribute__((always_inline)) os::get_previous_fp() { ++ register void *sfp; ++ __asm__ (" mov $fp,%0\n":"=r"(sfp)); ++ ++ return (intptr_t *)sfp; ++} ++ ++frame os::current_frame() { ++ intptr_t* fp = (intptr_t*)get_previous_fp(); ++ frame myframe((intptr_t*)os::current_stack_pointer(), ++ (intptr_t*)fp, ++ CAST_FROM_FN_PTR(address, os::current_frame)); ++ myframe.init_sender_for_c_frame(CAST_FROM_FN_PTR(address, os::current_frame)); ++ if (os::is_first_C_frame(&myframe)) { ++ // stack is not walkable ++ return frame(); ++ } else { ++ myframe = os::get_sender_for_C_frame(&myframe); ++ return os::get_sender_for_C_frame(&myframe); ++ } ++} ++ ++// Utility functions ++extern "C" int ++JVM_handle_linux_signal(int sig, ++ siginfo_t* info, ++ void* ucVoid, ++ int abort_if_unrecognized) { ++ if (TraceSignalHandling) { ++ tty->print_cr("Signal: signo=%d, sicode=%d, sierrno=%d, siaddr=%lx", ++ info->si_signo, ++ info->si_code, ++ info->si_errno, ++ (unsigned long)info->si_addr); ++ if (info->si_signo == 4) // the pc for SIGILL is (info->si_addr)-1) for SW, but mips and aarch64 are just info->si_addr ++ tty->print_cr("SIGILL 0x%08x", *((int*)(info->si_addr)-1)); ++ } ++ ++ ucontext_t* uc = (ucontext_t*) ucVoid; ++ ++ Thread* t = Thread::current_or_null_safe(); ++ ++ // Must do this before SignalHandlerMark, if crash protection installed we will longjmp away ++ // (no destructors can be run) ++ os::ThreadCrashProtection::check_crash_protection(sig, t); ++ ++ SignalHandlerMark shm(t); ++ ++ // Note: it's not uncommon that JNI code uses signal/sigset to install ++ // then restore certain signal handler (e.g. to temporarily block SIGPIPE, ++ // or have a SIGILL handler when detecting CPU type). When that happens, ++ // JVM_handle_linux_signal() might be invoked with junk info/ucVoid. To ++ // avoid unnecessary crash when libjsig is not preloaded, try handle signals ++ // that do not require siginfo/ucontext first. ++ ++ if (sig == SIGPIPE || sig == SIGXFSZ) { ++ // allow chained handler to go first ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ return true; ++ } else { ++ // Ignoring SIGPIPE/SIGXFSZ - see bugs 4229104 or 6499219 ++ return true; ++ } ++ } ++ ++#ifdef CAN_SHOW_REGISTERS_ON_ASSERT ++ if ((sig == SIGSEGV || sig == SIGBUS) && info != NULL && info->si_addr == g_assert_poison) { ++ if (handle_assert_poison_fault(ucVoid, info->si_addr)) { ++ return 1; ++ } ++ } ++#endif ++ ++ JavaThread* thread = NULL; ++ VMThread* vmthread = NULL; ++ if (os::Linux::signal_handlers_are_installed) { ++ if (t != NULL ){ ++ if(t->is_Java_thread()) { ++ if (TraceSignalHandling) tty->print_cr("this thread is a java thread"); ++ thread = (JavaThread*)t; ++ } ++ else if(t->is_VM_thread()){ ++ if (TraceSignalHandling) tty->print_cr("this thread is a VM thread\n"); ++ vmthread = (VMThread *)t; ++ } ++ } ++ } ++ ++ // Handle SafeFetch faults: ++ if (uc != NULL) { ++ address const pc = (address) os::Linux::ucontext_get_pc(uc); ++ if (pc && StubRoutines::is_safefetch_fault(pc)) { ++ os::Linux::ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc)); ++ return 1; ++ } ++ } ++ ++/* ++ NOTE: does not seem to work on linux. 
++ if (info == NULL || info->si_code <= 0 || info->si_code == SI_NOINFO) { ++ // can't decode this kind of signal ++ info = NULL; ++ } else { ++ assert(sig == info->si_signo, "bad siginfo"); ++ } ++*/ ++ // decide if this trap can be handled by a stub ++ address stub = NULL; ++ ++ address pc = NULL; ++ ++// pc = (address) os::Linux::ucontext_get_pc(uc); ++//if (TraceSignalHandling) { ++// tty->print_cr("pc=%lx", pc); ++// os::print_context(tty, uc); ++//} ++ //%note os_trap_1 ++ if (info != NULL && uc != NULL && thread != NULL) { ++ pc = (address) os::Linux::ucontext_get_pc(uc); ++ ++ // Halt if SI_KERNEL before more crashes get misdiagnosed as Java bugs ++ // This can happen in any running code (currently more frequently in ++ // interpreter code but has been seen in compiled code) ++ if (sig == SIGSEGV && info->si_addr == 0 && info->si_code == SI_KERNEL) { ++ fatal("An irrecoverable SI_KERNEL SIGSEGV has occurred due " ++ "to unstable signal handling in this distribution."); ++ } ++ ++ // Handle ALL stack overflow variations here ++ if (sig == SIGSEGV) { ++ address addr = (address) info->si_addr; ++ if (TraceSignalHandling) tty->print("handle all stack overflow variations: "); ++ /*tty->print("addr = %lx, stack base = %lx, stack top = %lx\n", ++ addr, ++ thread->stack_base(), ++ thread->stack_base() - thread->stack_size()); ++ */ ++ ++ // check if fault address is within thread stack ++ if (thread->on_local_stack(addr)) { ++ // stack overflow ++ if (TraceSignalHandling) tty->print("stack exception check \n"); ++ if (thread->in_stack_yellow_reserved_zone(addr)) { ++ if (TraceSignalHandling) tty->print("exception addr is in yellow zone\n"); ++ if (thread->thread_state() == _thread_in_Java) { ++ if (thread->in_stack_reserved_zone(addr)) { ++ frame fr; ++ if (os::Linux::get_frame_at_stack_banging_point(thread, uc, &fr)) { ++ assert(fr.is_java_frame(), "Must be a Java frame"); ++ frame activation = ++ SharedRuntime::look_for_reserved_stack_annotated_method(thread, fr); ++ if (activation.sp() != NULL) { ++ thread->disable_stack_reserved_zone(); ++ if (activation.is_interpreted_frame()) { ++ thread->set_reserved_stack_activation((address)( ++ activation.fp() + frame::interpreter_frame_initial_sp_offset)); ++ } else { ++ thread->set_reserved_stack_activation((address)activation.unextended_sp()); ++ } ++ return 1; ++ } ++ } ++ } ++ // Throw a stack overflow exception. Guard pages will be reenabled ++ // while unwinding the stack. ++ if (TraceSignalHandling) tty->print("this thread is in java\n"); ++ thread->disable_stack_yellow_reserved_zone(); ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::STACK_OVERFLOW); ++ } else { ++ // Thread was in the vm or native code. Return and try to finish. ++ if (TraceSignalHandling) tty->print("this thread is in vm or native codes and return\n"); ++ thread->disable_stack_yellow_reserved_zone(); ++ return 1; ++ } ++ } else if (thread->in_stack_red_zone(addr)) { ++ // Fatal red zone violation. Disable the guard pages and fall through ++ // to handle_unexpected_exception way down below. ++ if (TraceSignalHandling) tty->print("exception addr is in red zone\n"); ++ thread->disable_stack_red_zone(); ++ tty->print_raw_cr("An irrecoverable stack overflow has occurred."); ++ ++ // This is a likely cause, but hard to verify. Let's just print ++ // it as a hint. 
++ tty->print_raw_cr("Please check if any of your loaded .so files has " ++ "enabled executable stack (see man page execstack(8))"); ++ } else { ++ // Accessing stack address below sp may cause SEGV if current ++ // thread has MAP_GROWSDOWN stack. This should only happen when ++ // current thread was created by user code with MAP_GROWSDOWN flag ++ // and then attached to VM. See notes in os_linux.cpp. ++ if (TraceSignalHandling) tty->print("exception addr is neither in yellow zone nor in the red one\n"); ++ if (thread->osthread()->expanding_stack() == 0) { ++ thread->osthread()->set_expanding_stack(); ++ if (os::Linux::manually_expand_stack(thread, addr)) { ++ thread->osthread()->clear_expanding_stack(); ++ return 1; ++ } ++ thread->osthread()->clear_expanding_stack(); ++ } else { ++ fatal("recursive segv. expanding stack."); ++ } ++ } ++ } //addr < ++ } //sig == SIGSEGV ++ ++ if (thread->thread_state() == _thread_in_Java) { ++ // Java thread running in Java code => find exception handler if any ++ // a fault inside compiled code, the interpreter, or a stub ++ if (TraceSignalHandling) tty->print("java thread running in java code\n"); ++ if (sig == SIGILL && (nativeInstruction_at(pc)->is_sigill_zombie_not_entrant() || nativeInstruction_at(pc - 4)->is_sigill_zombie_not_entrant())) { ++#ifdef PRINT_SIGNAL_HANDLE ++ tty->print_cr("verified entry = %lx, sig=%d", nativeInstruction_at(pc), sig); ++#endif ++ stub = SharedRuntime::get_handle_wrong_method_stub(); ++ // Handle signal from NativeJump::patch_verified_entry(). ++ }else if (sig == SIGSEGV && os::is_poll_address((address)info->si_addr)) { ++ stub = SharedRuntime::get_poll_stub(pc); ++ if (TraceSignalHandling) tty->print_cr("polling address = %lx, sig=%d, stub = %lx", (unsigned long)os::get_polling_page(), sig, (unsigned long)stub); ++ } else if (sig == SIGBUS /* && info->si_code == BUS_OBJERR */) { ++ // BugId 4454115: A read from a MappedByteBuffer can fault ++ // here if the underlying file has been truncated. ++ // Do not crash the VM in such a case. ++ CodeBlob* cb = CodeCache::find_blob_unsafe(pc); ++ CompiledMethod* nm = (cb != NULL) ? 
cb->as_compiled_method_or_null() : NULL; ++ if (TraceSignalHandling) tty->print("cb = %lx, nm = %lx\n", (unsigned long)cb, (unsigned long)nm); ++ if (nm != NULL && nm->has_unsafe_access()) { ++ address next_pc = pc + NativeCall::instruction_size; ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ } else if (sig == SIGFPE && ++ (info->si_code == FPE_INTDIV || info->si_code == FPE_FLTDIV)) { ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, ++ pc, ++ SharedRuntime::IMPLICIT_DIVIDE_BY_ZERO); ++ } else if (sig == SIGSEGV && ++ !MacroAssembler::needs_explicit_null_check((intptr_t)info->si_addr)) { ++ if (TraceSignalHandling) tty->print("continuation for implicit exception\n"); ++ // Determination of interpreter/vtable stub/compiled code null exception ++ stub = SharedRuntime::continuation_for_implicit_exception(thread, pc, SharedRuntime::IMPLICIT_NULL); ++ if (TraceSignalHandling) tty->print_cr("continuation_for_implicit_exception stub: %lx", (unsigned long)stub); ++ } ++ } else if (thread->thread_state() == _thread_in_vm && ++ sig == SIGBUS && /* info->si_code == BUS_OBJERR && */ ++ thread->doing_unsafe_access()) { ++ if (TraceSignalHandling) tty->print_cr("SIGBUS in vm thread \n"); ++ address next_pc = pc + NativeCall::instruction_size; ++ stub = SharedRuntime::handle_unsafe_access(thread, next_pc); ++ } ++ ++ // jni_fast_GetField can trap at certain pc's if a GC kicks in ++ // and the heap gets shrunk before the field access. ++ if ((sig == SIGSEGV) || (sig == SIGBUS)) { ++ if (TraceSignalHandling) tty->print("jni fast get trap: "); ++ address addr = JNI_FastGetField::find_slowcase_pc(pc); ++ if (addr != (address)-1) { ++ stub = addr; ++ } ++ if (TraceSignalHandling) tty->print_cr("addr = %lx, stub = %lx", (unsigned long)addr, (unsigned long)stub); ++ } ++ ++ // Check to see if we caught the safepoint code in the ++ // process of write protecting the memory serialization page. ++ // It write enables the page immediately after protecting it ++ // so we can just return to retry the write. ++ if ((sig == SIGSEGV) && ++ os::is_memory_serialize_page(thread, (address) info->si_addr)) { ++ if (TraceSignalHandling) tty->print("write protecting the memory serialiazation page\n"); ++ // Block current thread until the memory serialize page permission restored. ++ os::block_on_serialize_page_trap(); ++ return true; ++ } ++ } ++ ++ // Execution protection violation ++ // ++ // This should be kept as the last step in the triage. We don't ++ // have a dedicated trap number for a no-execute fault, so be ++ // conservative and allow other handlers the first shot. ++ // ++ // Note: We don't test that info->si_code == SEGV_ACCERR here. ++ // this si_code is so generic that it is almost meaningless; and ++ // the si_code for this condition may change in the future. ++ // Furthermore, a false-positive should be harmless. ++ if (UnguardOnExecutionViolation > 0 && ++ (sig == SIGSEGV || sig == SIGBUS) /*&& ++ uc->uc_mcontext.sc_regs[REG_TRAPNO] == trap_page_fault*/) { ++ ShouldNotReachHere(); ++ int page_size = os::vm_page_size(); ++ address addr = (address) info->si_addr; ++ address pc = os::Linux::ucontext_get_pc(uc); ++ // Make sure the pc and the faulting address are sane. ++ // ++ // If an instruction spans a page boundary, and the page containing ++ // the beginning of the instruction is executable but the following ++ // page is not, the pc and the faulting address might be slightly ++ // different - we still want to unguard the 2nd page in this case. 
++ // ++ // 15 bytes seems to be a (very) safe value for max instruction size. ++ bool pc_is_near_addr = ++ (pointer_delta((void*) addr, (void*) pc, sizeof(char)) < 15); ++ bool instr_spans_page_boundary = ++ (align_down((intptr_t) pc ^ (intptr_t) addr, ++ (intptr_t) page_size) > 0); ++ ++ if (pc == addr || (pc_is_near_addr && instr_spans_page_boundary)) { ++ static volatile address last_addr = ++ (address) os::non_memory_address_word(); ++ ++ // In conservative mode, don't unguard unless the address is in the VM ++ if (addr != last_addr && ++ (UnguardOnExecutionViolation > 1 || os::address_is_in_vm(addr))) { ++ ++ // Set memory to RWX and retry ++ address page_start = align_down(addr, page_size); ++ bool res = os::protect_memory((char*) page_start, page_size, ++ os::MEM_PROT_RWX); ++ ++ log_debug(os)("Execution protection violation " ++ "at " INTPTR_FORMAT ++ ", unguarding " INTPTR_FORMAT ": %s, errno=%d", p2i(addr), ++ p2i(page_start), (res ? "success" : "failed"), errno); ++ stub = pc; ++ ++ // Set last_addr so if we fault again at the same address, we don't end ++ // up in an endless loop. ++ // ++ // There are two potential complications here. Two threads trapping at ++ // the same address at the same time could cause one of the threads to ++ // think it already unguarded, and abort the VM. Likely very rare. ++ // ++ // The other race involves two threads alternately trapping at ++ // different addresses and failing to unguard the page, resulting in ++ // an endless loop. This condition is probably even more unlikely than ++ // the first. ++ // ++ // Although both cases could be avoided by using locks or thread local ++ // last_addr, these solutions are unnecessary complication: this ++ // handler is a best-effort safety net, not a complete solution. It is ++ // disabled by default and should only be used as a workaround in case ++ // we missed any no-execute-unsafe VM code. ++ ++ last_addr = addr; ++ } ++ } ++ } ++ ++ ++ if (stub != NULL) { ++ if (TraceSignalHandling) tty->print_cr("resolved stub=%lx\n",(unsigned long)stub); ++ // save all thread context in case we need to restore it ++ if (thread != NULL) thread->set_saved_exception_pc(pc); ++ ++ os::Linux::ucontext_set_pc(uc, stub); ++ return true; ++ } ++ ++ // signal-chaining ++ if (os::Linux::chained_handler(sig, info, ucVoid)) { ++ if (TraceSignalHandling) tty->print_cr("signal chaining\n"); ++ return true; ++ } ++ ++ if (!abort_if_unrecognized) { ++ if (TraceSignalHandling) tty->print_cr("abort becauce of unrecognized\n"); ++ // caller wants another chance, so give it to him ++ return false; ++ } ++ ++ if (pc == NULL && uc != NULL) { ++ pc = os::Linux::ucontext_get_pc(uc); ++ } ++ ++ // unmask current signal ++ sigset_t newset; ++ sigemptyset(&newset); ++ sigaddset(&newset, sig); ++ sigprocmask(SIG_UNBLOCK, &newset, NULL); ++ if (TraceSignalHandling) tty->print_cr("VMError in signal handler\n"); ++ ++ VMError::report_and_die(t, sig, pc, info, ucVoid); ++ ++ ShouldNotReachHere(); ++ return true; // Mute compiler ++} ++ ++// FCSR:...|24| 23 |22|21|... ++// ...|FS|FCC0|FO|FN|... 
++void os::Linux::init_thread_fpu_state(void) { ++ // Nothing to do ++} ++ ++int os::Linux::get_fpu_control_word(void) { ++ ShouldNotReachHere(); ++ return 0; ++} ++ ++void os::Linux::set_fpu_control_word(int fpu_control) { ++ ShouldNotReachHere(); ++} ++ ++bool os::is_allocatable(size_t bytes) { ++ ++ if (bytes < 2 * G) { ++ return true; ++ } ++ ++ char* addr = reserve_memory(bytes, NULL); ++ ++ if (addr != NULL) { ++ release_memory(addr, bytes); ++ } ++ ++ return addr != NULL; ++} ++ ++//////////////////////////////////////////////////////////////////////////////// ++// thread stack ++ ++size_t os::Posix::_compiler_thread_min_stack_allowed = 48 * K; ++size_t os::Posix::_java_thread_min_stack_allowed = 40 * K; ++size_t os::Posix::_vm_internal_thread_min_stack_allowed = 96 * K; ++ ++// return default stack size for thr_type ++size_t os::Posix::default_stack_size(os::ThreadType thr_type) { ++ // default stack size (compiler thread needs larger stack) ++ size_t s = (thr_type == os::compiler_thread ? 2 * M : 512 * K); ++ return s; ++} ++ ++///////////////////////////////////////////////////////////////////////////// ++// helper functions for fatal error handler ++ ++void os::print_context(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ st->print_cr("Registers:"); ++ st->print( "V0=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[0]); ++ st->print(", T0=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[1]); ++ st->print(", T1=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[2]); ++ st->print(", T2=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[3]); ++ st->cr(); ++ st->print( "T3=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[4]); ++ st->print(", T4=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[5]); ++ st->print(", T5=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[6]); ++ st->print(", T6=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[7]); ++ st->cr(); ++ st->print( "T7=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[8]); ++ st->print(", S0=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[9]); ++ st->print(", S1=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[10]); ++ st->print(", S2=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[11]); ++ st->cr(); ++ st->print( "S3=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[12]); ++ st->print(", S4=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[13]); ++ st->print(", S5=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[14]); ++ st->print(", FP=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[15]); ++ st->cr(); ++ st->print( "A0=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[16]); ++ st->print(", A1=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[17]); ++ st->print(", A2=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[18]); ++ st->print(", A3=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[19]); ++ st->cr(); ++ st->print( "A4=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[20]); ++ st->print(", A5=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[21]); ++ st->print(", T8=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[22]); ++ st->print(", T9=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[23]); ++ st->cr(); ++ st->print( "T10=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[24]); ++ st->print(", T11=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[25]); ++ st->print(", RA=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[26]); ++ st->print(", T12=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[27]); ++ st->cr(); ++ st->print( "AT=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[28]); ++ st->print(", GP=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[29]); ++ st->print(", SP=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[30]); ++ st->print(", R0=" INTPTR_FORMAT, uc->uc_mcontext.sc_regs[31]); 
++ st->cr(); ++ st->cr(); ++ ++ intptr_t *sp = (intptr_t *)os::Linux::ucontext_get_sp(uc); ++ st->print_cr("Top of Stack: (sp=" PTR_FORMAT ")", p2i(sp)); ++ //print_hex_dump(st, (address)sp, (address)(sp + 8), sizeof(intptr_t)); ++ print_hex_dump(st, (address)sp-32, (address)(sp + 32), sizeof(intptr_t)); ++ st->cr(); ++ ++ // Note: it may be unsafe to inspect memory near pc. For example, pc may ++ // point to garbage if entry point in an nmethod is corrupted. Leave ++ // this at the end, and hope for the best. ++ address pc = os::Linux::ucontext_get_pc(uc); ++ print_instructions(st, pc, sizeof(char)); ++ st->cr(); ++} ++ ++void os::print_register_info(outputStream *st, const void *context) { ++ if (context == NULL) return; ++ ++ const ucontext_t *uc = (const ucontext_t*)context; ++ ++ st->print_cr("Register to memory mapping:"); ++ st->cr(); ++ ++ // this is horrendously verbose but the layout of the registers in the ++ // // context does not match how we defined our abstract Register set, so ++ // // we can't just iterate through the gregs area ++ // ++ // // this is only for the "general purpose" registers ++ st->print("V0=" ); print_location(st, uc->uc_mcontext.sc_regs[0]); ++ st->print("T0=" ); print_location(st, uc->uc_mcontext.sc_regs[1]); ++ st->print("T1=" ); print_location(st, uc->uc_mcontext.sc_regs[2]); ++ st->print("T2=" ); print_location(st, uc->uc_mcontext.sc_regs[3]); ++ st->cr(); ++ st->print("T3=" ); print_location(st, uc->uc_mcontext.sc_regs[4]); ++ st->print("T4=" ); print_location(st, uc->uc_mcontext.sc_regs[5]); ++ st->print("T5=" ); print_location(st, uc->uc_mcontext.sc_regs[6]); ++ st->print("T6=" ); print_location(st, uc->uc_mcontext.sc_regs[7]); ++ st->cr(); ++ st->print("T7=" ); print_location(st, uc->uc_mcontext.sc_regs[8]); ++ st->print("S0=" ); print_location(st, uc->uc_mcontext.sc_regs[9]); ++ st->print("S1=" ); print_location(st, uc->uc_mcontext.sc_regs[10]); ++ st->print("S2=" ); print_location(st, uc->uc_mcontext.sc_regs[11]); ++ st->cr(); ++ st->print("S3=" ); print_location(st, uc->uc_mcontext.sc_regs[12]); ++ st->print("S4=" ); print_location(st, uc->uc_mcontext.sc_regs[13]); ++ st->print("S5=" ); print_location(st, uc->uc_mcontext.sc_regs[14]); ++ st->print("FP=" ); print_location(st, uc->uc_mcontext.sc_regs[15]); ++ st->cr(); ++ st->print("A0=" ); print_location(st, uc->uc_mcontext.sc_regs[16]); ++ st->print("A1=" ); print_location(st, uc->uc_mcontext.sc_regs[17]); ++ st->print("A2=" ); print_location(st, uc->uc_mcontext.sc_regs[18]); ++ st->print("A3=" ); print_location(st, uc->uc_mcontext.sc_regs[19]); ++ st->cr(); ++ st->print("A4=" ); print_location(st, uc->uc_mcontext.sc_regs[20]); ++ st->print("A5=" ); print_location(st, uc->uc_mcontext.sc_regs[21]); ++ st->print("T8=" ); print_location(st, uc->uc_mcontext.sc_regs[22]); ++ st->print("T9=" ); print_location(st, uc->uc_mcontext.sc_regs[23]); ++ st->cr(); ++ st->print("T10=" ); print_location(st, uc->uc_mcontext.sc_regs[24]); ++ st->print("T11=" ); print_location(st, uc->uc_mcontext.sc_regs[25]); ++ st->print("RA=" ); print_location(st, uc->uc_mcontext.sc_regs[26]); ++ st->print("T12=" ); print_location(st, uc->uc_mcontext.sc_regs[27]); ++ st->cr(); ++ st->print("AT=" ); print_location(st, uc->uc_mcontext.sc_regs[28]); ++ st->print("GP=" ); print_location(st, uc->uc_mcontext.sc_regs[29]); ++ st->print("SP=" ); print_location(st, uc->uc_mcontext.sc_regs[30]); ++ st->print("R0=" ); print_location(st, uc->uc_mcontext.sc_regs[31]); ++ st->cr(); ++} ++ ++void os::setup_fpu() { ++ /* ++ //no use for MIPS 
++ int fcsr; ++ address fpu_cntrl = StubRoutines::addr_fpu_cntrl_wrd_std(); ++ __asm__ __volatile__ ( ++ ".set noat;" ++ "cfc1 %0, $31;" ++ "sw %0, 0(%1);" ++ : "=r" (fcsr) ++ : "r" (fpu_cntrl) ++ : "memory" ++ ); ++ printf("fpu_cntrl: %lx\n", fpu_cntrl); ++ */ ++} ++ ++#ifndef PRODUCT ++void os::verify_stack_alignment() { ++ //warning("TODO:os::verify_stack_alignment, check jzy"); ++ //assert(((intptr_t)os::current_stack_pointer() & (StackAlignmentInBytes-1)) == 0, "incorrect stack alignment"); ++} ++#endif ++ ++int os::extra_bang_size_in_bytes() { ++ // sw64 does not require the additional stack bang. ++ //warning("TODO:os::extra_bang_size_in_bytes, check lsp"); ++ return 0; ++} ++ ++extern "C" int SpinPause() {return 0;} +diff --git a/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.hpp b/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.hpp +new file mode 100644 +index 0000000000..0181eddfb1 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.hpp +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 1999, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_SW64_VM_OS_LINUX_SW64_HPP ++#define OS_CPU_LINUX_SW64_VM_OS_LINUX_SW64_HPP ++ ++ static void setup_fpu(); ++//// static bool supports_sse(); ++//// ++//// static jlong rdtsc(); ++ ++ static bool is_allocatable(size_t bytes); ++ static intptr_t *get_previous_fp(); ++ static address ucontext_get_ra(const ucontext_t* uc); ++ ++ // Used to register dynamic code cache area with the OS ++ // Note: Currently only used in 64 bit Windows implementations ++ static bool register_code_area(char *low, char *high) { return true; } ++ ++//// // Atomically copy 64 bits of data ++//// static void atomic_copy64(const volatile void *src, volatile void *dst) { ++//// *(jlong *) dst = *(const jlong *) src; ++//// } ++ ++#endif // OS_CPU_LINUX_SW64_VM_OS_LINUX_SW64_HPP +diff --git a/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.inline.hpp b/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.inline.hpp +new file mode 100644 +index 0000000000..9ca12eca3b +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_sw64/os_linux_sw64.inline.hpp +@@ -0,0 +1,44 @@ ++/* ++ * Copyright (c) 2011, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_SW64_VM_OS_LINUX_SW64_INLINE_HPP ++#define OS_CPU_LINUX_SW64_VM_OS_LINUX_SW64_INLINE_HPP ++ ++#include "runtime/os.hpp" ++ ++////// See http://www.technovelty.org/code/c/reading-rdtsc.htl for details ++////inline jlong os::rdtsc() { ++////#if 0 ++//// uint64_t res; ++//// uint32_t ts1, ts2; ++//// __asm__ __volatile__ ("rdtsc" : "=a" (ts1), "=d" (ts2)); ++//// res = ((uint64_t)ts1 | (uint64_t)ts2 << 32); ++//// return (jlong)res; ++////#else ++//// return (jlong)0; ++////#endif ++////} ++ ++#endif // OS_CPU_LINUX_SW64_VM_OS_LINUX_SW64_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_sw64/prefetch_linux_sw64.inline.hpp b/src/hotspot/os_cpu/linux_sw64/prefetch_linux_sw64.inline.hpp +new file mode 100644 +index 0000000000..87426c4fca +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_sw64/prefetch_linux_sw64.inline.hpp +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_SW64_VM_PREFETCH_LINUX_SW64_INLINE_HPP ++#define OS_CPU_LINUX_SW64_VM_PREFETCH_LINUX_SW64_INLINE_HPP ++ ++#include "runtime/prefetch.hpp" ++ ++ ++inline void Prefetch::read (void *loc, intx interval) { ++ if (interval >= 0) ++ __asm__ __volatile__ ( ++ " fillcs 0(%0) \n" ++ : ++ : "r" ( ((address)loc) +((long)interval) ) ++ : "memory" ++ ); ++} ++ ++inline void Prefetch::write(void *loc, intx interval) { ++ if (interval >= 0) ++ __asm__ __volatile__ ( ++ " fillde 0(%0) \n" ++ : ++ : "r" ( ((address)loc) +((long)interval) ) ++ : "memory" ++ ); ++} ++ ++#endif // OS_CPU_LINUX_SW64_VM_PREFETCH_LINUX_SW64_INLINE_HPP +diff --git a/src/hotspot/os_cpu/linux_sw64/thread_linux_sw64.cpp b/src/hotspot/os_cpu/linux_sw64/thread_linux_sw64.cpp +new file mode 100644 +index 0000000000..a1282d053b +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_sw64/thread_linux_sw64.cpp +@@ -0,0 +1,117 @@ ++/* ++ * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "memory/metaspaceShared.hpp" ++#include "runtime/frame.inline.hpp" ++#include "runtime/thread.inline.hpp" ++ ++void JavaThread::pd_initialize() ++{ ++ _anchor.clear(); ++ ++ // A non-existing address as error detector ++// if (CompileBroker::get_compilation_id() > 0) ++// _handle_wrong_method_stub = (address)SharedRuntime::get_handle_wrong_method_stub(); ++// else ++// _handle_wrong_method_stub = (address)0x2B2B2B; ++} ++ ++frame JavaThread::pd_last_frame() { ++ assert(has_last_Java_frame(), "must have last_Java_sp() when suspended"); ++ vmassert(_anchor.last_Java_pc() != NULL, "not walkable"); ++ return frame(_anchor.last_Java_sp(), _anchor.last_Java_fp(), _anchor.last_Java_pc()); ++} ++ ++// For Forte Analyzer AsyncGetCallTrace profiling support - thread is ++// currently interrupted by SIGPROF ++bool JavaThread::pd_get_top_frame_for_signal_handler(frame* fr_addr, ++ void* ucontext, bool isInJava) { ++ ++ assert(Thread::current() == this, "caller must be current thread"); ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava) { ++ return pd_get_top_frame(fr_addr, ucontext, isInJava); ++} ++ ++bool JavaThread::pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava) { ++ assert(this->is_Java_thread(), "must be JavaThread"); ++ JavaThread* jt = (JavaThread *)this; ++ ++ // If we have a last_Java_frame, then we should use it even if ++ // isInJava == true. It should be more reliable than ucontext info. ++ if (jt->has_last_Java_frame() && jt->frame_anchor()->walkable()) { ++ *fr_addr = jt->pd_last_frame(); ++ return true; ++ } ++ ++ // At this point, we don't have a last_Java_frame, so ++ // we try to glean some information out of the ucontext ++ // if we were running Java code when SIGPROF came in. ++ if (isInJava) { ++ ucontext_t* uc = (ucontext_t*) ucontext; ++ ++ intptr_t* ret_fp; ++ intptr_t* ret_sp; ++ ExtendedPC addr = os::Linux::fetch_frame_from_ucontext(this, uc, ++ &ret_sp, &ret_fp); ++ if (addr.pc() == NULL || ret_sp == NULL ) { ++ // ucontext wasn't useful ++ return false; ++ } ++ ++ if (MetaspaceShared::is_in_trampoline_frame(addr.pc())) { ++ // In the middle of a trampoline call. Bail out for safety. ++ // This happens rarely so shouldn't affect profiling. ++ return false; ++ } ++ ++ frame ret_frame(ret_sp, ret_fp, addr.pc()); ++ if (!ret_frame.safe_for_sender(jt)) { ++#if COMPILER2_OR_JVMCI ++ // C2 and JVMCI use ebp as a general register see if NULL fp helps ++ frame ret_frame2(ret_sp, NULL, addr.pc()); ++ if (!ret_frame2.safe_for_sender(jt)) { ++ // nothing else to try if the frame isn't good ++ return false; ++ } ++ ret_frame = ret_frame2; ++#else ++ // nothing else to try if the frame isn't good ++ return false; ++#endif // COMPILER2_OR_JVMCI ++ } ++ *fr_addr = ret_frame; ++ return true; ++ } ++ ++ // nothing else to try ++ return false; ++} ++ ++void JavaThread::cache_global_variables() { } ++ +diff --git a/src/hotspot/os_cpu/linux_sw64/thread_linux_sw64.hpp b/src/hotspot/os_cpu/linux_sw64/thread_linux_sw64.hpp +new file mode 100644 +index 0000000000..d0047fba96 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_sw64/thread_linux_sw64.hpp +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_SW64_VM_THREAD_LINUX_SW64_HPP ++#define OS_CPU_LINUX_SW64_VM_THREAD_LINUX_SW64_HPP ++ ++ private: ++ void pd_initialize(); ++ ++ frame pd_last_frame(); ++ ++ public: ++ // Mutators are highly dangerous.... ++ intptr_t* last_Java_fp() { return _anchor.last_Java_fp(); } ++ void set_last_Java_fp(intptr_t* fp) { _anchor.set_last_Java_fp(fp); } ++ ++ void set_base_of_stack_pointer(intptr_t* base_sp) { ++ } ++ ++ static ByteSize last_Java_fp_offset() { ++ return byte_offset_of(JavaThread, _anchor) + JavaFrameAnchor::last_Java_fp_offset(); ++ } ++ ++ intptr_t* base_of_stack_pointer() { ++ return NULL; ++ } ++ void record_base_of_stack_pointer() { ++ } ++ ++ bool pd_get_top_frame_for_signal_handler(frame* fr_addr, void* ucontext, ++ bool isInJava); ++ ++ bool pd_get_top_frame_for_profiling(frame* fr_addr, void* ucontext, bool isInJava); ++private: ++ bool pd_get_top_frame(frame* fr_addr, void* ucontext, bool isInJava); ++public: ++ ++ // These routines are only used on cpu architectures that ++ // have separate register stacks (Itanium). ++ static bool register_stack_overflow() { return false; } ++ static void enable_register_stack_guard() {} ++ static void disable_register_stack_guard() {} ++ ++ // For convenient implementation of NativeGeneralJump::replace_mt_safe() ++ volatile address _handle_wrong_method_stub; ++ static ByteSize handle_wrong_method_stub_offset() { return byte_offset_of(JavaThread, _handle_wrong_method_stub); } ++// void set_handle_wrong_method_stub(address stub) { _handle_wrong_method_stub = stub; } ++ ++#endif // OS_CPU_LINUX_SW64_VM_THREAD_LINUX_SW64_HPP +diff --git a/src/hotspot/os_cpu/linux_sw64/vmStructs_linux_sw64.hpp b/src/hotspot/os_cpu/linux_sw64/vmStructs_linux_sw64.hpp +new file mode 100644 +index 0000000000..310c09e216 +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_sw64/vmStructs_linux_sw64.hpp +@@ -0,0 +1,55 @@ ++/* ++ * Copyright (c) 2000, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2014, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#ifndef OS_CPU_LINUX_SW64_VM_VMSTRUCTS_LINUX_SW64_HPP ++#define OS_CPU_LINUX_SW64_VM_VMSTRUCTS_LINUX_SW64_HPP ++ ++// These are the OS and CPU-specific fields, types and integer ++// constants required by the Serviceability Agent. This file is ++// referenced by vmStructs.cpp. ++ ++#define VM_STRUCTS_OS_CPU(nonstatic_field, static_field, unchecked_nonstatic_field, volatile_nonstatic_field, nonproduct_nonstatic_field, c2_nonstatic_field, unchecked_c1_static_field, unchecked_c2_static_field) \ ++ \ ++ /******************************/ \ ++ /* Threads (NOTE: incomplete) */ \ ++ /******************************/ \ ++ nonstatic_field(OSThread, _thread_id, OSThread::thread_id_t) \ ++ nonstatic_field(OSThread, _pthread_id, pthread_t) ++ ++ ++#define VM_TYPES_OS_CPU(declare_type, declare_toplevel_type, declare_oop_type, declare_integer_type, declare_unsigned_integer_type, declare_c1_toplevel_type, declare_c2_type, declare_c2_toplevel_type) \ ++ \ ++ /**********************/ \ ++ /* Posix Thread IDs */ \ ++ /**********************/ \ ++ \ ++ declare_integer_type(OSThread::thread_id_t) \ ++ declare_unsigned_integer_type(pthread_t) ++ ++#define VM_INT_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#define VM_LONG_CONSTANTS_OS_CPU(declare_constant, declare_preprocessor_constant, declare_c1_constant, declare_c2_constant, declare_c2_preprocessor_constant) ++ ++#endif // OS_CPU_LINUX_SW64_VM_VMSTRUCTS_LINUX_SW64_HPP +diff --git a/src/hotspot/os_cpu/linux_sw64/vm_version_linux_sw64.cpp b/src/hotspot/os_cpu/linux_sw64/vm_version_linux_sw64.cpp +new file mode 100644 +index 0000000000..80ee00d56f +--- /dev/null ++++ b/src/hotspot/os_cpu/linux_sw64/vm_version_linux_sw64.cpp +@@ -0,0 +1,118 @@ ++/* ++ * Copyright (c) 2006, 2019, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "runtime/os.hpp" ++#include "runtime/vm_version.hpp" ++ ++#define CPU_FAMILY_AMOUNT 9 ++ ++const char cpuinfo[CPU_FAMILY_AMOUNT][30] = { ++ "not-sw", // 0 ++ "sw410", // 1 ++ "sw4a", // 2 ++ "sw6a", // 3 ++ "sw6b", // 4 ++ "sw1621", // 5 ++ "sw421", // 6 ++ "sw3231", // 7 ++ "h8000", // 8 WX-H8000 for 8A ++}; ++ ++void read_cpu_info(const char *path, char *result) { ++ FILE *ptr; ++ char buf[1024]; ++ int i = 0; ++ if((ptr=fopen(path, "r")) != NULL) { ++ while(fgets(buf, 1024, ptr)!=NULL) { ++ strcat(result,buf); ++ i++; ++ if (i == 10) break; ++ } ++ fclose(ptr); ++ } else { ++ tty->print_cr("fopen %s error\n", path); ++ } ++} ++ ++void strlwr(char *str){ ++ for (; *str!='\0'; str++) ++ *str = tolower(*str); ++} ++ ++int VM_Version::platform_features(int features) { ++ char res[10240]; ++ int i; ++ features = spt_16k_page_m; //default support ++ memset(res, '\0', 10240 * sizeof(char)); ++ read_cpu_info("/proc/cpuinfo", res); ++ // res is converted to lower case ++ strlwr(res); ++ for (i = 1; i < CPU_FAMILY_AMOUNT; i++) { ++ if (strstr(res, cpuinfo[i])) { ++ break; ++ } ++ } ++ //add some other support when detected on shenwei ++ if (i != CPU_FAMILY_AMOUNT) { ++ features |= with_sw_support_m; ++ } ++ switch (i % CPU_FAMILY_AMOUNT) { ++ case 1 : ++ features |= sw2f_m; ++ //tty->print_cr("sw2f platform"); ++ break; ++ case 2 : ++ features |= sw4a_m; ++ //tty->print_cr("sw4a platform"); ++ break; ++ case 3 : ++ features |= sw6a_m; ++ //tty->print_cr("sw6a platform"); ++ break; ++ case 4 : ++ features |= sw6b_m; ++ //tty->print_cr("sw6b platform"); ++ break; ++ case 5 : ++ features |= sw1621_m; ++ //tty->print_cr("sw6b platform"); ++ break; ++ case 6 : ++ features |= sw4a_m; ++ //tty->print_cr("sw6b platform"); ++ break; ++ case 7 : ++ features |= sw3231_m; ++ break; ++ case 8 : ++ features |= wx_h8000_m; ++ break; ++ default: ++ ; ++ //tty->print_cr("cpu not support, the cpuinfo is: %s", res); ++ //ShouldNotReachHere(); ++ } ++ return features; ++} +diff --git a/src/hotspot/share/asm/assembler.hpp b/src/hotspot/share/asm/assembler.hpp +index 37bf331e91..6fc66e468f 100644 +--- a/src/hotspot/share/asm/assembler.hpp ++++ b/src/hotspot/share/asm/assembler.hpp +@@ -300,6 +300,9 @@ class AbstractAssembler : public ResourceObj { + static bool is_simm9(int64_t x) { return is_simm(x, 9); } + static bool is_simm10(int64_t x) { return is_simm(x, 10); } + static bool is_simm16(int64_t x) { return is_simm(x, 16); } ++#ifdef SW64 ++ static bool is_simm21(int64_t x) { return is_simm(x, 21); } ++#endif + static bool is_simm32(int64_t x) { return is_simm(x, 32); } + + // Test if x is within unsigned immediate range for width. 
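Note on the is_simm21 addition above: like the neighbouring is_simm9/is_simm16/is_simm32 helpers it reports whether a value fits an N-bit signed immediate, i.e. -2^(N-1) <= x < 2^(N-1); the 21-bit width presumably corresponds to an SW64 branch-displacement field. A minimal stand-alone sketch of that range test (fits_signed_imm and the values in main are illustrative, not part of the patch or of HotSpot's actual is_simm):

#include <cassert>
#include <cstdint>

// Illustrative N-bit signed-immediate test: x fits iff -2^(N-1) <= x < 2^(N-1).
static bool fits_signed_imm(int64_t x, unsigned nbits) {
  assert(nbits > 1 && nbits < 64);
  const int64_t limit = INT64_C(1) << (nbits - 1);
  return -limit <= x && x < limit;
}

int main() {
  // For nbits = 21 the representable range is [-1048576, 1048575].
  assert( fits_signed_imm( 1048575, 21));
  assert(!fits_signed_imm( 1048576, 21));
  assert( fits_signed_imm(-1048576, 21));
  assert(!fits_signed_imm(-1048577, 21));
  return 0;
}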
+diff --git a/src/hotspot/share/asm/codeBuffer.cpp b/src/hotspot/share/asm/codeBuffer.cpp +index 65b414a38c..0f11b0cf40 100644 +--- a/src/hotspot/share/asm/codeBuffer.cpp ++++ b/src/hotspot/share/asm/codeBuffer.cpp +@@ -1190,7 +1190,11 @@ void CodeStrings::free() { + + const char* CodeStrings::add_string(const char * string) { + check_valid(); ++#ifdef SW64 ++ CodeString* s = new CodeString(string, 0); ++#else + CodeString* s = new CodeString(string); ++#endif + s->set_next(_strings); + if (_strings == NULL) { + _strings_last = s; +diff --git a/src/hotspot/share/asm/register.hpp b/src/hotspot/share/asm/register.hpp +index 3c71d94cc6..37a3ce71d3 100644 +--- a/src/hotspot/share/asm/register.hpp ++++ b/src/hotspot/share/asm/register.hpp +@@ -103,7 +103,7 @@ inline void assert_different_registers( + AbstractRegister a, + AbstractRegister b + ) { +- assert( ++ guarantee( + a != b, + "registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT "", p2i(a), p2i(b) + ); +@@ -115,7 +115,7 @@ inline void assert_different_registers( + AbstractRegister b, + AbstractRegister c + ) { +- assert( ++ guarantee( + a != b && a != c + && b != c, + "registers must be different: a=" INTPTR_FORMAT ", b=" INTPTR_FORMAT +@@ -131,7 +131,7 @@ inline void assert_different_registers( + AbstractRegister c, + AbstractRegister d + ) { +- assert( ++ guarantee( + a != b && a != c && a != d + && b != c && b != d + && c != d, +@@ -149,7 +149,7 @@ inline void assert_different_registers( + AbstractRegister d, + AbstractRegister e + ) { +- assert( ++ guarantee( + a != b && a != c && a != d && a != e + && b != c && b != d && b != e + && c != d && c != e +@@ -169,7 +169,7 @@ inline void assert_different_registers( + AbstractRegister e, + AbstractRegister f + ) { +- assert( ++ guarantee( + a != b && a != c && a != d && a != e && a != f + && b != c && b != d && b != e && b != f + && c != d && c != e && c != f +@@ -192,7 +192,7 @@ inline void assert_different_registers( + AbstractRegister f, + AbstractRegister g + ) { +- assert( ++ guarantee( + a != b && a != c && a != d && a != e && a != f && a != g + && b != c && b != d && b != e && b != f && b != g + && c != d && c != e && c != f && c != g +@@ -217,7 +217,7 @@ inline void assert_different_registers( + AbstractRegister g, + AbstractRegister h + ) { +- assert( ++ guarantee( + a != b && a != c && a != d && a != e && a != f && a != g && a != h + && b != c && b != d && b != e && b != f && b != g && b != h + && c != d && c != e && c != f && c != g && c != h +@@ -244,7 +244,7 @@ inline void assert_different_registers( + AbstractRegister h, + AbstractRegister i + ) { +- assert( ++ guarantee( + a != b && a != c && a != d && a != e && a != f && a != g && a != h && a != i + && b != c && b != d && b != e && b != f && b != g && b != h && b != i + && c != d && c != e && c != f && c != g && c != h && c != i +@@ -273,7 +273,7 @@ inline void assert_different_registers( + AbstractRegister i, + AbstractRegister j + ) { +- assert( ++ guarantee( + a != b && a != c && a != d && a != e && a != f && a != g && a != h && a != i && a != j + && b != c && b != d && b != e && b != f && b != g && b != h && b != i && b != j + && c != d && c != e && c != f && c != g && c != h && c != i && c != j +@@ -304,7 +304,7 @@ inline void assert_different_registers( + AbstractRegister j, + AbstractRegister k + ) { +- assert( ++ guarantee( + a != b && a != c && a != d && a != e && a != f && a != g && a != h && a != i && a != j && a !=k + && b != c && b != d && b != e && b != f && b != g && b != h && b != i && b != j 
&& b !=k
+ && c != d && c != e && c != f && c != g && c != h && c != i && c != j && c !=k
+@@ -337,7 +337,7 @@ inline void assert_different_registers(
+ AbstractRegister k,
+ AbstractRegister l
+ ) {
+- assert(
++ guarantee(
+ a != b && a != c && a != d && a != e && a != f && a != g && a != h && a != i && a != j && a !=k && a !=l
+ && b != c && b != d && b != e && b != f && b != g && b != h && b != i && b != j && b !=k && b !=l
+ && c != d && c != e && c != f && c != g && c != h && c != i && c != j && c !=k && c !=l
+diff --git a/src/hotspot/share/c1/c1_CodeStubs.hpp b/src/hotspot/share/c1/c1_CodeStubs.hpp
+index c935edd98c..0e5563209f 100644
+--- a/src/hotspot/share/c1/c1_CodeStubs.hpp
++++ b/src/hotspot/share/c1/c1_CodeStubs.hpp
+@@ -422,7 +422,7 @@ class PatchingStub: public CodeStub {
+ NativeMovRegMem* n_move = nativeMovRegMem_at(pc_start());
+ n_move->set_offset(field_offset);
+ // Copy will never get executed, so only copy the part which is required for patching.
+- _bytes_to_copy = MAX2(n_move->num_bytes_to_end_of_patch(), (int)NativeGeneralJump::instruction_size);
++//// _bytes_to_copy = MAX2(n_move->num_bytes_to_end_of_patch(), (int)NativeGeneralJump::instruction_size); //DJX need fix
+ } else if (_id == load_klass_id || _id == load_mirror_id || _id == load_appendix_id) {
+ assert(_obj != noreg, "must have register object for load_klass/load_mirror");
+ #ifdef ASSERT
+diff --git a/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp b/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp
+index 6ac1689bca..7e470116f1 100644
+--- a/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp
++++ b/src/hotspot/share/gc/g1/g1BlockOffsetTable.inline.hpp
+@@ -76,8 +76,17 @@ void G1BlockOffsetTable::set_offset_array(size_t left, size_t right, u_char offs
+ check_index(right, "right index out of range");
+ assert(left <= right, "indexes out of order");
+ size_t num_cards = right - left + 1;
++#ifndef SW64 //TODO check liangsp
+ memset_with_concurrent_readers
+ (const_cast<u_char*> (&_offset_array[left]), offset, num_cards);
++#else
++ size_t i = left;
++ const size_t end = i + num_cards;
++ for (; i < end; i++) {
++ _offset_array[i] = offset;
++ }
++#endif
++
+ }
+
+ // Variant of index_for that does not check the index for validity.
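Note on the G1 hunk above: memset_with_concurrent_readers is, as its name suggests, a fill helper meant to stay safe while other threads read the array concurrently; the SW64 branch sidesteps it with a plain per-element store loop. A small self-contained sketch of that fallback shape (fill_entries and the bare u_char array are illustrative stand-ins, not the actual G1 types):

#include <cassert>
#include <cstddef>

typedef unsigned char u_char;

// Fill entries [left, left + num_cards) with one value, a single byte store
// at a time -- the same loop shape as the SW64 #else branch in the hunk above.
static void fill_entries(u_char* array, std::size_t left,
                         std::size_t num_cards, u_char value) {
  const std::size_t end = left + num_cards;
  for (std::size_t i = left; i < end; i++) {
    array[i] = value;
  }
}

int main() {
  u_char table[16] = {0};
  fill_entries(table, 4, 8, 0x2a);
  for (std::size_t i = 0; i < 16; i++) {
    assert(table[i] == ((i >= 4 && i < 12) ? 0x2a : 0));
  }
  return 0;
}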
+diff --git a/src/hotspot/share/gc/g1/g1CardTable.cpp b/src/hotspot/share/gc/g1/g1CardTable.cpp +index b66ddd28a5..9f56c9ed38 100644 +--- a/src/hotspot/share/gc/g1/g1CardTable.cpp ++++ b/src/hotspot/share/gc/g1/g1CardTable.cpp +@@ -56,7 +56,10 @@ void G1CardTable::g1_mark_as_young(const MemRegion& mr) { + jbyte *const first = byte_for(mr.start()); + jbyte *const last = byte_after(mr.last()); + +- memset_with_concurrent_readers(first, g1_young_gen, last - first); ++// memset_with_concurrent_readers(first, g1_young_gen, last - first); ++ for (jbyte* i = first; i < last; i++) { ++ *i = g1_young_gen; ++ } + } + + #ifndef PRODUCT +diff --git a/src/hotspot/share/interpreter/abstractInterpreter.cpp b/src/hotspot/share/interpreter/abstractInterpreter.cpp +index 30ef9ed2d7..b00daef6d1 100644 +--- a/src/hotspot/share/interpreter/abstractInterpreter.cpp ++++ b/src/hotspot/share/interpreter/abstractInterpreter.cpp +@@ -180,8 +180,8 @@ AbstractInterpreter::MethodKind AbstractInterpreter::method_kind(const methodHan + case vmIntrinsics::_dlog10: return java_lang_math_log10; + case vmIntrinsics::_dpow : return java_lang_math_pow ; + case vmIntrinsics::_dexp : return java_lang_math_exp ; +- case vmIntrinsics::_fmaD : return java_lang_math_fmaD ; +- case vmIntrinsics::_fmaF : return java_lang_math_fmaF ; ++//// case vmIntrinsics::_fmaD : return java_lang_math_fmaD ; ++//// case vmIntrinsics::_fmaF : return java_lang_math_fmaF ; + + case vmIntrinsics::_Reference_get + : return java_lang_ref_reference_get; +@@ -276,8 +276,8 @@ void AbstractInterpreter::print_method_kind(MethodKind kind) { + case java_lang_math_sqrt : tty->print("java_lang_math_sqrt" ); break; + case java_lang_math_log : tty->print("java_lang_math_log" ); break; + case java_lang_math_log10 : tty->print("java_lang_math_log10" ); break; +- case java_lang_math_fmaD : tty->print("java_lang_math_fmaD" ); break; +- case java_lang_math_fmaF : tty->print("java_lang_math_fmaF" ); break; ++//// case java_lang_math_fmaD : tty->print("java_lang_math_fmaD" ); break; ++//// case java_lang_math_fmaF : tty->print("java_lang_math_fmaF" ); break; + case java_util_zip_CRC32_update : tty->print("java_util_zip_CRC32_update"); break; + case java_util_zip_CRC32_updateBytes : tty->print("java_util_zip_CRC32_updateBytes"); break; + case java_util_zip_CRC32_updateByteBuffer : tty->print("java_util_zip_CRC32_updateByteBuffer"); break; +diff --git a/src/hotspot/share/interpreter/abstractInterpreter.hpp b/src/hotspot/share/interpreter/abstractInterpreter.hpp +index 55d6ad5beb..bbbc96ab05 100644 +--- a/src/hotspot/share/interpreter/abstractInterpreter.hpp ++++ b/src/hotspot/share/interpreter/abstractInterpreter.hpp +@@ -253,7 +253,7 @@ class AbstractInterpreter: AllStatic { + return stackElementWords * i; + } + +-#if !defined(ZERO) && (defined(IA32) || defined(AMD64)) ++#if !defined(ZERO) && (defined(IA32) || defined(AMD64)) || defined(SW64) + static Address::ScaleFactor stackElementScale() { + return NOT_LP64(Address::times_4) LP64_ONLY(Address::times_8); + } +diff --git a/src/hotspot/share/interpreter/bytecodeTracer.cpp b/src/hotspot/share/interpreter/bytecodeTracer.cpp +index d4058e53ef..c398c07a64 100644 +--- a/src/hotspot/share/interpreter/bytecodeTracer.cpp ++++ b/src/hotspot/share/interpreter/bytecodeTracer.cpp +@@ -169,7 +169,7 @@ BytecodeClosure* BytecodeTracer::std_closure() { + + + void BytecodeTracer::trace(const methodHandle& method, address bcp, uintptr_t tos, uintptr_t tos2, outputStream* st) { +- if (TraceBytecodes && BytecodeCounter::counter_value() >= 
TraceBytecodesAt) { ++ if (TraceBytecodes && BytecodeCounter::counter_value() >= TraceBytecodesAt && !TraceBytecodesStubNoPrint) { + ttyLocker ttyl; // 5065316: keep the following output coherent + // The ttyLocker also prevents races between two threads + // trying to use the single instance of BytecodePrinter. +diff --git a/src/hotspot/share/interpreter/interpreterRuntime.cpp b/src/hotspot/share/interpreter/interpreterRuntime.cpp +index 6483159136..5dc155993e 100644 +--- a/src/hotspot/share/interpreter/interpreterRuntime.cpp ++++ b/src/hotspot/share/interpreter/interpreterRuntime.cpp +@@ -1497,7 +1497,7 @@ IRT_ENTRY(void, InterpreterRuntime::prepare_native_call(JavaThread* thread, Meth + // preparing the same method will be sure to see non-null entry & mirror. + IRT_END + +-#if defined(IA32) || defined(AMD64) || defined(ARM) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(SW64) + IRT_LEAF(void, InterpreterRuntime::popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address)) + if (src_address == dest_address) { + return; +diff --git a/src/hotspot/share/interpreter/interpreterRuntime.hpp b/src/hotspot/share/interpreter/interpreterRuntime.hpp +index 87e84c893f..488c62be3e 100644 +--- a/src/hotspot/share/interpreter/interpreterRuntime.hpp ++++ b/src/hotspot/share/interpreter/interpreterRuntime.hpp +@@ -146,7 +146,7 @@ class InterpreterRuntime: AllStatic { + Method* method, + intptr_t* from, intptr_t* to); + +-#if defined(IA32) || defined(AMD64) || defined(ARM) ++#if defined(IA32) || defined(AMD64) || defined(ARM) || defined(SW64) + // Popframe support (only needed on x86, AMD64 and ARM) + static void popframe_move_outgoing_args(JavaThread* thread, void* src_address, void* dest_address); + #endif +diff --git a/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp +index 4535fe0741..7e725e257c 100644 +--- a/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp ++++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.cpp +@@ -204,8 +204,8 @@ void TemplateInterpreterGenerator::generate_all() { + method_entry(java_lang_math_log10) + method_entry(java_lang_math_exp ) + method_entry(java_lang_math_pow ) +- method_entry(java_lang_math_fmaF ) +- method_entry(java_lang_math_fmaD ) ++//// method_entry(java_lang_math_fmaF ) ++//// method_entry(java_lang_math_fmaD ) + method_entry(java_lang_ref_reference_get) + + AbstractInterpreter::initialize_method_handle_entries(); +diff --git a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +index 965f6b0d10..9a4d949836 100644 +--- a/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp ++++ b/src/hotspot/share/interpreter/templateInterpreterGenerator.hpp +@@ -118,6 +118,10 @@ class TemplateInterpreterGenerator: public AbstractInterpreterGenerator { + void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs); + #endif // AARCH64 + ++#ifdef SW64 ++ void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpargs); ++#endif // SW64 ++ + #ifdef PPC + void lock_method(Register Rflags, Register Rscratch1, Register Rscratch2, bool flags_preloaded=false); + void generate_fixed_frame(bool native_call, Register Rsize_of_parameters, Register Rsize_of_locals); +diff --git a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp +index e01a242a57..d17e03002d 100644 
+--- a/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp
++++ b/src/hotspot/share/jfr/utilities/jfrBigEndian.hpp
+@@ -102,7 +102,7 @@ inline T JfrBigEndian::read_unaligned(const address location) {
+ inline bool JfrBigEndian::platform_supports_unaligned_reads(void) {
+ #if defined(IA32) || defined(AMD64) || defined(PPC) || defined(S390)
+ return true;
+-#elif defined(SPARC) || defined(ARM) || defined(AARCH64)
++#elif defined(SPARC) || defined(ARM) || defined(AARCH64) || defined(SW64)
+ return false;
+ #else
+ #warning "Unconfigured platform"
+diff --git a/src/hotspot/share/oops/method.cpp b/src/hotspot/share/oops/method.cpp
+index 9fa79d4488..778e18f8ff 100644
+--- a/src/hotspot/share/oops/method.cpp
++++ b/src/hotspot/share/oops/method.cpp
+@@ -1504,9 +1504,11 @@ vmSymbols::SID Method::klass_id_for_intrinsics(const Klass* holder) {
+ // exception: the AES intrinsics come from lib/ext/sunjce_provider.jar
+ // which does not use the class default class loader so we check for its loader here
+ const InstanceKlass* ik = InstanceKlass::cast(holder);
++#ifndef SW64
+ if ((ik->class_loader() != NULL) && !SystemDictionary::is_platform_class_loader(ik->class_loader())) {
+ return vmSymbols::NO_SID; // regardless of name, no intrinsics here
+ }
++#endif
+
+ // see if the klass name is well-known:
+ Symbol* klass_name = ik->name();
+diff --git a/src/hotspot/share/runtime/globals.hpp b/src/hotspot/share/runtime/globals.hpp
+index ad31842086..2c8f182b41 100644
+--- a/src/hotspot/share/runtime/globals.hpp
++++ b/src/hotspot/share/runtime/globals.hpp
+@@ -1020,7 +1020,8 @@ define_pd_global(uint64_t,MaxRAM, 1ULL*G);
+ \
+ develop(bool, TraceBytecodes, false, \
+ "Trace bytecode execution") \
+- \
++ develop(bool, TraceBytecodesStubNoPrint, false, \
++ "Trace bytecode stub, just for debug") \
+ develop(bool, TraceICs, false, \
+ "Trace inline cache changes") \
+ \
+diff --git a/src/hotspot/share/runtime/safepointMechanism.cpp b/src/hotspot/share/runtime/safepointMechanism.cpp
+index 075f07e9f6..ea5462fc05 100644
+--- a/src/hotspot/share/runtime/safepointMechanism.cpp
++++ b/src/hotspot/share/runtime/safepointMechanism.cpp
+@@ -29,7 +29,7 @@
+ #include "runtime/safepointMechanism.inline.hpp"
+ #include "services/memTracker.hpp"
+ #include "utilities/globalDefinitions.hpp"
+-
++#include <sys/mman.h>
+ SafepointMechanism::PollingType SafepointMechanism::_polling_type = SafepointMechanism::_global_page_poll;
+ void* SafepointMechanism::_poll_armed_value;
+ void* SafepointMechanism::_poll_disarmed_value;
+@@ -73,11 +73,16 @@ void SafepointMechanism::default_initialize() {
+ _poll_disarmed_value = reinterpret_cast<void*>(poll_disarmed_value);
+ } else {
+ const size_t page_size = os::vm_page_size();
++#ifdef OPT_SAFEPOINT
++ void * p = (void *)(0x10000);
++ address polling_page = (address) ::mmap(p, page_size, PROT_READ, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
++#else
+ char* polling_page = os::reserve_memory(page_size, NULL, page_size);
+ os::commit_memory_or_exit(polling_page, page_size, false, "Unable to commit Safepoint polling page");
+ os::protect_memory(polling_page, page_size, os::MEM_PROT_READ);
++#endif
+ MemTracker::record_virtual_memory_type((address)polling_page, mtSafepoint);
+-
++ guarantee( polling_page != MAP_FAILED, "os::init_2: failed to allocate polling page" );
+ log_info(os)("SafePoint Polling address: " INTPTR_FORMAT, p2i(polling_page));
+ os::set_polling_page((address)(polling_page));
+ }
+diff --git a/src/hotspot/share/runtime/sharedRuntime.cpp b/src/hotspot/share/runtime/sharedRuntime.cpp
+index 
595ff7495a..b9bb59e2f1 100644 +--- a/src/hotspot/share/runtime/sharedRuntime.cpp ++++ b/src/hotspot/share/runtime/sharedRuntime.cpp +@@ -211,6 +211,26 @@ JRT_LEAF(jlong, SharedRuntime::lmul(jlong y, jlong x)) + JRT_END + + ++#ifdef SW64 //ZHJ ++JRT_LEAF(jint, SharedRuntime::sdiv(jint y, jint x)) ++ if (x == min_jint && y == CONST64(-1)) { ++ return x; ++ } else { ++ return x / y; ++ } ++JRT_END ++ ++ ++JRT_LEAF(jint, SharedRuntime::srem(jint y, jint x)) ++ if (x == min_jint && y == CONST64(-1)) { ++ return 0; ++ } else { ++ return x % y; ++ } ++JRT_END ++#endif ++ ++ + JRT_LEAF(jlong, SharedRuntime::ldiv(jlong y, jlong x)) + if (x == min_jlong && y == CONST64(-1)) { + return x; +@@ -2550,7 +2570,7 @@ class AdapterHandlerTableIterator : public StackObj { + // Implementation of AdapterHandlerLibrary + AdapterHandlerTable* AdapterHandlerLibrary::_adapters = NULL; + AdapterHandlerEntry* AdapterHandlerLibrary::_abstract_method_handler = NULL; +-const int AdapterHandlerLibrary_size = 16*K; ++const int AdapterHandlerLibrary_size = NOT_SW64(16*K)SW64_ONLY(46*K); + BufferBlob* AdapterHandlerLibrary::_buffer = NULL; + + BufferBlob* AdapterHandlerLibrary::buffer_blob() { +diff --git a/src/hotspot/share/runtime/sharedRuntime.hpp b/src/hotspot/share/runtime/sharedRuntime.hpp +index a110098376..3bce87f92e 100644 +--- a/src/hotspot/share/runtime/sharedRuntime.hpp ++++ b/src/hotspot/share/runtime/sharedRuntime.hpp +@@ -92,6 +92,12 @@ class SharedRuntime: AllStatic { + // not have machine instructions to implement their functionality. + // Do not remove these. + ++#ifdef SW64 ++ static jint sdiv(jint y, jint x); ++ static jint srem(jint y, jint x); ++ static unsigned int updateBytesCRC32(unsigned long crc, const unsigned char *buf_bytes, unsigned int len_ints); ++#endif ++ + // long arithmetics + static jlong lmul(jlong y, jlong x); + static jlong ldiv(jlong y, jlong x); +diff --git a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp +index e086f794cd..c39caf705d 100644 +--- a/src/hotspot/share/runtime/sharedRuntimeTrig.cpp ++++ b/src/hotspot/share/runtime/sharedRuntimeTrig.cpp +@@ -496,29 +496,29 @@ static int __ieee754_rem_pio2(double x, double *y) { + * 3. sin(x) is approximated by a polynomial of degree 13 on + * [0,pi/4] + * 3 13 +- * sin(x) ~ x + S1*x + ... + S6*x ++ * sin(x) ~ x + SS1*x + ... + SS6*x + * where + * + * |sin(x) 2 4 6 8 10 12 | -58 +- * |----- - (1+S1*x +S2*x +S3*x +S4*x +S5*x +S6*x )| <= 2 ++ * |----- - (1+SS1*x +SS2*x +SS3*x +SS4*x +SS5*x +SS6*x )| <= 2 + * | x | + * + * 4. 
sin(x+y) = sin(x) + sin'(x')*y + * ~ sin(x) + (1-x*x/2)*y + * For better accuracy, let + * 3 2 2 2 2 +- * r = x *(S2+x *(S3+x *(S4+x *(S5+x *S6)))) ++ * r = x *(SS2+x *(SS3+x *(SS4+x *(SS5+x *SS6)))) + * then 3 2 +- * sin(x) = x + (S1*x + (x *(r-y/2)+y)) ++ * sin(x) = x + (SS1*x + (x *(r-y/2)+y)) + */ + + static const double +-S1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ +-S2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ +-S3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ +-S4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ +-S5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ +-S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ ++SS1 = -1.66666666666666324348e-01, /* 0xBFC55555, 0x55555549 */ ++SS2 = 8.33333333332248946124e-03, /* 0x3F811111, 0x1110F8A6 */ ++SS3 = -1.98412698298579493134e-04, /* 0xBF2A01A0, 0x19C161D5 */ ++SS4 = 2.75573137070700676789e-06, /* 0x3EC71DE3, 0x57B1FE7D */ ++SS5 = -2.50507602534068634195e-08, /* 0xBE5AE5E6, 0x8A2B9CEB */ ++SS6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ + + static double __kernel_sin(double x, double y, int iy) + { +@@ -529,9 +529,9 @@ static double __kernel_sin(double x, double y, int iy) + {if((int)x==0) return x;} /* generate inexact */ + z = x*x; + v = z*x; +- r = S2+z*(S3+z*(S4+z*(S5+z*S6))); +- if(iy==0) return x+v*(S1+z*r); +- else return x-((z*(half*y-v*r)-y)-v*S1); ++ r = SS2+z*(SS3+z*(SS4+z*(SS5+z*SS6))); ++ if(iy==0) return x+v*(SS1+z*r); ++ else return x-((z*(half*y-v*r)-y)-v*SS1); + } + + /* +diff --git a/src/hotspot/share/runtime/stubRoutines.cpp b/src/hotspot/share/runtime/stubRoutines.cpp +index afefcf666d..3f30e640dd 100644 +--- a/src/hotspot/share/runtime/stubRoutines.cpp ++++ b/src/hotspot/share/runtime/stubRoutines.cpp +@@ -287,7 +287,7 @@ void StubRoutines::initialize2() { + assert(code_size2 == 0 || buffer.insts_remaining() > 200, "increase code_size2"); + } + +-#ifdef ASSERT ++#ifdef ASSERT_TODO_need_check_jzy + + MACOS_AARCH64_ONLY(os::current_thread_enable_wx(WXExec)); + +@@ -339,9 +339,9 @@ void StubRoutines::initialize2() { + } \ + } \ + +- TEST_FILL(jbyte); +- TEST_FILL(jshort); +- TEST_FILL(jint); ++// TEST_FILL(jbyte); ++// TEST_FILL(jshort); ++// TEST_FILL(jint); + + #undef TEST_FILL + +diff --git a/src/hotspot/share/runtime/thread.cpp b/src/hotspot/share/runtime/thread.cpp +index d843651a4c..5b3d988962 100644 +--- a/src/hotspot/share/runtime/thread.cpp ++++ b/src/hotspot/share/runtime/thread.cpp +@@ -3852,6 +3852,7 @@ jint Threads::create_vm(JavaVMInitArgs* args, bool* canTryAgain) { + return status; + } + ++ + JFR_ONLY(Jfr::on_create_vm_1();) + + // Should be done after the heap is fully created +diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp +index 6605ab367c..d7dac7794e 100644 +--- a/src/hotspot/share/utilities/macros.hpp ++++ b/src/hotspot/share/utilities/macros.hpp +@@ -599,8 +599,17 @@ + #define NOT_AARCH64(code) code + #endif + ++#ifdef SW64 ++#define SW64_ONLY(code) code ++#define NOT_SW64(code) ++#else ++#define SW64_ONLY(code) ++#define NOT_SW64(code) code ++#endif ++ + #define MACOS_AARCH64_ONLY(x) MACOS_ONLY(AARCH64_ONLY(x)) + ++ + #ifdef VM_LITTLE_ENDIAN + #define LITTLE_ENDIAN_ONLY(code) code + #define BIG_ENDIAN_ONLY(code) +diff --git a/src/hotspot/share/utilities/nativeCallStack.cpp b/src/hotspot/share/utilities/nativeCallStack.cpp +index d9a2cbf40d..2c0dc318cc 100644 +--- a/src/hotspot/share/utilities/nativeCallStack.cpp ++++ 
b/src/hotspot/share/utilities/nativeCallStack.cpp
+@@ -38,7 +38,7 @@ NativeCallStack::NativeCallStack(int toSkip, bool fillStack) :
+ // to call os::get_native_stack. A tail call is used if _NMT_NOINLINE_ is not defined
+ // (which means this is not a slowdebug build), and we are on 64-bit (except Windows).
+ // This is not necessarily a rule, but what has been obvserved to date.
+-#if (defined(_NMT_NOINLINE_) || defined(_WINDOWS) || !defined(_LP64) || (defined(BSD) && defined (__aarch64__)))
++#if (defined(_NMT_NOINLINE_) || defined(_WINDOWS) || !defined(_LP64) || (defined(BSD) && defined (__aarch64__)) || defined(SW64))
+ // Not a tail call.
+ toSkip++;
+ #if (defined(_NMT_NOINLINE_) && defined(BSD) && defined(_LP64))
+diff --git a/src/java.base/share/classes/sun/security/rsa/RSACore.java b/src/java.base/share/classes/sun/security/rsa/RSACore.java
+index 33534c87da..6d4483033b 100644
+--- a/src/java.base/share/classes/sun/security/rsa/RSACore.java
++++ b/src/java.base/share/classes/sun/security/rsa/RSACore.java
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (c) 2003, 2020, Oracle and/or its affiliates. All rights reserved.
++ * Copyright (c) 2003, 2024, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+@@ -25,20 +25,25 @@
+
+ package sun.security.rsa;
+
+-import java.math.BigInteger;
+-import java.util.*;
+-
+-import java.security.SecureRandom;
+-import java.security.interfaces.*;
++import sun.security.jca.JCAUtil;
+
+ import javax.crypto.BadPaddingException;
+-
+-import sun.security.jca.JCAUtil;
++import java.math.BigInteger;
++import java.security.SecureRandom;
++import java.security.interfaces.RSAKey;
++import java.security.interfaces.RSAPrivateCrtKey;
++import java.security.interfaces.RSAPrivateKey;
++import java.security.interfaces.RSAPublicKey;
++import java.util.Arrays;
++import java.util.Map;
++import java.util.WeakHashMap;
++import java.util.concurrent.ConcurrentLinkedQueue;
++import java.util.concurrent.locks.ReentrantLock;
+
+ /**
+ * Core of the RSA implementation. Has code to perform public and private key
+ * RSA operations (with and without CRT for private key ops). Private CRT ops
+- * also support blinding to twart timing attacks.
++ * also support blinding to thwart timing attacks.
+ *
+ * The code in this class only does the core RSA operation. Padding and
+ * unpadding must be done externally.
+@@ -53,11 +58,14 @@ public final class RSACore {
+ // globally enable/disable use of blinding
+ private static final boolean ENABLE_BLINDING = true;
+
+- // cache for blinding parameters. Map<BigInteger, BlindingParameters>
+- // use a weak hashmap so that cached values are automatically cleared
+- // when the modulus is GC'ed
+- private static final Map<BigInteger, BlindingParameters>
++ // cache for blinding parameters. Map<BigInteger,
++ // ConcurrentLinkedQueue<BlindingParameters>> use a weak hashmap so that,
++ // cached values are automatically cleared when the modulus is GC'ed.
++ // Multiple BlindingParameters can be queued during times of heavy load,
++ // like performance testing.
++ private static final Map> + blindingCache = new WeakHashMap<>(); ++ private static final ReentrantLock lock = new ReentrantLock(); + + private RSACore() { + // empty +@@ -402,56 +410,68 @@ public final class RSACore { + if ((this.e != null && this.e.equals(e)) || + (this.d != null && this.d.equals(d))) { + +- BlindingRandomPair brp = null; +- synchronized (this) { +- if (!u.equals(BigInteger.ZERO) && +- !v.equals(BigInteger.ZERO)) { +- +- brp = new BlindingRandomPair(u, v); +- if (u.compareTo(BigInteger.ONE) <= 0 || +- v.compareTo(BigInteger.ONE) <= 0) { +- +- // need to reset the random pair next time +- u = BigInteger.ZERO; +- v = BigInteger.ZERO; +- } else { +- u = u.modPow(BIG_TWO, n); +- v = v.modPow(BIG_TWO, n); +- } +- } // Otherwise, need to reset the random pair. ++ BlindingRandomPair brp = new BlindingRandomPair(u, v); ++ if (u.compareTo(BigInteger.ONE) <= 0 || ++ v.compareTo(BigInteger.ONE) <= 0) { ++ // Reset so the parameters will be not queued later ++ u = BigInteger.ZERO; ++ v = BigInteger.ZERO; ++ } else { ++ u = u.modPow(BIG_TWO, n); ++ v = v.modPow(BIG_TWO, n); + } ++ + return brp; + } + + return null; + } ++ ++ // Check if reusable, return true if both u & v are not zero. ++ boolean isReusable() { ++ return !u.equals(BigInteger.ZERO) && !v.equals(BigInteger.ZERO); ++ } + } + + private static BlindingRandomPair getBlindingRandomPair( + BigInteger e, BigInteger d, BigInteger n) { + +- BlindingParameters bps = null; +- synchronized (blindingCache) { +- bps = blindingCache.get(n); ++ ConcurrentLinkedQueue queue; ++ ++ // Get queue from map, if there is none then create one ++ lock.lock(); ++ try { ++ queue = blindingCache.computeIfAbsent(n, ++ ignored -> new ConcurrentLinkedQueue<>()); ++ } finally { ++ lock.unlock(); + } + ++ BlindingParameters bps = queue.poll(); + if (bps == null) { + bps = new BlindingParameters(e, d, n); +- synchronized (blindingCache) { +- blindingCache.putIfAbsent(n, bps); +- } + } + +- BlindingRandomPair brp = bps.getBlindingRandomPair(e, d, n); +- if (brp == null) { +- // need to reset the blinding parameters +- bps = new BlindingParameters(e, d, n); +- synchronized (blindingCache) { +- blindingCache.replace(n, bps); +- } ++ BlindingRandomPair brp = null; ++ ++ // Loops to get a valid pair, going through the queue or create a new ++ // parameters if needed. ++ while (brp == null) { + brp = bps.getBlindingRandomPair(e, d, n); ++ if (brp == null) { ++ // need to reset the blinding parameters, first check for ++ // another in the queue. ++ bps = queue.poll(); ++ if (bps == null) { ++ bps = new BlindingParameters(e, d, n); ++ } ++ } + } + ++ // If this parameters are still usable, put them back into the queue. ++ if (bps.isReusable()) { ++ queue.add(bps); ++ } + return brp; + } + +diff --git a/src/jdk.aot/share/classes/jdk.tools.jaotc/src/jdk/tools/jaotc/sw64/SW64ELFMacroAssembler.java b/src/jdk.aot/share/classes/jdk.tools.jaotc/src/jdk/tools/jaotc/sw64/SW64ELFMacroAssembler.java +new file mode 100644 +index 0000000000..53e73afc8e +--- /dev/null ++++ b/src/jdk.aot/share/classes/jdk.tools.jaotc/src/jdk/tools/jaotc/sw64/SW64ELFMacroAssembler.java +@@ -0,0 +1,129 @@ ++/* ++ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++package jdk.tools.jaotc.sw64; ++ ++import jdk.tools.jaotc.StubInformation; ++import jdk.tools.jaotc.ELFMacroAssembler; ++ ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++ ++ ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.Register; ++ ++import static jdk.vm.ci.sw64.SW64.*; ++ ++public final class SW64ELFMacroAssembler extends SW64MacroAssembler implements ELFMacroAssembler { ++ ++ private int currentEndOfInstruction; ++ ++ public SW64ELFMacroAssembler(TargetDescription target) { ++ super(target); ++ } ++ ++ @Override ++ public int currentEndOfInstruction() { ++ return currentEndOfInstruction; ++ } ++ ++ @Override ++ public byte[] getPLTJumpCode() { ++ // The main dispatch instruction ++ addressOf(r16); ++ ldr(64, r16, SW64Address.createBaseRegisterOnlyAddress(r16)); ++ jmp(r16); ++ ++ currentEndOfInstruction = position(); ++ ++ align(8); ++ ++ return close(true); ++ } ++ ++ @Override ++ public byte[] getPLTStaticEntryCode(StubInformation stub) { ++ // The main dispatch instruction ++ addressOf(r16); ++ ldr(64, r16, SW64Address.createBaseRegisterOnlyAddress(r16)); ++ jmp(r16); ++ stub.setDispatchJumpOffset(position()); ++ ++ // C2I stub used to call interpreter. First load r12 ++ // (i.e. rmethod) with a pointer to the Method structure ... ++ addressOf(r12); ++ ldr(64, r12, SW64Address.createBaseRegisterOnlyAddress(r12)); ++ nop(); ++ stub.setMovOffset(position()); ++ ++ // ... then jump to the interpreter. ++ addressOf(r16); ++ ldr(64, r16, SW64Address.createBaseRegisterOnlyAddress(r16)); ++ jmp(r16); ++ stub.setC2IJumpOffset(position()); ++ ++ // Call to VM runtime to resolve the call. ++ stub.setResolveJumpStart(position()); ++ addressOf(r16); ++ ldr(64, r16, SW64Address.createBaseRegisterOnlyAddress(r16)); ++ jmp(r16); ++ stub.setResolveJumpOffset(position()); ++ currentEndOfInstruction = position(); ++ ++ align(8); ++ stub.setSize(position()); ++ ++ return close(true); ++ } ++ ++ @Override ++ public byte[] getPLTVirtualEntryCode(StubInformation stub) { ++ // Fixup an inline cache. ++ // Load r9 with a pointer to the Klass. ++ addressOf(r17); ++ ldr(64, r9, SW64Address.createBaseRegisterOnlyAddress(r17)); ++ nop(); ++ stub.setMovOffset(position()); ++ ++ // Jump to the method. ++ addressOf(r16); ++ ldr(64, r16, SW64Address.createBaseRegisterOnlyAddress(r16)); ++ jmp(r16); ++ stub.setDispatchJumpOffset(position()); ++ ++ // Call to VM runtime to resolve the call. 
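++ // (as in the static entry above, the start and end offsets of this
++ // indirect jump are recorded in the StubInformation)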
++ stub.setResolveJumpStart(position()); ++ addressOf(r16); ++ ldr(64, r16, SW64Address.createBaseRegisterOnlyAddress(r16)); ++ jmp(r16); ++ stub.setResolveJumpOffset(position()); ++ currentEndOfInstruction = position(); ++ ++ align(8); ++ stub.setSize(position()); ++ ++ return close(true); ++ } ++} +diff --git a/src/jdk.aot/share/classes/jdk.tools.jaotc/src/jdk/tools/jaotc/sw64/SW64InstructionDecoder.java b/src/jdk.aot/share/classes/jdk.tools.jaotc/src/jdk/tools/jaotc/sw64/SW64InstructionDecoder.java +new file mode 100644 +index 0000000000..8627484a32 +--- /dev/null ++++ b/src/jdk.aot/share/classes/jdk.tools.jaotc/src/jdk/tools/jaotc/sw64/SW64InstructionDecoder.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++package jdk.tools.jaotc.sw64; ++ ++import jdk.tools.jaotc.InstructionDecoder; ++ ++import jdk.vm.ci.code.TargetDescription; ++ ++public final class SW64InstructionDecoder extends InstructionDecoder { ++ ++ private int currentEndOfInstruction; ++ ++ public SW64InstructionDecoder(TargetDescription target) { ++ } ++ ++ @Override ++ public int currentEndOfInstruction() { ++ return currentEndOfInstruction; ++ } ++ ++ @Override ++ public void decodePosition(final byte[] code, int pcOffset) { ++ currentEndOfInstruction = pcOffset + 4; ++ } ++} +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +index 0d834302c5..7aaae03ac6 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/LinuxDebuggerLocal.c +@@ -58,6 +58,10 @@ + #include "sun_jvm_hotspot_debugger_aarch64_AARCH64ThreadContext.h" + #endif + ++#ifdef sw64 ++#include "sun_jvm_hotspot_debugger_sw64_SW64ThreadContext.h" ++#endif ++ + static jfieldID p_ps_prochandle_ID = 0; + static jfieldID threadList_ID = 0; + static jfieldID loadObjectList_ID = 0; +@@ -397,7 +401,7 @@ JNIEXPORT jbyteArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + return (err == PS_OK)? 
array : 0; + } + +-#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) ++#if defined(i386) || defined(amd64) || defined(sparc) || defined(sparcv9) | defined(ppc64) || defined(ppc64le) || defined(aarch64) || defined(sw64) + JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLocal_getThreadIntegerRegisterSet0 + (JNIEnv *env, jobject this_obj, jint lwp_id) { + +@@ -428,7 +432,9 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + #if defined(ppc64) || defined(ppc64le) + #define NPRGREG sun_jvm_hotspot_debugger_ppc64_PPC64ThreadContext_NPRGREG + #endif +- ++#ifdef sw64 ++#define NPRGREG sun_jvm_hotspot_debugger_sw64_SW64ThreadContext_NPRGREG ++#endif + + array = (*env)->NewLongArray(env, NPRGREG); + CHECK_EXCEPTION_(0); +@@ -574,6 +580,20 @@ JNIEXPORT jlongArray JNICALL Java_sun_jvm_hotspot_debugger_linux_LinuxDebuggerLo + + #endif + ++#ifdef sw64 ++ ++#define REG_INDEX(reg) sun_jvm_hotspot_debugger_sw64_SW64ThreadContext_##reg ++ ++ { ++ int i; ++ for (i = 0; i < 31; i++) ++ regs[i] = gregs.regs[i]; ++ regs[REG_INDEX(PC)] = gregs.pc; ++ regs[REG_INDEX(PSTATE)] = gregs.pstate; ++ } ++ ++#endif /* sw64 */ ++ + (*env)->ReleaseLongArrayElements(env, array, regs, JNI_COMMIT); + return array; + } +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +index 8318e8e021..399299aec9 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/libproc.h +@@ -44,6 +44,10 @@ + #include + #define user_regs_struct pt_regs + #endif ++#if defined(sw64) ++#include ++#define user_regs_struct user_pt_regs ++#endif + + // This C bool type must be int for compatibility with Linux calls and + // it would be a mistake to equivalence it to C++ bool on many platforms +diff --git a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +index c22b5d1cb3..8b817b4980 100644 +--- a/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c ++++ b/src/jdk.hotspot.agent/linux/native/libsaproc/ps_proc.c +@@ -141,6 +141,9 @@ static bool process_get_lwp_regs(struct ps_prochandle* ph, pid_t pid, struct use + #elif defined(PT_GETREGS) + #define PTRACE_GETREGS_REQ PT_GETREGS + #endif ++#if defined(sw64) ++ #undef PTRACE_GETREGS_REQ ++#endif + + #if defined(PTRACE_GETREGSET) + struct iovec iov; +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HSDB.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HSDB.java +index 30811504b8..5c8b1e03d2 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HSDB.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HSDB.java +@@ -997,7 +997,7 @@ public class HSDB implements ObjectHistogramPanel.Listener, SAListener { + curFrame.getFP(), + anno)); + } else { +- // For C2, which has null frame pointers on x86/amd64/aarch64 ++ // For C2, which has null frame pointers on x86/amd64/aarch64/sw64 + CodeBlob cb = VM.getVM().getCodeCache().findBlob(curFrame.getPC()); + Address sp = curFrame.getSP(); + if (Assert.ASSERTS_ENABLED) { +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +index 0f5f0119c7..63037a3894 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java ++++ 
b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/HotSpotAgent.java +@@ -36,6 +36,7 @@ import sun.jvm.hotspot.debugger.MachineDescription; + import sun.jvm.hotspot.debugger.MachineDescriptionAMD64; + import sun.jvm.hotspot.debugger.MachineDescriptionPPC64; + import sun.jvm.hotspot.debugger.MachineDescriptionAArch64; ++import sun.jvm.hotspot.debugger.MachineDescriptionSW64; + import sun.jvm.hotspot.debugger.MachineDescriptionIntelX86; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC32Bit; + import sun.jvm.hotspot.debugger.MachineDescriptionSPARC64Bit; +@@ -592,6 +593,8 @@ public class HotSpotAgent { + machDesc = new MachineDescriptionPPC64(); + } else if (cpu.equals("aarch64")) { + machDesc = new MachineDescriptionAArch64(); ++ } else if (cpu.equals("sw64")) { ++ machDesc = new MachineDescriptionSW64(); + } else if (cpu.equals("sparc")) { + if (LinuxDebuggerLocal.getAddressSize()==8) { + machDesc = new MachineDescriptionSPARC64Bit(); +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionSW64.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionSW64.java +new file mode 100644 +index 0000000000..5e5448340f +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/MachineDescriptionSW64.java +@@ -0,0 +1,39 @@ ++/* ++ * Copyright (c) 2024, 2014, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger; ++ ++public class MachineDescriptionSW64 extends MachineDescriptionTwosComplement implements MachineDescription { ++ public long getAddressSize() { ++ return 8; ++ } ++ ++ public boolean isLP64() { ++ return true; ++ } ++ ++ public boolean isBigEndian() { ++ return false; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +index 5e5a6bb714..0de3e2a69f 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/LinuxCDebugger.java +@@ -35,11 +35,13 @@ import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.sparc.*; + import sun.jvm.hotspot.debugger.ppc64.*; ++import sun.jvm.hotspot.debugger.sw64.*; + import sun.jvm.hotspot.debugger.linux.x86.*; + import sun.jvm.hotspot.debugger.linux.amd64.*; + import sun.jvm.hotspot.debugger.linux.sparc.*; + import sun.jvm.hotspot.debugger.linux.ppc64.*; + import sun.jvm.hotspot.debugger.linux.aarch64.*; ++import sun.jvm.hotspot.debugger.linux.sw64.*; + import sun.jvm.hotspot.utilities.*; + + class LinuxCDebugger implements CDebugger { +@@ -116,6 +118,13 @@ class LinuxCDebugger implements CDebugger { + Address pc = context.getRegisterAsAddress(AARCH64ThreadContext.PC); + if (pc == null) return null; + return new LinuxAARCH64CFrame(dbg, fp, pc); ++ } else if (cpu.equals("sw64")) { ++ SW64ThreadContext context = (SW64ThreadContext) thread.getContext(); ++ Address fp = context.getRegisterAsAddress(SW64ThreadContext.FP); ++ if (fp == null) return null; ++ Address pc = context.getRegisterAsAddress(SW64ThreadContext.PC); ++ if (pc == null) return null; ++ return new LinuxSW64CFrame(dbg, fp, pc); + } else { + // Runtime exception thrown by LinuxThreadContextFactory if unknown cpu + ThreadContext context = (ThreadContext) thread.getContext(); +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/sw64/LinuxSW64CFrame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/sw64/LinuxSW64CFrame.java +new file mode 100644 +index 0000000000..c629e4207c +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/sw64/LinuxSW64CFrame.java +@@ -0,0 +1,86 @@ ++/* ++ * Copyright (c) 2003, 2013, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.sw64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.sw64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++import sun.jvm.hotspot.debugger.cdbg.basic.*; ++ ++final public class LinuxSW64CFrame extends BasicCFrame { ++ public LinuxSW64CFrame(LinuxDebugger dbg, Address fp, Address pc) { ++ super(dbg.getCDebugger()); ++ this.fp = fp; ++ this.pc = pc; ++ this.dbg = dbg; ++ } ++ ++ // override base class impl to avoid ELF parsing ++ public ClosestSymbol closestSymbolToPC() { ++ // try native lookup in debugger. ++ return dbg.lookup(dbg.getAddressValue(pc())); ++ } ++ ++ public Address pc() { ++ return pc; ++ } ++ ++ public Address localVariableBase() { ++ return fp; ++ } ++ ++ public CFrame sender(ThreadProxy thread) { ++ SW64ThreadContext context = (SW64ThreadContext) thread.getContext(); ++ Address rsp = context.getRegisterAsAddress(SW64ThreadContext.SP); ++ ++ if ((fp == null) || fp.lessThan(rsp)) { ++ return null; ++ } ++ ++ // Check alignment of fp ++ if (dbg.getAddressValue(fp) % (2 * ADDRESS_SIZE) != 0) { ++ return null; ++ } ++ ++ Address nextFP = fp.getAddressAt(0 * ADDRESS_SIZE); ++ if (nextFP == null || nextFP.lessThanOrEqual(fp)) { ++ return null; ++ } ++ Address nextPC = fp.getAddressAt(1 * ADDRESS_SIZE); ++ if (nextPC == null) { ++ return null; ++ } ++ return new LinuxSW64CFrame(dbg, nextFP, nextPC); ++ } ++ ++ // package/class internals only ++ private static final int ADDRESS_SIZE = 8; ++ private Address pc; ++ private Address sp; ++ private Address fp; ++ private LinuxDebugger dbg; ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/sw64/LinuxSW64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/sw64/LinuxSW64ThreadContext.java +new file mode 100644 +index 0000000000..842e294cac +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/linux/sw64/LinuxSW64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.linux.sw64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.sw64.*; ++import sun.jvm.hotspot.debugger.linux.*; ++ ++public class LinuxSW64ThreadContext extends SW64ThreadContext { ++ private LinuxDebugger debugger; ++ ++ public LinuxSW64ThreadContext(LinuxDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +index 74e957d94b..c34d7e359f 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/ProcDebuggerLocal.java +@@ -34,11 +34,13 @@ import sun.jvm.hotspot.debugger.proc.amd64.*; + import sun.jvm.hotspot.debugger.proc.aarch64.*; + import sun.jvm.hotspot.debugger.proc.sparc.*; + import sun.jvm.hotspot.debugger.proc.ppc64.*; ++import sun.jvm.hotspot.debugger.proc.sw64.*; + import sun.jvm.hotspot.debugger.proc.x86.*; + import sun.jvm.hotspot.debugger.ppc64.*; + import sun.jvm.hotspot.debugger.amd64.*; + import sun.jvm.hotspot.debugger.aarch64.*; + import sun.jvm.hotspot.debugger.sparc.*; ++import sun.jvm.hotspot.debugger.sw64.*; + import sun.jvm.hotspot.debugger.x86.*; + import sun.jvm.hotspot.utilities.*; + +@@ -90,6 +92,10 @@ public class ProcDebuggerLocal extends DebuggerBase implements ProcDebugger { + threadFactory = new ProcAMD64ThreadFactory(this); + pcRegIndex = AMD64ThreadContext.RIP; + fpRegIndex = AMD64ThreadContext.RBP; ++ } else if (cpu.equals("sw64") || cpu.equals("sw_64")) { ++ threadFactory = new ProcSW64ThreadFactory(this); ++ pcRegIndex = SW64ThreadContext.PC; ++ fpRegIndex = SW64ThreadContext.FP; + } else if (cpu.equals("aarch64")) { + threadFactory = new ProcAARCH64ThreadFactory(this); + pcRegIndex = AARCH64ThreadContext.PC; +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64Thread.java +new file mode 100644 +index 0000000000..a5d6ddfeba +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64Thread.java +@@ -0,0 +1,87 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.sw64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.sw64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class ProcSW64Thread implements ThreadProxy { ++ private ProcDebugger debugger; ++ private int id; ++ ++ public ProcSW64Thread(ProcDebugger debugger, Address addr) { ++ this.debugger = debugger; ++ ++ // FIXME: the size here should be configurable. However, making it ++ // so would produce a dependency on the "types" package from the ++ // debugger package, which is not desired. ++ this.id = (int) addr.getCIntegerAt(0, 4, true); ++ } ++ ++ public ProcSW64Thread(ProcDebugger debugger, long id) { ++ this.debugger = debugger; ++ this.id = (int) id; ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ ProcSW64ThreadContext context = new ProcSW64ThreadContext(debugger); ++ long[] regs = debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == SW64ThreadContext.NPRGREG, "size mismatch"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++ ++ public boolean canSetContext() throws DebuggerException { ++ return false; ++ } ++ ++ public void setContext(ThreadContext context) ++ throws IllegalThreadStateException, DebuggerException { ++ throw new DebuggerException("Unimplemented"); ++ } ++ ++ public String toString() { ++ return "t@" + id; ++ } ++ ++ public boolean equals(Object obj) { ++ if ((obj == null) || !(obj instanceof ProcSW64Thread)) { ++ return false; ++ } ++ ++ return (((ProcSW64Thread) obj).id == id); ++ } ++ ++ public int hashCode() { ++ return id; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64ThreadContext.java +new file mode 100644 +index 0000000000..f8def11beb +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.sw64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.sw64.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcSW64ThreadContext extends SW64ThreadContext { ++ private ProcDebugger debugger; ++ ++ public ProcSW64ThreadContext(ProcDebugger debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64ThreadFactory.java +new file mode 100644 +index 0000000000..9908d21dbe +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/proc/sw64/ProcSW64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.proc.sw64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.proc.*; ++ ++public class ProcSW64ThreadFactory implements ProcThreadFactory { ++ private ProcDebugger debugger; ++ ++ public ProcSW64ThreadFactory(ProcDebugger debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new ProcSW64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new ProcSW64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +index b6253f6d63..89b64cdc30 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/RemoteDebuggerClient.java +@@ -31,6 +31,7 @@ import java.lang.reflect.*; + import sun.jvm.hotspot.debugger.*; + import sun.jvm.hotspot.debugger.cdbg.*; + import sun.jvm.hotspot.debugger.remote.sparc.*; ++import sun.jvm.hotspot.debugger.remote.sw64.*; + import sun.jvm.hotspot.debugger.remote.x86.*; + import sun.jvm.hotspot.debugger.remote.amd64.*; + import sun.jvm.hotspot.debugger.remote.ppc64.*; +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64Thread.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64Thread.java +new file mode 100644 +index 0000000000..a0f1e6fe72 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64Thread.java +@@ -0,0 +1,54 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.sw64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.sw64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class RemoteSW64Thread extends RemoteThread { ++ public RemoteSW64Thread(RemoteDebuggerClient debugger, Address addr) { ++ super(debugger, addr); ++ } ++ ++ public RemoteSW64Thread(RemoteDebuggerClient debugger, long id) { ++ super(debugger, id); ++ } ++ ++ public ThreadContext getContext() throws IllegalThreadStateException { ++ RemoteSW64ThreadContext context = new RemoteSW64ThreadContext(debugger); ++ long[] regs = (addr != null)? debugger.getThreadIntegerRegisterSet(addr) : ++ debugger.getThreadIntegerRegisterSet(id); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(regs.length == SW64ThreadContext.NPRGREG, "size of register set must match"); ++ } ++ for (int i = 0; i < regs.length; i++) { ++ context.setRegister(i, regs[i]); ++ } ++ return context; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64ThreadContext.java +new file mode 100644 +index 0000000000..fcaf8d2597 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64ThreadContext.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.sw64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.sw64.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteSW64ThreadContext extends SW64ThreadContext { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteSW64ThreadContext(RemoteDebuggerClient debugger) { ++ super(); ++ this.debugger = debugger; ++ } ++ ++ public void setRegisterAsAddress(int index, Address value) { ++ setRegister(index, debugger.getAddressValue(value)); ++ } ++ ++ public Address getRegisterAsAddress(int index) { ++ return debugger.newAddress(getRegister(index)); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64ThreadFactory.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64ThreadFactory.java +new file mode 100644 +index 0000000000..618764c882 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/remote/sw64/RemoteSW64ThreadFactory.java +@@ -0,0 +1,45 @@ ++/* ++ * Copyright (c) 2004, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.remote.sw64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.remote.*; ++ ++public class RemoteSW64ThreadFactory implements RemoteThreadFactory { ++ private RemoteDebuggerClient debugger; ++ ++ public RemoteSW64ThreadFactory(RemoteDebuggerClient debugger) { ++ this.debugger = debugger; ++ } ++ ++ public ThreadProxy createThreadWrapper(Address threadIdentifierAddr) { ++ return new RemoteSW64Thread(debugger, threadIdentifierAddr); ++ } ++ ++ public ThreadProxy createThreadWrapper(long id) { ++ return new RemoteSW64Thread(debugger, id); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/sw64/SW64ThreadContext.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/sw64/SW64ThreadContext.java +new file mode 100644 +index 0000000000..7b2c201e0e +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/debugger/sw64/SW64ThreadContext.java +@@ -0,0 +1,143 @@ ++/* ++ * Copyright (c) 2003, 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.debugger.sw64; ++ ++import java.lang.annotation.Native; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.cdbg.*; ++ ++/** Specifies the thread context on sw64 platforms; only a sub-portion ++ * of the context is guaranteed to be present on all operating ++ * systems. */ ++ ++public abstract class SW64ThreadContext implements ThreadContext { ++ // Taken from /usr/include/asm/sigcontext.h on Linux/SW64. ++ ++ // /* ++ // * Signal context structure - contains all info to do with the state ++ // * before the signal handler was invoked. ++ // */ ++ // struct sigcontext { ++ // __u64 fault_address; ++ // /* sw64 registers */ ++ // __u64 regs[31]; ++ // __u64 pc; ++ // __u64 pstate; ++ // /* 4K reserved for FP/SIMD state and future expansion */ ++ // __u8 __reserved[4096] __attribute__((__aligned__(16))); ++ // }; ++ ++ // NOTE: the indices for the various registers must be maintained as ++ // listed across various operating systems. However, only a small ++ // subset of the registers' values are guaranteed to be present (and ++ // must be present for the SA's stack walking to work) ++ ++ // One instance of the Native annotation is enough to trigger header generation ++ // for this file. 
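++ // Indices 0-30 cover the 31 integer registers; PC and PSTATE follow
++ // at 31 and 32, giving NPRGREG == 33.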
++ @Native ++ public static final int V0 = 0; ++ public static final int T0 = 1; ++ public static final int T1 = 2; ++ public static final int T2 = 3; ++ public static final int T3 = 4; ++ public static final int T4 = 5; ++ public static final int T5 = 6; ++ public static final int T6 = 7; ++ public static final int T7 = 8; ++ public static final int S0 = 9; ++ public static final int S1 = 10; ++ public static final int S2 = 11; ++ public static final int S3 = 12; ++ public static final int S4 = 13; ++ public static final int S5 = 14; ++ public static final int FP = 15; ++ public static final int A0 = 16; ++ public static final int A1 = 17; ++ public static final int A2 = 18; ++ public static final int A3 = 19; ++ public static final int A4 = 20; ++ public static final int A5 = 21; ++ public static final int T8 = 22; ++ public static final int T9 = 23; ++ public static final int T10 = 24; ++ public static final int T11 = 25; ++ public static final int RA = 26; ++ public static final int T12 = 27; ++ public static final int AT = 28; ++ public static final int GP = 29; ++ public static final int SP = 30; ++ public static final int PC = 31; ++ public static final int PSTATE = 32; ++ ++ public static final int NPRGREG = 33; ++ ++ private static final String[] regNames = { ++ "V0", "T0", "T1", "T2", ++ "T3", "T4", "T5", "T6", ++ "T7", "S0", "S1", "S2", ++ "S3", "S4", "S5", "FP", ++ "A0", "A1", "A2", "A3", ++ "A4", "A5", "T8", "T9", ++ "T10", "T11", "RA", "T12", ++ "AT", "GP", "SP", "PC", ++ "PSTATE", ++ }; ++ ++ private long[] data; ++ ++ public SW64ThreadContext() { ++ data = new long[NPRGREG]; ++ } ++ ++ public int getNumRegisters() { ++ return NPRGREG; ++ } ++ ++ public String getRegisterName(int index) { ++ return regNames[index]; ++ } ++ ++ public void setRegister(int index, long value) { ++ data[index] = value; ++ } ++ ++ public long getRegister(int index) { ++ return data[index]; ++ } ++ ++ public CFrame getTopFrame(Debugger dbg) { ++ return null; ++ } ++ ++ /** This can't be implemented in this class since we would have to ++ * tie the implementation to, for example, the debugging system */ ++ public abstract void setRegisterAsAddress(int index, Address value); ++ ++ /** This can't be implemented in this class since we would have to ++ * tie the implementation to, for example, the debugging system */ ++ public abstract Address getRegisterAsAddress(int index); ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +index 190062785a..af615a571b 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/Threads.java +@@ -40,6 +40,7 @@ import sun.jvm.hotspot.runtime.linux_amd64.LinuxAMD64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_aarch64.LinuxAARCH64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_ppc64.LinuxPPC64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.linux_sparc.LinuxSPARCJavaThreadPDAccess; ++import sun.jvm.hotspot.runtime.linux_sw64.LinuxSW64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_x86.BsdX86JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_amd64.BsdAMD64JavaThreadPDAccess; + import sun.jvm.hotspot.runtime.bsd_aarch64.BsdAARCH64JavaThreadPDAccess; +@@ -99,6 +100,8 @@ public class Threads { + access = new LinuxPPC64JavaThreadPDAccess(); + } else if (cpu.equals("aarch64")) { + access = new LinuxAARCH64JavaThreadPDAccess(); ++ 
} else if (cpu.equals("sw64")) { ++ access = new LinuxSW64JavaThreadPDAccess(); + } else { + try { + access = (JavaThreadPDAccess) +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_sw64/LinuxSW64JavaThreadPDAccess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_sw64/LinuxSW64JavaThreadPDAccess.java +new file mode 100644 +index 0000000000..31dba70845 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/linux_sw64/LinuxSW64JavaThreadPDAccess.java +@@ -0,0 +1,132 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.linux_sw64; ++ ++import java.io.*; ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.sw64.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.sw64.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++public class LinuxSW64JavaThreadPDAccess implements JavaThreadPDAccess { ++ private static AddressField lastJavaFPField; ++ private static AddressField osThreadField; ++ ++ // Field from OSThread ++ private static CIntegerField osThreadThreadIDField; ++ ++ // This is currently unneeded but is being kept in case we change ++ // the currentFrameGuess algorithm ++ private static final long GUESS_SCAN_RANGE = 128 * 1024; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaThread"); ++ osThreadField = type.getAddressField("_osthread"); ++ ++ Type anchorType = db.lookupType("JavaFrameAnchor"); ++ lastJavaFPField = anchorType.getAddressField("_last_Java_fp"); ++ ++ Type osThreadType = db.lookupType("OSThread"); ++ osThreadThreadIDField = osThreadType.getCIntegerField("_thread_id"); ++ } ++ ++ public Address getLastJavaFP(Address addr) { ++ return lastJavaFPField.getValue(addr.addOffsetTo(sun.jvm.hotspot.runtime.JavaThread.getAnchorField().getOffset())); ++ } ++ ++ public Address getLastJavaPC(Address addr) { ++ return null; ++ } ++ ++ public Address getBaseOfStackPointer(Address addr) { ++ return null; ++ } ++ ++ public Frame getLastFramePD(JavaThread thread, Address addr) { ++ Address fp = thread.getLastJavaFP(); ++ if (fp == null) { ++ return null; // no information ++ } ++ return new 
SW64Frame(thread.getLastJavaSP(), fp); ++ } ++ ++ public RegisterMap newRegisterMap(JavaThread thread, boolean updateMap) { ++ return new SW64RegisterMap(thread, updateMap); ++ } ++ ++ public Frame getCurrentFrameGuess(JavaThread thread, Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ SW64ThreadContext context = (SW64ThreadContext) t.getContext(); ++ SW64CurrentFrameGuess guesser = new SW64CurrentFrameGuess(context, thread); ++ if (!guesser.run(GUESS_SCAN_RANGE)) { ++ return null; ++ } ++ if (guesser.getPC() == null) { ++ return new SW64Frame(guesser.getSP(), guesser.getFP()); ++ } else { ++ return new SW64Frame(guesser.getSP(), guesser.getFP(), guesser.getPC()); ++ } ++ } ++ ++ public void printThreadIDOn(Address addr, PrintStream tty) { ++ tty.print(getThreadProxy(addr)); ++ } ++ ++ public void printInfoOn(Address threadAddr, PrintStream tty) { ++ tty.print("Thread id: "); ++ printThreadIDOn(threadAddr, tty); ++// tty.println("\nPostJavaState: " + getPostJavaState(threadAddr)); ++ } ++ ++ public Address getLastSP(Address addr) { ++ ThreadProxy t = getThreadProxy(addr); ++ SW64ThreadContext context = (SW64ThreadContext) t.getContext(); ++ return context.getRegisterAsAddress(SW64ThreadContext.SP); ++ } ++ ++ public ThreadProxy getThreadProxy(Address addr) { ++ // Addr is the address of the JavaThread. ++ // Fetch the OSThread (for now and for simplicity, not making a ++ // separate "OSThread" class in this package) ++ Address osThreadAddr = osThreadField.getValue(addr); ++ // Get the address of the _thread_id from the OSThread ++ Address threadIdAddr = osThreadAddr.addOffsetTo(osThreadThreadIDField.getOffset()); ++ ++ JVMDebugger debugger = VM.getVM().getDebugger(); ++ return debugger.getThreadForIdentifierAddress(threadIdAddr); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64CurrentFrameGuess.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64CurrentFrameGuess.java +new file mode 100644 +index 0000000000..c340a41e82 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64CurrentFrameGuess.java +@@ -0,0 +1,250 @@ ++/* ++ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Red Hat Inc. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.sw64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.debugger.sw64.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.interpreter.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.runtime.sw64.*; ++ ++/**

Should be able to be used on all sw64 platforms we support
++ (Linux/sw64) to implement JavaThread's "currentFrameGuess()"
++ functionality. Input is an SW64ThreadContext; output is SP, FP,
++ and PC for an SW64Frame. Instantiation of the SW64Frame is
++ left to the caller, since we may need to subclass SW64Frame to
++ support signal handler frames on Unix platforms.
++
++ Algorithm is to walk up the stack within a given range (say,
++ 512K at most) looking for a plausible PC and SP for a Java frame,
++ also considering those coming in from the context. If we find a PC
++ that belongs to the VM (i.e., in generated code like the
++ interpreter or CodeCache) then we try to find an associated FP.
++ We repeat this until we either find a complete frame or run out of
++ stack to look at.
++
*/ ++ ++public class SW64CurrentFrameGuess { ++ private SW64ThreadContext context; ++ private JavaThread thread; ++ private Address spFound; ++ private Address fpFound; ++ private Address pcFound; ++ ++ private static final boolean DEBUG = System.getProperty("sun.jvm.hotspot.runtime.sw64.SW64Frame.DEBUG") ++ != null; ++ ++ public SW64CurrentFrameGuess(SW64ThreadContext context, ++ JavaThread thread) { ++ this.context = context; ++ this.thread = thread; ++ } ++ ++ /** Returns false if not able to find a frame within a reasonable range. */ ++ public boolean run(long regionInBytesToSearch) { ++ Address sp = context.getRegisterAsAddress(SW64ThreadContext.SP); ++ Address pc = context.getRegisterAsAddress(SW64ThreadContext.PC); ++ Address fp = context.getRegisterAsAddress(SW64ThreadContext.FP); ++ if (sp == null) { ++ // Bail out if no last java frame either ++ if (thread.getLastJavaSP() != null) { ++ setValues(thread.getLastJavaSP(), thread.getLastJavaFP(), null); ++ return true; ++ } ++ return false; ++ } ++ Address end = sp.addOffsetTo(regionInBytesToSearch); ++ VM vm = VM.getVM(); ++ ++ setValues(null, null, null); // Assume we're not going to find anything ++ ++ if (vm.isJavaPCDbg(pc)) { ++ if (vm.isClientCompiler()) { ++ // If the topmost frame is a Java frame, we are (pretty much) ++ // guaranteed to have a viable FP. We should be more robust ++ // than this (we have the potential for losing entire threads' ++ // stack traces) but need to see how much work we really have ++ // to do here. Searching the stack for an (SP, FP) pair is ++ // hard since it's easy to misinterpret inter-frame stack ++ // pointers as base-of-frame pointers; we also don't know the ++ // sizes of C1 frames (not registered in the nmethod) so can't ++ // derive them from SP. ++ ++ setValues(sp, fp, pc); ++ return true; ++ } else { ++ if (vm.getInterpreter().contains(pc)) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing interpreter frame: sp = " + ++ sp + ", fp = " + fp + ", pc = " + pc); ++ } ++ setValues(sp, fp, pc); ++ return true; ++ } ++ ++ // For the server compiler, FP is not guaranteed to be valid ++ // for compiled code. In addition, an earlier attempt at a ++ // non-searching algorithm (see below) failed because the ++ // stack pointer from the thread context was pointing ++ // (considerably) beyond the ostensible end of the stack, into ++ // garbage; walking from the topmost frame back caused a crash. ++ // ++ // This algorithm takes the current PC as a given and tries to ++ // find the correct corresponding SP by walking up the stack ++ // and repeatedly performing stackwalks (very inefficient). ++ // ++ // FIXME: there is something wrong with stackwalking across ++ // adapter frames...this is likely to be the root cause of the ++ // failure with the simpler algorithm below. ++ ++ for (long offset = 0; ++ offset < regionInBytesToSearch; ++ offset += vm.getAddressSize()) { ++ try { ++ Address curSP = sp.addOffsetTo(offset); ++ Frame frame = new SW64Frame(curSP, null, pc); ++ RegisterMap map = thread.newRegisterMap(false); ++ while (frame != null) { ++ if (frame.isEntryFrame() && frame.entryFrameIsFirst()) { ++ // We were able to traverse all the way to the ++ // bottommost Java frame. ++ // This sp looks good. Keep it. 
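++ // (no FP is recorded on this path; the frame is described by SP and PC only)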
++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Choosing sp = " + curSP + ", pc = " + pc); ++ } ++ setValues(curSP, null, pc); ++ return true; ++ } ++ frame = frame.sender(map); ++ } ++ } catch (Exception e) { ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: Exception " + e + " at offset " + offset); ++ } ++ // Bad SP. Try another. ++ } ++ } ++ ++ // Were not able to find a plausible SP to go with this PC. ++ // Bail out. ++ return false; ++ ++ /* ++ // Original algorithm which does not work because SP was ++ // pointing beyond where it should have: ++ ++ // For the server compiler, FP is not guaranteed to be valid ++ // for compiled code. We see whether the PC is in the ++ // interpreter and take care of that, otherwise we run code ++ // (unfortunately) duplicated from SW64Frame.senderForCompiledFrame. ++ ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ ++ // See if we can derive a frame pointer from SP and PC ++ // NOTE: This is the code duplicated from SW64Frame ++ Address saved_fp = null; ++ int llink_offset = cb.getLinkOffset(); ++ if (llink_offset >= 0) { ++ // Restore base-pointer, since next frame might be an interpreter frame. ++ Address fp_addr = sp.addOffsetTo(VM.getVM().getAddressSize() * llink_offset); ++ saved_fp = fp_addr.getAddressAt(0); ++ } ++ ++ setValues(sp, saved_fp, pc); ++ return true; ++ } ++ */ ++ } ++ } else { ++ // If the current program counter was not known to us as a Java ++ // PC, we currently assume that we are in the run-time system ++ // and attempt to look to thread-local storage for saved SP and ++ // FP. Note that if these are null (because we were, in fact, ++ // in Java code, i.e., vtable stubs or similar, and the SA ++ // didn't have enough insight into the target VM to understand ++ // that) then we are going to lose the entire stack trace for ++ // the thread, which is sub-optimal. FIXME. ++ ++ if (DEBUG) { ++ System.out.println("CurrentFrameGuess: choosing last Java frame: sp = " + ++ thread.getLastJavaSP() + ", fp = " + thread.getLastJavaFP()); ++ } ++ if (thread.getLastJavaSP() == null) { ++ return false; // No known Java frames on stack ++ } ++ ++ // The runtime has a nasty habit of not saving fp in the frame ++ // anchor, leaving us to grovel about in the stack to find a ++ // plausible address. Fortunately, this only happens in ++ // compiled code; there we always have a valid PC, and we always ++ // push LR and FP onto the stack as a pair, with FP at the lower ++ // address. ++ pc = thread.getLastJavaPC(); ++ fp = thread.getLastJavaFP(); ++ sp = thread.getLastJavaSP(); ++ ++ if (fp == null) { ++ CodeCache cc = vm.getCodeCache(); ++ if (cc.contains(pc)) { ++ CodeBlob cb = cc.findBlob(pc); ++ if (DEBUG) { ++ System.out.println("FP is null. Found blob frame size " + cb.getFrameSize()); ++ } ++ // See if we can derive a frame pointer from SP and PC ++ long link_offset = cb.getFrameSize() - 2 * VM.getVM().getAddressSize(); ++ if (link_offset >= 0) { ++ fp = sp.addOffsetTo(link_offset); ++ } ++ } ++ } ++ ++ // We found a PC in the frame anchor. Check that it's plausible, and ++ // if it is, use it. 
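++
++ // Worked example of the FP recovery above (assuming an 8-byte address
++ // size): for a blob with getFrameSize() == 48, the saved {FP, LR} pair
++ // occupies the two topmost slots of the frame, so link_offset is
++ // 48 - 2 * 8 = 32 and the frame pointer is taken to be sp + 32, the
++ // address of the saved-FP slot.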
++ if (vm.isJavaPCDbg(pc)) { ++ setValues(sp, fp, pc); ++ } else { ++ setValues(sp, fp, null); ++ } ++ ++ return true; ++ } ++ } ++ ++ public Address getSP() { return spFound; } ++ public Address getFP() { return fpFound; } ++ /** May be null if getting values from thread-local storage; take ++ care to call the correct SW64Frame constructor to recover this if ++ necessary */ ++ public Address getPC() { return pcFound; } ++ ++ private void setValues(Address sp, Address fp, Address pc) { ++ spFound = sp; ++ fpFound = fp; ++ pcFound = pc; ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64Frame.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64Frame.java +new file mode 100644 +index 0000000000..3716910df4 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64Frame.java +@@ -0,0 +1,564 @@ ++/* ++ * Copyright (c) 2001, 2019, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, 2019, Red Hat Inc. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.sw64; ++ ++import java.util.*; ++import sun.jvm.hotspot.code.*; ++import sun.jvm.hotspot.compiler.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.oops.*; ++import sun.jvm.hotspot.runtime.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.utilities.*; ++ ++/** Specialization of and implementation of abstract methods of the ++ Frame class for the sw64 family of CPUs. 
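++
++    The slot constants defined below assume the usual fixed frame
++    linkage (a sketch: one address word per slot, offsets taken from
++    this frame's FP):
++
++      fp + 0 words : saved caller FP               (LINK_OFFSET)
++      fp + 1 word  : return address                (RETURN_ADDR_OFFSET)
++      fp + 2 words : first word of caller's frame  (SENDER_SP_OFFSET)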
*/ ++ ++public class SW64Frame extends Frame { ++ private static final boolean DEBUG; ++ static { ++ DEBUG = System.getProperty("sun.jvm.hotspot.runtime.sw64.SW64Frame.DEBUG") != null; ++ } ++ ++ // All frames ++ private static final int LINK_OFFSET = 0; ++ private static final int RETURN_ADDR_OFFSET = 1; ++ private static final int SENDER_SP_OFFSET = 2; ++ ++ // Interpreter frames ++ private static final int INTERPRETER_FRAME_SENDER_SP_OFFSET = -1; ++ private static final int INTERPRETER_FRAME_LAST_SP_OFFSET = INTERPRETER_FRAME_SENDER_SP_OFFSET - 1; ++ private static final int INTERPRETER_FRAME_METHOD_OFFSET = INTERPRETER_FRAME_LAST_SP_OFFSET - 1; // -3 ++ private static int INTERPRETER_FRAME_MIRROR_OFFSET; ++ private static int INTERPRETER_FRAME_MDX_OFFSET; // Non-core builds only ++ private static int INTERPRETER_FRAME_CACHE_OFFSET; ++ private static int INTERPRETER_FRAME_LOCALS_OFFSET; ++ private static int INTERPRETER_FRAME_BCX_OFFSET; ++ private static int INTERPRETER_FRAME_INITIAL_SP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET; ++ private static int INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET; ++ ++ // Entry frames ++ private static int ENTRY_FRAME_CALL_WRAPPER_OFFSET = -6; ++ ++ // Native frames ++ private static final int NATIVE_FRAME_INITIAL_PARAM_OFFSET = 2; ++ ++ private static VMReg fp = new VMReg(29); ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ INTERPRETER_FRAME_MIRROR_OFFSET = INTERPRETER_FRAME_METHOD_OFFSET - 1; // -4 ++ INTERPRETER_FRAME_MDX_OFFSET = INTERPRETER_FRAME_MIRROR_OFFSET - 1; // -5 // Non-core builds only ++ INTERPRETER_FRAME_CACHE_OFFSET = INTERPRETER_FRAME_MDX_OFFSET - 1; // -6 ++ INTERPRETER_FRAME_LOCALS_OFFSET = INTERPRETER_FRAME_CACHE_OFFSET - 1; // -7 ++ INTERPRETER_FRAME_BCX_OFFSET = INTERPRETER_FRAME_LOCALS_OFFSET - 1; // -8 ++ INTERPRETER_FRAME_INITIAL_SP_OFFSET = INTERPRETER_FRAME_BCX_OFFSET - 1; // -9 ++ INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET ; // -9 ++ INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET = INTERPRETER_FRAME_INITIAL_SP_OFFSET; // -9 ++ } ++ ++ ++ // an additional field beyond sp and pc: ++ Address raw_fp; // frame pointer ++ private Address raw_unextendedSP; ++ ++ private SW64Frame() { ++ } ++ ++ private void adjustForDeopt() { ++ if ( pc != null) { ++ // Look for a deopt pc and if it is deopted convert to original pc ++ CodeBlob cb = VM.getVM().getCodeCache().findBlob(pc); ++ if (cb != null && cb.isJavaMethod()) { ++ NMethod nm = (NMethod) cb; ++ if (pc.equals(nm.deoptHandlerBegin())) { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(this.getUnextendedSP() != null, "null SP in Java frame"); ++ } ++ // adjust pc if frame is deoptimized. 
++ pc = this.getUnextendedSP().getAddressAt(nm.origPCOffset()); ++ deoptimized = true; ++ } ++ } ++ } ++ } ++ ++ public SW64Frame(Address raw_sp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("SW64Frame(sp, fp, pc): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public SW64Frame(Address raw_sp, Address raw_fp) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_sp; ++ this.raw_fp = raw_fp; ++ ++ // We cannot assume SP[-1] always contains a valid return PC (e.g. if ++ // the callee is a C/C++ compiled frame). If the PC is not known to ++ // Java then this.pc is null. ++ Address savedPC = raw_sp.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ if (VM.getVM().isJavaPCDbg(savedPC)) { ++ this.pc = savedPC; ++ } ++ ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("SW64Frame(sp, fp): " + this); ++ dumpStack(); ++ } ++ } ++ ++ public SW64Frame(Address raw_sp, Address raw_unextendedSp, Address raw_fp, Address pc) { ++ this.raw_sp = raw_sp; ++ this.raw_unextendedSP = raw_unextendedSp; ++ this.raw_fp = raw_fp; ++ this.pc = pc; ++ adjustUnextendedSP(); ++ ++ // Frame must be fully constructed before this call ++ adjustForDeopt(); ++ ++ if (DEBUG) { ++ System.out.println("SW64Frame(sp, unextendedSP, fp, pc): " + this); ++ dumpStack(); ++ } ++ ++ } ++ ++ public Object clone() { ++ SW64Frame frame = new SW64Frame(); ++ frame.raw_sp = raw_sp; ++ frame.raw_unextendedSP = raw_unextendedSP; ++ frame.raw_fp = raw_fp; ++ frame.pc = pc; ++ frame.deoptimized = deoptimized; ++ return frame; ++ } ++ ++ public boolean equals(Object arg) { ++ if (arg == null) { ++ return false; ++ } ++ ++ if (!(arg instanceof SW64Frame)) { ++ return false; ++ } ++ ++ SW64Frame other = (SW64Frame) arg; ++ ++ return (AddressOps.equal(getSP(), other.getSP()) && ++ AddressOps.equal(getUnextendedSP(), other.getUnextendedSP()) && ++ AddressOps.equal(getFP(), other.getFP()) && ++ AddressOps.equal(getPC(), other.getPC())); ++ } ++ ++ public int hashCode() { ++ if (raw_sp == null) { ++ return 0; ++ } ++ ++ return raw_sp.hashCode(); ++ } ++ ++ public String toString() { ++ return "sp: " + (getSP() == null? "null" : getSP().toString()) + ++ ", unextendedSP: " + (getUnextendedSP() == null? "null" : getUnextendedSP().toString()) + ++ ", fp: " + (getFP() == null? "null" : getFP().toString()) + ++ ", pc: " + (pc == null? 
"null" : pc.toString()); ++ } ++ ++ // accessors for the instance variables ++ public Address getFP() { return raw_fp; } ++ public Address getSP() { return raw_sp; } ++ public Address getID() { return raw_sp; } ++ ++ // FIXME: not implemented yet ++ public boolean isSignalHandlerFrameDbg() { return false; } ++ public int getSignalNumberDbg() { return 0; } ++ public String getSignalNameDbg() { return null; } ++ ++ public boolean isInterpretedFrameValid() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "Not an interpreted frame"); ++ } ++ ++ // These are reasonable sanity checks ++ if (getFP() == null || getFP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getSP() == null || getSP().andWithMask(0x3) != null) { ++ return false; ++ } ++ ++ if (getFP().addOffsetTo(INTERPRETER_FRAME_INITIAL_SP_OFFSET * VM.getVM().getAddressSize()).lessThan(getSP())) { ++ return false; ++ } ++ ++ // These are hacks to keep us out of trouble. ++ // The problem with these is that they mask other problems ++ if (getFP().lessThanOrEqual(getSP())) { ++ // this attempts to deal with unsigned comparison above ++ return false; ++ } ++ ++ if (getFP().minus(getSP()) > 4096 * VM.getVM().getAddressSize()) { ++ // stack frames shouldn't be large. ++ return false; ++ } ++ ++ return true; ++ } ++ ++ // FIXME: not applicable in current system ++ // void patch_pc(Thread* thread, address pc); ++ ++ public Frame sender(RegisterMap regMap, CodeBlob cb) { ++ SW64RegisterMap map = (SW64RegisterMap) regMap; ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // Default is we done have to follow them. The sender_for_xxx will ++ // update it accordingly ++ map.setIncludeArgumentOops(false); ++ ++ if (isEntryFrame()) return senderForEntryFrame(map); ++ if (isInterpretedFrame()) return senderForInterpreterFrame(map); ++ ++ if(cb == null) { ++ cb = VM.getVM().getCodeCache().findBlob(getPC()); ++ } else { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.equals(VM.getVM().getCodeCache().findBlob(getPC())), "Must be the same"); ++ } ++ } ++ ++ if (cb != null) { ++ return senderForCompiledFrame(map, cb); ++ } ++ ++ // Must be native-compiled frame, i.e. the marshaling code for native ++ // methods that exists in the core system. 
++ return new SW64Frame(getSenderSP(), getLink(), getSenderPC()); ++ } ++ ++ private Frame senderForEntryFrame(SW64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForEntryFrame"); ++ } ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ // Java frame called from C; skip all C frames and return top C ++ // frame of that chunk as the sender ++ SW64JavaCallWrapper jcw = (SW64JavaCallWrapper) getEntryFrameCallWrapper(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(!entryFrameIsFirst(), "next Java fp must be non zero"); ++ Assert.that(jcw.getLastJavaSP().greaterThan(getSP()), "must be above this frame on stack"); ++ } ++ SW64Frame fr; ++ if (jcw.getLastJavaPC() != null) { ++ fr = new SW64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP(), jcw.getLastJavaPC()); ++ } else { ++ fr = new SW64Frame(jcw.getLastJavaSP(), jcw.getLastJavaFP()); ++ } ++ map.clear(); ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map.getIncludeArgumentOops(), "should be set by clear"); ++ } ++ return fr; ++ } ++ ++ //------------------------------------------------------------------------------ ++ // frame::adjust_unextended_sp ++ private void adjustUnextendedSP() { ++ // If we are returning to a compiled MethodHandle call site, the ++ // saved_fp will in fact be a saved value of the unextended SP. The ++ // simplest way to tell whether we are returning to such a call site ++ // is as follows: ++ ++ CodeBlob cb = cb(); ++ NMethod senderNm = (cb == null) ? null : cb.asNMethodOrNull(); ++ if (senderNm != null) { ++ // If the sender PC is a deoptimization point, get the original ++ // PC. For MethodHandle call site the unextended_sp is stored in ++ // saved_fp. ++ if (senderNm.isDeoptMhEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptMhOriginalPc(senderNm, getFP())); ++ raw_unextendedSP = getFP(); ++ } ++ else if (senderNm.isDeoptEntry(getPC())) { ++ // DEBUG_ONLY(verifyDeoptOriginalPc(senderNm, raw_unextendedSp)); ++ } ++ else if (senderNm.isMethodHandleReturn(getPC())) { ++ raw_unextendedSP = getFP(); ++ } ++ } ++ } ++ ++ private Frame senderForInterpreterFrame(SW64RegisterMap map) { ++ if (DEBUG) { ++ System.out.println("senderForInterpreterFrame"); ++ } ++ Address unextendedSP = addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ Address sp = addressOfStackSlot(SENDER_SP_OFFSET); ++ // We do not need to update the callee-save register mapping because above ++ // us is either another interpreter frame or a converter-frame, but never ++ // directly a compiled frame. ++ // 11/24/04 SFG. With the removal of adapter frames this is no longer true. ++ // However c2 no longer uses callee save register for java calls so there ++ // are no callee register to find. 
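++
++ // Net effect, as a sketch (one address word per slot): the interpreter
++ // stored the caller's original SP one word below FP, so the sender is
++ // rebuilt as
++ //   SW64Frame(sp           = fp + 2 words,
++ //             unextendedSP = *(fp - 1 word),
++ //             fp           = *(fp + 0),
++ //             pc           = *(fp + 1 word)),
++ // with the saved-FP slot also recorded in the register map below.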
++ ++ if (map.getUpdateMap()) ++ updateMapWithSavedLink(map, addressOfStackSlot(LINK_OFFSET)); ++ ++ return new SW64Frame(sp, unextendedSP, getLink(), getSenderPC()); ++ } ++ ++ private void updateMapWithSavedLink(RegisterMap map, Address savedFPAddr) { ++ map.setLocation(fp, savedFPAddr); ++ } ++ ++ private Frame senderForCompiledFrame(SW64RegisterMap map, CodeBlob cb) { ++ if (DEBUG) { ++ System.out.println("senderForCompiledFrame"); ++ } ++ ++ // ++ // NOTE: some of this code is (unfortunately) duplicated SW64CurrentFrameGuess ++ // ++ ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(map != null, "map must be set"); ++ } ++ ++ // frame owned by optimizing compiler ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(cb.getFrameSize() >= 0, "must have non-zero frame size"); ++ } ++ Address senderSP = getUnextendedSP().addOffsetTo(cb.getFrameSize()); ++ ++ // The return_address is always the word on the stack ++ Address senderPC = senderSP.getAddressAt(-1 * VM.getVM().getAddressSize()); ++ ++ // This is the saved value of FP which may or may not really be an FP. ++ // It is only an FP if the sender is an interpreter frame. ++ Address savedFPAddr = senderSP.addOffsetTo(- SENDER_SP_OFFSET * VM.getVM().getAddressSize()); ++ ++ if (map.getUpdateMap()) { ++ // Tell GC to use argument oopmaps for some runtime stubs that need it. ++ // For C1, the runtime stub might not have oop maps, so set this flag ++ // outside of update_register_map. ++ map.setIncludeArgumentOops(cb.callerMustGCArguments()); ++ ++ if (cb.getOopMaps() != null) { ++ ImmutableOopMapSet.updateRegisterMap(this, cb, map, true); ++ } ++ ++ // Since the prolog does the save and restore of FP there is no oopmap ++ // for it so we must fill in its location as if there was an oopmap entry ++ // since if our caller was compiled code there could be live jvm state in it. ++ updateMapWithSavedLink(map, savedFPAddr); ++ } ++ ++ return new SW64Frame(senderSP, savedFPAddr.getAddressAt(0), senderPC); ++ } ++ ++ protected boolean hasSenderPD() { ++ return true; ++ } ++ ++ public long frameSize() { ++ return (getSenderSP().minus(getSP()) / VM.getVM().getAddressSize()); ++ } ++ ++ public Address getLink() { ++ try { ++ if (DEBUG) { ++ System.out.println("Reading link at " + addressOfStackSlot(LINK_OFFSET) ++ + " = " + addressOfStackSlot(LINK_OFFSET).getAddressAt(0)); ++ } ++ return addressOfStackSlot(LINK_OFFSET).getAddressAt(0); ++ } catch (Exception e) { ++ if (DEBUG) ++ System.out.println("Returning null"); ++ return null; ++ } ++ } ++ ++ // FIXME: not implementable yet ++ //inline void frame::set_link(intptr_t* addr) { *(intptr_t **)addr_at(link_offset) = addr; } ++ ++ public Address getUnextendedSP() { return raw_unextendedSP; } ++ ++ // Return address: ++ public Address getSenderPCAddr() { return addressOfStackSlot(RETURN_ADDR_OFFSET); } ++ public Address getSenderPC() { return getSenderPCAddr().getAddressAt(0); } ++ ++ // return address of param, zero origin index. 
++ public Address getNativeParamAddr(int idx) { ++ return addressOfStackSlot(NATIVE_FRAME_INITIAL_PARAM_OFFSET + idx); ++ } ++ ++ public Address getSenderSP() { return addressOfStackSlot(SENDER_SP_OFFSET); } ++ ++ public Address addressOfInterpreterFrameLocals() { ++ return addressOfStackSlot(INTERPRETER_FRAME_LOCALS_OFFSET); ++ } ++ ++ private Address addressOfInterpreterFrameBCX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_BCX_OFFSET); ++ } ++ ++ public int getInterpreterFrameBCI() { ++ // FIXME: this is not atomic with respect to GC and is unsuitable ++ // for use in a non-debugging, or reflective, system. Need to ++ // figure out how to express this. ++ Address bcp = addressOfInterpreterFrameBCX().getAddressAt(0); ++ Address methodHandle = addressOfInterpreterFrameMethod().getAddressAt(0); ++ Method method = (Method)Metadata.instantiateWrapperFor(methodHandle); ++ return bcpToBci(bcp, method); ++ } ++ ++ public Address addressOfInterpreterFrameMDX() { ++ return addressOfStackSlot(INTERPRETER_FRAME_MDX_OFFSET); ++ } ++ ++ // FIXME ++ //inline int frame::interpreter_frame_monitor_size() { ++ // return BasicObjectLock::size(); ++ //} ++ ++ // expression stack ++ // (the max_stack arguments are used by the GC; see class FrameClosure) ++ ++ public Address addressOfInterpreterFrameExpressionStack() { ++ Address monitorEnd = interpreterFrameMonitorEnd().address(); ++ return monitorEnd.addOffsetTo(-1 * VM.getVM().getAddressSize()); ++ } ++ ++ public int getInterpreterFrameExpressionStackDirection() { return -1; } ++ ++ // top of expression stack ++ public Address addressOfInterpreterFrameTOS() { ++ return getSP(); ++ } ++ ++ /** Expression stack from top down */ ++ public Address addressOfInterpreterFrameTOSAt(int slot) { ++ return addressOfInterpreterFrameTOS().addOffsetTo(slot * VM.getVM().getAddressSize()); ++ } ++ ++ public Address getInterpreterFrameSenderSP() { ++ if (Assert.ASSERTS_ENABLED) { ++ Assert.that(isInterpretedFrame(), "interpreted frame expected"); ++ } ++ return addressOfStackSlot(INTERPRETER_FRAME_SENDER_SP_OFFSET).getAddressAt(0); ++ } ++ ++ // Monitors ++ public BasicObjectLock interpreterFrameMonitorBegin() { ++ return new BasicObjectLock(addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_BOTTOM_OFFSET)); ++ } ++ ++ public BasicObjectLock interpreterFrameMonitorEnd() { ++ Address result = addressOfStackSlot(INTERPRETER_FRAME_MONITOR_BLOCK_TOP_OFFSET).getAddressAt(0); ++ if (Assert.ASSERTS_ENABLED) { ++ // make sure the pointer points inside the frame ++ Assert.that(AddressOps.gt(getFP(), result), "result must < than frame pointer"); ++ Assert.that(AddressOps.lte(getSP(), result), "result must >= than stack pointer"); ++ } ++ return new BasicObjectLock(result); ++ } ++ ++ public int interpreterFrameMonitorSize() { ++ return BasicObjectLock.size(); ++ } ++ ++ // Method ++ public Address addressOfInterpreterFrameMethod() { ++ return addressOfStackSlot(INTERPRETER_FRAME_METHOD_OFFSET); ++ } ++ ++ // Constant pool cache ++ public Address addressOfInterpreterFrameCPCache() { ++ return addressOfStackSlot(INTERPRETER_FRAME_CACHE_OFFSET); ++ } ++ ++ // Entry frames ++ public JavaCallWrapper getEntryFrameCallWrapper() { ++ return new SW64JavaCallWrapper(addressOfStackSlot(ENTRY_FRAME_CALL_WRAPPER_OFFSET).getAddressAt(0)); ++ } ++ ++ protected Address addressOfSavedOopResult() { ++ // offset is 2 for compiler2 and 3 for compiler1 ++ return getSP().addOffsetTo((VM.getVM().isClientCompiler() ? 
2 : 3) * ++ VM.getVM().getAddressSize()); ++ } ++ ++ protected Address addressOfSavedReceiver() { ++ return getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ } ++ ++ private void dumpStack() { ++ for (Address addr = getSP().addOffsetTo(-4 * VM.getVM().getAddressSize()); ++ AddressOps.lt(addr, getSP()); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ System.out.println("-----------------------"); ++ for (Address addr = getSP(); ++ AddressOps.lte(addr, getSP().addOffsetTo(20 * VM.getVM().getAddressSize())); ++ addr = addr.addOffsetTo(VM.getVM().getAddressSize())) { ++ System.out.println(addr + ": " + addr.getAddressAt(0)); ++ } ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64JavaCallWrapper.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64JavaCallWrapper.java +new file mode 100644 +index 0000000000..1f6b5b1f40 +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64JavaCallWrapper.java +@@ -0,0 +1,57 @@ ++/* ++ * Copyright (c) 2003, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2015, Red Hat Inc. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.sw64; ++ ++import java.util.*; ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.types.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class SW64JavaCallWrapper extends JavaCallWrapper { ++ private static AddressField lastJavaFPField; ++ ++ static { ++ VM.registerVMInitializedObserver(new Observer() { ++ public void update(Observable o, Object data) { ++ initialize(VM.getVM().getTypeDataBase()); ++ } ++ }); ++ } ++ ++ private static synchronized void initialize(TypeDataBase db) { ++ Type type = db.lookupType("JavaFrameAnchor"); ++ ++ lastJavaFPField = type.getAddressField("_last_Java_fp"); ++ } ++ ++ public SW64JavaCallWrapper(Address addr) { ++ super(addr); ++ } ++ ++ public Address getLastJavaFP() { ++ return lastJavaFPField.getValue(addr.addOffsetTo(anchorField.getOffset())); ++ } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64RegisterMap.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64RegisterMap.java +new file mode 100644 +index 0000000000..ded1591a6b +--- /dev/null ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/runtime/sw64/SW64RegisterMap.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2001, 2012, Oracle and/or its affiliates. All rights reserved. 
++ * Copyright (c) 2015, Red Hat Inc. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++package sun.jvm.hotspot.runtime.sw64; ++ ++import sun.jvm.hotspot.debugger.*; ++import sun.jvm.hotspot.runtime.*; ++ ++public class SW64RegisterMap extends RegisterMap { ++ ++ /** This is the only public constructor */ ++ public SW64RegisterMap(JavaThread thread, boolean updateMap) { ++ super(thread, updateMap); ++ } ++ ++ protected SW64RegisterMap(RegisterMap map) { ++ super(map); ++ } ++ ++ public Object clone() { ++ SW64RegisterMap retval = new SW64RegisterMap(this); ++ return retval; ++ } ++ ++ // no PD state to clear or copy: ++ protected void clearPD() {} ++ protected void initializePD() {} ++ protected void initializeFromPD(RegisterMap map) {} ++ protected Address getLocationPD(VMReg reg) { return null; } ++} +diff --git a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +index 7d7a6107ca..8980eb3a1c 100644 +--- a/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java ++++ b/src/jdk.hotspot.agent/share/classes/sun/jvm/hotspot/utilities/PlatformInfo.java +@@ -54,8 +54,7 @@ public class PlatformInfo { + + public static boolean knownCPU(String cpu) { + final String[] KNOWN = +- new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64"}; +- ++ new String[] {"i386", "x86", "x86_64", "amd64", "sparc", "sparcv9", "ppc64", "ppc64le", "aarch64", "sw_64", "sw64"}; + for(String s : KNOWN) { + if(s.equals(cpu)) + return true; +@@ -101,6 +100,9 @@ public class PlatformInfo { + if (cpu.equals("ppc64le")) + return "ppc64"; + ++ if (cpu.equals("sw_64")) ++ return "sw64"; ++ + return cpu; + + } +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sw64/src/jdk/vm/ci/hotspot/sw64/SW64HotSpotJVMCIBackendFactory.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sw64/src/jdk/vm/ci/hotspot/sw64/SW64HotSpotJVMCIBackendFactory.java +new file mode 100644 +index 0000000000..8c294a16aa +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sw64/src/jdk/vm/ci/hotspot/sw64/SW64HotSpotJVMCIBackendFactory.java +@@ -0,0 +1,191 @@ ++/* ++ * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.hotspot.sw64; ++ ++import static jdk.vm.ci.common.InitTimer.timer; ++ ++import java.util.EnumSet; ++ ++import jdk.vm.ci.sw64.SW64; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.stack.StackIntrospection; ++import jdk.vm.ci.common.InitTimer; ++import jdk.vm.ci.hotspot.HotSpotCodeCacheProvider; ++import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider; ++import jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory; ++import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; ++import jdk.vm.ci.hotspot.HotSpotMetaAccessProvider; ++import jdk.vm.ci.hotspot.HotSpotStackIntrospection; ++import jdk.vm.ci.meta.ConstantReflectionProvider; ++import jdk.vm.ci.runtime.JVMCIBackend; ++ ++public class SW64HotSpotJVMCIBackendFactory implements HotSpotJVMCIBackendFactory { ++ ++ protected EnumSet computeFeatures(@SuppressWarnings("unused") SW64HotSpotVMConfig config) { ++ // Configure the feature set using the HotSpot flag settings. 
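++
++ // Each VM_Version::CPU_* constant exported by the VM is a single bit
++ // in Abstract_VM_Version::_features, so a capability is added exactly
++ // when its bit is set in vmVersionFeatures.  For example (illustrative
++ // numbers only): with vmVersionFeatures equal to (sw64FP | sw64CRC32),
++ // only FP and CRC32 end up in the set below.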
++ EnumSet features = EnumSet.noneOf(SW64.CPUFeature.class); ++ ++ if ((config.vmVersionFeatures & config.sw64FP) != 0) { ++ features.add(SW64.CPUFeature.FP); ++ } ++ if ((config.vmVersionFeatures & config.sw64ASIMD) != 0) { ++ features.add(SW64.CPUFeature.ASIMD); ++ } ++ if ((config.vmVersionFeatures & config.sw64EVTSTRM) != 0) { ++ features.add(SW64.CPUFeature.EVTSTRM); ++ } ++ if ((config.vmVersionFeatures & config.sw64AES) != 0) { ++ features.add(SW64.CPUFeature.AES); ++ } ++ if ((config.vmVersionFeatures & config.sw64PMULL) != 0) { ++ features.add(SW64.CPUFeature.PMULL); ++ } ++ if ((config.vmVersionFeatures & config.sw64SHA1) != 0) { ++ features.add(SW64.CPUFeature.SHA1); ++ } ++ if ((config.vmVersionFeatures & config.sw64SHA2) != 0) { ++ features.add(SW64.CPUFeature.SHA2); ++ } ++ if ((config.vmVersionFeatures & config.sw64CRC32) != 0) { ++ features.add(SW64.CPUFeature.CRC32); ++ } ++ if ((config.vmVersionFeatures & config.sw64LSE) != 0) { ++ features.add(SW64.CPUFeature.LSE); ++ } ++ if ((config.vmVersionFeatures & config.sw64STXR_PREFETCH) != 0) { ++ features.add(SW64.CPUFeature.STXR_PREFETCH); ++ } ++ if ((config.vmVersionFeatures & config.sw64A53MAC) != 0) { ++ features.add(SW64.CPUFeature.A53MAC); ++ } ++ if ((config.vmVersionFeatures & config.sw64DMB_ATOMICS) != 0) { ++ features.add(SW64.CPUFeature.DMB_ATOMICS); ++ } ++ ++ return features; ++ } ++ ++ protected EnumSet computeFlags(@SuppressWarnings("unused") SW64HotSpotVMConfig config) { ++ EnumSet flags = EnumSet.noneOf(SW64.Flag.class); ++ ++ if (config.useBarriersForVolatile) { ++ flags.add(SW64.Flag.UseBarriersForVolatile); ++ } ++ if (config.useCRC32) { ++ flags.add(SW64.Flag.UseCRC32); ++ } ++ if (config.useNeon) { ++ flags.add(SW64.Flag.UseNeon); ++ } ++ if (config.useSIMDForMemoryOps) { ++ flags.add(SW64.Flag.UseSIMDForMemoryOps); ++ } ++ if (config.avoidUnalignedAccesses) { ++ flags.add(SW64.Flag.AvoidUnalignedAccesses); ++ } ++ if (config.useLSE) { ++ flags.add(SW64.Flag.UseLSE); ++ } ++ if (config.useBlockZeroing) { ++ flags.add(SW64.Flag.UseBlockZeroing); ++ } ++ ++ return flags; ++ } ++ ++ protected TargetDescription createTarget(SW64HotSpotVMConfig config) { ++ final int stackFrameAlignment = 16; ++ final int implicitNullCheckLimit = 4096; ++ final boolean inlineObjects = true; ++ Architecture arch = new SW64(computeFeatures(config), computeFlags(config)); ++ return new TargetDescription(arch, true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects); ++ } ++ ++ protected HotSpotConstantReflectionProvider createConstantReflection(HotSpotJVMCIRuntime runtime) { ++ return new HotSpotConstantReflectionProvider(runtime); ++ } ++ ++ protected RegisterConfig createRegisterConfig(SW64HotSpotVMConfig config, TargetDescription target) { ++ return new SW64HotSpotRegisterConfig(target, config.useCompressedOops); ++ } ++ ++ protected HotSpotCodeCacheProvider createCodeCache(HotSpotJVMCIRuntime runtime, TargetDescription target, RegisterConfig regConfig) { ++ return new HotSpotCodeCacheProvider(runtime, runtime.getConfig(), target, regConfig); ++ } ++ ++ protected HotSpotMetaAccessProvider createMetaAccess(HotSpotJVMCIRuntime runtime) { ++ return new HotSpotMetaAccessProvider(runtime); ++ } ++ ++ @Override ++ public String getArchitecture() { ++ return "sw64"; ++ } ++ ++ @Override ++ public String toString() { ++ return "JVMCIBackend:" + getArchitecture(); ++ } ++ ++ @Override ++ @SuppressWarnings("try") ++ public JVMCIBackend createJVMCIBackend(HotSpotJVMCIRuntime runtime, JVMCIBackend host) { ++ ++ assert host == 
null; ++ SW64HotSpotVMConfig config = new SW64HotSpotVMConfig(runtime.getConfigStore()); ++ TargetDescription target = createTarget(config); ++ ++ RegisterConfig regConfig; ++ HotSpotCodeCacheProvider codeCache; ++ ConstantReflectionProvider constantReflection; ++ HotSpotMetaAccessProvider metaAccess; ++ StackIntrospection stackIntrospection; ++ try (InitTimer t = timer("create providers")) { ++ try (InitTimer rt = timer("create MetaAccess provider")) { ++ metaAccess = createMetaAccess(runtime); ++ } ++ try (InitTimer rt = timer("create RegisterConfig")) { ++ regConfig = createRegisterConfig(config, target); ++ } ++ try (InitTimer rt = timer("create CodeCache provider")) { ++ codeCache = createCodeCache(runtime, target, regConfig); ++ } ++ try (InitTimer rt = timer("create ConstantReflection provider")) { ++ constantReflection = createConstantReflection(runtime); ++ } ++ try (InitTimer rt = timer("create StackIntrospection provider")) { ++ stackIntrospection = new HotSpotStackIntrospection(runtime); ++ } ++ } ++ try (InitTimer rt = timer("instantiate backend")) { ++ return createBackend(metaAccess, codeCache, constantReflection, stackIntrospection); ++ } ++ } ++ ++ protected JVMCIBackend createBackend(HotSpotMetaAccessProvider metaAccess, HotSpotCodeCacheProvider codeCache, ConstantReflectionProvider constantReflection, ++ StackIntrospection stackIntrospection) { ++ return new JVMCIBackend(metaAccess, codeCache, constantReflection, stackIntrospection); ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sw64/src/jdk/vm/ci/hotspot/sw64/SW64HotSpotRegisterConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sw64/src/jdk/vm/ci/hotspot/sw64/SW64HotSpotRegisterConfig.java +new file mode 100644 +index 0000000000..9ce2e018fc +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sw64/src/jdk/vm/ci/hotspot/sw64/SW64HotSpotRegisterConfig.java +@@ -0,0 +1,300 @@ ++/* ++ * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++package jdk.vm.ci.hotspot.sw64; ++ ++import static jdk.vm.ci.sw64.SW64.lr; ++import static jdk.vm.ci.sw64.SW64.r0; ++import static jdk.vm.ci.sw64.SW64.r1; ++import static jdk.vm.ci.sw64.SW64.r2; ++import static jdk.vm.ci.sw64.SW64.r3; ++import static jdk.vm.ci.sw64.SW64.r4; ++import static jdk.vm.ci.sw64.SW64.r5; ++import static jdk.vm.ci.sw64.SW64.r6; ++import static jdk.vm.ci.sw64.SW64.r7; ++import static jdk.vm.ci.sw64.SW64.rscratch1; ++import static jdk.vm.ci.sw64.SW64.rscratch2; ++import static jdk.vm.ci.sw64.SW64.r12; ++import static jdk.vm.ci.sw64.SW64.r27; ++import static jdk.vm.ci.sw64.SW64.r28; ++import static jdk.vm.ci.sw64.SW64.r29; ++import static jdk.vm.ci.sw64.SW64.r31; ++import static jdk.vm.ci.sw64.SW64.sp; ++import static jdk.vm.ci.sw64.SW64.v0; ++import static jdk.vm.ci.sw64.SW64.v1; ++import static jdk.vm.ci.sw64.SW64.v2; ++import static jdk.vm.ci.sw64.SW64.v3; ++import static jdk.vm.ci.sw64.SW64.v4; ++import static jdk.vm.ci.sw64.SW64.v5; ++import static jdk.vm.ci.sw64.SW64.v6; ++import static jdk.vm.ci.sw64.SW64.v7; ++import static jdk.vm.ci.sw64.SW64.zr; ++ ++import java.util.ArrayList; ++import java.util.HashSet; ++import java.util.List; ++import java.util.Set; ++ ++import jdk.vm.ci.sw64.SW64; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CallingConvention.Type; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterAttributes; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.code.ValueKindFactory; ++import jdk.vm.ci.common.JVMCIError; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.JavaType; ++import jdk.vm.ci.meta.PlatformKind; ++import jdk.vm.ci.meta.Value; ++import jdk.vm.ci.meta.ValueKind; ++ ++public class SW64HotSpotRegisterConfig implements RegisterConfig { ++ ++ private final TargetDescription target; ++ ++ private final RegisterArray allocatable; ++ ++ /** ++ * The caller saved registers always include all parameter registers. ++ */ ++ private final RegisterArray callerSaved; ++ ++ private final boolean allAllocatableAreCallerSaved; ++ ++ private final RegisterAttributes[] attributesMap; ++ ++ @Override ++ public RegisterArray getAllocatableRegisters() { ++ return allocatable; ++ } ++ ++ @Override ++ public RegisterArray filterAllocatableRegisters(PlatformKind kind, RegisterArray registers) { ++ ArrayList list = new ArrayList<>(); ++ for (Register reg : registers) { ++ if (target.arch.canStoreValue(reg.getRegisterCategory(), kind)) { ++ list.add(reg); ++ } ++ } ++ ++ return new RegisterArray(list); ++ } ++ ++ @Override ++ public RegisterAttributes[] getAttributesMap() { ++ return attributesMap.clone(); ++ } ++ ++ private final RegisterArray javaGeneralParameterRegisters = new RegisterArray(r1, r2, r3, r4, r5, r6, r7, r0); ++ private final RegisterArray nativeGeneralParameterRegisters = new RegisterArray(r0, r1, r2, r3, r4, r5, r6, r7); ++ private final RegisterArray simdParameterRegisters = new RegisterArray(v0, v1, v2, v3, v4, v5, v6, v7); ++ ++ public static final Register inlineCacheRegister = rscratch2; ++ ++ /** ++ * Vtable stubs expect the metaspace Method in r12. 
++ */ ++ public static final Register metaspaceMethodRegister = r12; ++ ++ public static final Register heapBaseRegister = r27; ++ public static final Register threadRegister = r28; ++ public static final Register fp = r29; ++ ++ private static final RegisterArray reservedRegisters ++ = new RegisterArray(rscratch1, rscratch2, threadRegister, fp, lr, r31, zr, sp); ++ ++ private static RegisterArray initAllocatable(Architecture arch, boolean reserveForHeapBase) { ++ RegisterArray allRegisters = arch.getAvailableValueRegisters(); ++ Register[] registers = new Register[allRegisters.size() - reservedRegisters.size() - (reserveForHeapBase ? 1 : 0)]; ++ List reservedRegistersList = reservedRegisters.asList(); ++ ++ int idx = 0; ++ for (Register reg : allRegisters) { ++ if (reservedRegistersList.contains(reg)) { ++ // skip reserved registers ++ continue; ++ } ++ assert !(reg.equals(threadRegister) || reg.equals(fp) || reg.equals(lr) || reg.equals(r31) || reg.equals(zr) || reg.equals(sp)); ++ if (reserveForHeapBase && reg.equals(heapBaseRegister)) { ++ // skip heap base register ++ continue; ++ } ++ ++ registers[idx++] = reg; ++ } ++ ++ assert idx == registers.length; ++ return new RegisterArray(registers); ++ } ++ ++ public SW64HotSpotRegisterConfig(TargetDescription target, boolean useCompressedOops) { ++ this(target, initAllocatable(target.arch, useCompressedOops)); ++ assert callerSaved.size() >= allocatable.size(); ++ } ++ ++ public SW64HotSpotRegisterConfig(TargetDescription target, RegisterArray allocatable) { ++ this.target = target; ++ ++ this.allocatable = allocatable; ++ Set callerSaveSet = new HashSet<>(); ++ allocatable.addTo(callerSaveSet); ++ simdParameterRegisters.addTo(callerSaveSet); ++ javaGeneralParameterRegisters.addTo(callerSaveSet); ++ nativeGeneralParameterRegisters.addTo(callerSaveSet); ++ callerSaved = new RegisterArray(callerSaveSet); ++ ++ allAllocatableAreCallerSaved = true; ++ attributesMap = RegisterAttributes.createMap(this, SW64.allRegisters); ++ } ++ ++ @Override ++ public RegisterArray getCallerSaveRegisters() { ++ return callerSaved; ++ } ++ ++ @Override ++ public RegisterArray getCalleeSaveRegisters() { ++ return null; ++ } ++ ++ @Override ++ public boolean areAllAllocatableRegistersCallerSaved() { ++ return allAllocatableAreCallerSaved; ++ } ++ ++ @Override ++ public CallingConvention getCallingConvention(Type type, JavaType returnType, JavaType[] parameterTypes, ValueKindFactory valueKindFactory) { ++ HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type; ++ if (type == HotSpotCallingConventionType.NativeCall) { ++ return callingConvention(nativeGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory); ++ } ++ // On x64, parameter locations are the same whether viewed ++ // from the caller or callee perspective ++ return callingConvention(javaGeneralParameterRegisters, returnType, parameterTypes, hotspotType, valueKindFactory); ++ } ++ ++ @Override ++ public RegisterArray getCallingConventionRegisters(Type type, JavaKind kind) { ++ HotSpotCallingConventionType hotspotType = (HotSpotCallingConventionType) type; ++ switch (kind) { ++ case Boolean: ++ case Byte: ++ case Short: ++ case Char: ++ case Int: ++ case Long: ++ case Object: ++ return hotspotType == HotSpotCallingConventionType.NativeCall ? 
nativeGeneralParameterRegisters : javaGeneralParameterRegisters; ++ case Float: ++ case Double: ++ return simdParameterRegisters; ++ default: ++ throw JVMCIError.shouldNotReachHere(); ++ } ++ } ++ ++ private CallingConvention callingConvention(RegisterArray generalParameterRegisters, JavaType returnType, JavaType[] parameterTypes, HotSpotCallingConventionType type, ++ ValueKindFactory valueKindFactory) { ++ AllocatableValue[] locations = new AllocatableValue[parameterTypes.length]; ++ ++ int currentGeneral = 0; ++ int currentSIMD = 0; ++ int currentStackOffset = 0; ++ ++ for (int i = 0; i < parameterTypes.length; i++) { ++ final JavaKind kind = parameterTypes[i].getJavaKind().getStackKind(); ++ ++ switch (kind) { ++ case Byte: ++ case Boolean: ++ case Short: ++ case Char: ++ case Int: ++ case Long: ++ case Object: ++ if (currentGeneral < generalParameterRegisters.size()) { ++ Register register = generalParameterRegisters.get(currentGeneral++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } ++ break; ++ case Float: ++ case Double: ++ if (currentSIMD < simdParameterRegisters.size()) { ++ Register register = simdParameterRegisters.get(currentSIMD++); ++ locations[i] = register.asValue(valueKindFactory.getValueKind(kind)); ++ } ++ break; ++ default: ++ throw JVMCIError.shouldNotReachHere(); ++ } ++ ++ if (locations[i] == null) { ++ ValueKind valueKind = valueKindFactory.getValueKind(kind); ++ locations[i] = StackSlot.get(valueKind, currentStackOffset, !type.out); ++ currentStackOffset += Math.max(valueKind.getPlatformKind().getSizeInBytes(), target.wordSize); ++ } ++ } ++ ++ JavaKind returnKind = returnType == null ? JavaKind.Void : returnType.getJavaKind(); ++ AllocatableValue returnLocation = returnKind == JavaKind.Void ? Value.ILLEGAL : getReturnRegister(returnKind).asValue(valueKindFactory.getValueKind(returnKind.getStackKind())); ++ return new CallingConvention(currentStackOffset, returnLocation, locations); ++ } ++ ++ @Override ++ public Register getReturnRegister(JavaKind kind) { ++ switch (kind) { ++ case Boolean: ++ case Byte: ++ case Char: ++ case Short: ++ case Int: ++ case Long: ++ case Object: ++ return r0; ++ case Float: ++ case Double: ++ return v0; ++ case Void: ++ case Illegal: ++ return null; ++ default: ++ throw new UnsupportedOperationException("no return register for type " + kind); ++ } ++ } ++ ++ @Override ++ public Register getFrameRegister() { ++ return sp; ++ } ++ ++ @Override ++ public String toString() { ++ return String.format("Allocatable: " + getAllocatableRegisters() + "%n" + "CallerSave: " + getCallerSaveRegisters() + "%n"); ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sw64/src/jdk/vm/ci/hotspot/sw64/SW64HotSpotVMConfig.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sw64/src/jdk/vm/ci/hotspot/sw64/SW64HotSpotVMConfig.java +new file mode 100644 +index 0000000000..e8e309f92a +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.hotspot.sw64/src/jdk/vm/ci/hotspot/sw64/SW64HotSpotVMConfig.java +@@ -0,0 +1,73 @@ ++/* ++ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.hotspot.sw64; ++ ++import jdk.vm.ci.hotspot.HotSpotVMConfigAccess; ++import jdk.vm.ci.hotspot.HotSpotVMConfigStore; ++ ++/** ++ * Used to access native configuration details. ++ * ++ * All non-static, public fields in this class are so that they can be compiled as constants. ++ */ ++class SW64HotSpotVMConfig extends HotSpotVMConfigAccess { ++ ++ SW64HotSpotVMConfig(HotSpotVMConfigStore config) { ++ super(config); ++ } ++ ++ final boolean linuxOs = System.getProperty("os.name", "").startsWith("Linux"); ++ ++ final boolean useCompressedOops = getFlag("UseCompressedOops", Boolean.class); ++ ++ // CPU Capabilities ++ ++ /* ++ * These flags are set based on the corresponding command line flags. ++ */ ++ final boolean useBarriersForVolatile = getFlag("UseBarriersForVolatile", Boolean.class); ++ final boolean useCRC32 = getFlag("UseCRC32", Boolean.class); ++ final boolean useNeon = getFlag("UseNeon", Boolean.class); ++ final boolean useSIMDForMemoryOps = getFlag("UseSIMDForMemoryOps", Boolean.class); ++ final boolean avoidUnalignedAccesses = getFlag("AvoidUnalignedAccesses", Boolean.class); ++ final boolean useLSE = getFlag("UseLSE", Boolean.class); ++ final boolean useBlockZeroing = getFlag("UseBlockZeroing", Boolean.class); ++ ++ final long vmVersionFeatures = getFieldValue("Abstract_VM_Version::_features", Long.class, "uint64_t"); ++ ++ /* ++ * These flags are set if the corresponding support is in the hardware. ++ */ ++ final long sw64FP = getConstant("VM_Version::CPU_FP", Long.class); ++ final long sw64ASIMD = getConstant("VM_Version::CPU_ASIMD", Long.class); ++ final long sw64EVTSTRM = getConstant("VM_Version::CPU_EVTSTRM", Long.class); ++ final long sw64AES = getConstant("VM_Version::CPU_AES", Long.class); ++ final long sw64PMULL = getConstant("VM_Version::CPU_PMULL", Long.class); ++ final long sw64SHA1 = getConstant("VM_Version::CPU_SHA1", Long.class); ++ final long sw64SHA2 = getConstant("VM_Version::CPU_SHA2", Long.class); ++ final long sw64CRC32 = getConstant("VM_Version::CPU_CRC32", Long.class); ++ final long sw64LSE = getConstant("VM_Version::CPU_LSE", Long.class); ++ final long sw64STXR_PREFETCH = getConstant("VM_Version::CPU_STXR_PREFETCH", Long.class); ++ final long sw64A53MAC = getConstant("VM_Version::CPU_A53MAC", Long.class); ++ final long sw64DMB_ATOMICS = getConstant("VM_Version::CPU_DMB_ATOMICS", Long.class); ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.sw64/src/jdk/vm/ci/sw64/SW64.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.sw64/src/jdk/vm/ci/sw64/SW64.java +new file mode 100644 +index 0000000000..1767389b96 +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.sw64/src/jdk/vm/ci/sw64/SW64.java +@@ -0,0 +1,255 @@ ++/* ++ * Copyright (c) 2015, Oracle and/or its affiliates. 
All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.sw64; ++ ++import java.nio.ByteOrder; ++import java.util.EnumSet; ++ ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.Register.RegisterCategory; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.PlatformKind; ++ ++/** ++ * Represents the SW64 architecture. ++ */ ++public class SW64 extends Architecture { ++ ++ public static final RegisterCategory CPU = new RegisterCategory("CPU"); ++ ++ // General purpose CPU registers ++ public static final Register r0 = new Register(0, 0, "r0", CPU); ++ public static final Register r1 = new Register(1, 1, "r1", CPU); ++ public static final Register r2 = new Register(2, 2, "r2", CPU); ++ public static final Register r3 = new Register(3, 3, "r3", CPU); ++ public static final Register r4 = new Register(4, 4, "r4", CPU); ++ public static final Register r5 = new Register(5, 5, "r5", CPU); ++ public static final Register r6 = new Register(6, 6, "r6", CPU); ++ public static final Register r7 = new Register(7, 7, "r7", CPU); ++ public static final Register r8 = new Register(8, 8, "r8", CPU); ++ public static final Register r9 = new Register(9, 9, "r9", CPU); ++ public static final Register r10 = new Register(10, 10, "r10", CPU); ++ public static final Register r11 = new Register(11, 11, "r11", CPU); ++ public static final Register r12 = new Register(12, 12, "r12", CPU); ++ public static final Register r13 = new Register(13, 13, "r13", CPU); ++ public static final Register r14 = new Register(14, 14, "r14", CPU); ++ public static final Register r15 = new Register(15, 15, "r15", CPU); ++ public static final Register r16 = new Register(16, 16, "r16", CPU); ++ public static final Register r17 = new Register(17, 17, "r17", CPU); ++ public static final Register r18 = new Register(18, 18, "r18", CPU); ++ public static final Register r19 = new Register(19, 19, "r19", CPU); ++ public static final Register r20 = new Register(20, 20, "r20", CPU); ++ public static final Register r21 = new Register(21, 21, "r21", CPU); ++ public static final Register r22 = new Register(22, 22, "r22", CPU); ++ public static final Register r23 = new Register(23, 23, "r23", CPU); ++ public static final Register r24 = new Register(24, 24, "r24", CPU); ++ public static final Register r25 = new Register(25, 25, "r25", CPU); ++ public static final Register r26 = new Register(26, 26, "r26", CPU); ++ public static final Register r27 = new Register(27, 27, "r27", CPU); ++ 
public static final Register r28 = new Register(28, 28, "r28", CPU); ++ public static final Register r29 = new Register(29, 29, "r29", CPU); ++ public static final Register r30 = new Register(30, 30, "r30", CPU); ++ ++ /* ++ * r31 is not a general purpose register, but represents either the stackpointer or the ++ * zero/discard register depending on the instruction. So we represent those two uses as two ++ * different registers. The register numbers are kept in sync with register_sw64.hpp and have ++ * to be sequential, hence we also need a general r31 register here, which is never used. ++ */ ++ public static final Register r31 = new Register(31, 31, "r31", CPU); ++ public static final Register zr = new Register(32, 31, "zr", CPU); ++ public static final Register sp = new Register(33, 31, "sp", CPU); ++ ++ public static final Register lr = r30; ++ ++ // Used by runtime code: cannot be compiler-allocated. ++ public static final Register rscratch1 = r8; ++ public static final Register rscratch2 = r9; ++ ++ // @formatter:off ++ public static final RegisterArray cpuRegisters = new RegisterArray( ++ r0, r1, r2, r3, r4, r5, r6, r7, ++ r8, r9, r10, r11, r12, r13, r14, r15, ++ r16, r17, r18, r19, r20, r21, r22, r23, ++ r24, r25, r26, r27, r28, r29, r30, r31, ++ zr, sp ++ ); ++ // @formatter:on ++ ++ public static final RegisterCategory SIMD = new RegisterCategory("SIMD"); ++ ++ // Simd registers ++ public static final Register v0 = new Register(34, 0, "v0", SIMD); ++ public static final Register v1 = new Register(35, 1, "v1", SIMD); ++ public static final Register v2 = new Register(36, 2, "v2", SIMD); ++ public static final Register v3 = new Register(37, 3, "v3", SIMD); ++ public static final Register v4 = new Register(38, 4, "v4", SIMD); ++ public static final Register v5 = new Register(39, 5, "v5", SIMD); ++ public static final Register v6 = new Register(40, 6, "v6", SIMD); ++ public static final Register v7 = new Register(41, 7, "v7", SIMD); ++ public static final Register v8 = new Register(42, 8, "v8", SIMD); ++ public static final Register v9 = new Register(43, 9, "v9", SIMD); ++ public static final Register v10 = new Register(44, 10, "v10", SIMD); ++ public static final Register v11 = new Register(45, 11, "v11", SIMD); ++ public static final Register v12 = new Register(46, 12, "v12", SIMD); ++ public static final Register v13 = new Register(47, 13, "v13", SIMD); ++ public static final Register v14 = new Register(48, 14, "v14", SIMD); ++ public static final Register v15 = new Register(49, 15, "v15", SIMD); ++ public static final Register v16 = new Register(50, 16, "v16", SIMD); ++ public static final Register v17 = new Register(51, 17, "v17", SIMD); ++ public static final Register v18 = new Register(52, 18, "v18", SIMD); ++ public static final Register v19 = new Register(53, 19, "v19", SIMD); ++ public static final Register v20 = new Register(54, 20, "v20", SIMD); ++ public static final Register v21 = new Register(55, 21, "v21", SIMD); ++ public static final Register v22 = new Register(56, 22, "v22", SIMD); ++ public static final Register v23 = new Register(57, 23, "v23", SIMD); ++ public static final Register v24 = new Register(58, 24, "v24", SIMD); ++ public static final Register v25 = new Register(59, 25, "v25", SIMD); ++ public static final Register v26 = new Register(60, 26, "v26", SIMD); ++ public static final Register v27 = new Register(61, 27, "v27", SIMD); ++ public static final Register v28 = new Register(62, 28, "v28", SIMD); ++ public static final Register v29 = new Register(63, 
29, "v29", SIMD); ++ public static final Register v30 = new Register(64, 30, "v30", SIMD); ++ public static final Register v31 = new Register(65, 31, "v31", SIMD); ++ ++ // @formatter:off ++ public static final RegisterArray simdRegisters = new RegisterArray( ++ v0, v1, v2, v3, v4, v5, v6, v7, ++ v8, v9, v10, v11, v12, v13, v14, v15, ++ v16, v17, v18, v19, v20, v21, v22, v23, ++ v24, v25, v26, v27, v28, v29, v30, v31 ++ ); ++ // @formatter:on ++ ++ // @formatter:off ++ public static final RegisterArray allRegisters = new RegisterArray( ++ r0, r1, r2, r3, r4, r5, r6, r7, ++ r8, r9, r10, r11, r12, r13, r14, r15, ++ r16, r17, r18, r19, r20, r21, r22, r23, ++ r24, r25, r26, r27, r28, r29, r30, r31, ++ zr, sp, ++ ++ v0, v1, v2, v3, v4, v5, v6, v7, ++ v8, v9, v10, v11, v12, v13, v14, v15, ++ v16, v17, v18, v19, v20, v21, v22, v23, ++ v24, v25, v26, v27, v28, v29, v30, v31 ++ ); ++ // @formatter:on ++ ++ /** ++ * Basic set of CPU features mirroring what is returned from the cpuid instruction. See: ++ * {@code VM_Version::cpuFeatureFlags}. ++ */ ++ public enum CPUFeature { ++ FP, ++ ASIMD, ++ EVTSTRM, ++ AES, ++ PMULL, ++ SHA1, ++ SHA2, ++ CRC32, ++ LSE, ++ STXR_PREFETCH, ++ A53MAC, ++ DMB_ATOMICS ++ } ++ ++ private final EnumSet features; ++ ++ /** ++ * Set of flags to control code emission. ++ */ ++ public enum Flag { ++ UseBarriersForVolatile, ++ UseCRC32, ++ UseNeon, ++ UseSIMDForMemoryOps, ++ AvoidUnalignedAccesses, ++ UseLSE, ++ UseBlockZeroing ++ } ++ ++ private final EnumSet flags; ++ ++ public SW64(EnumSet features, EnumSet flags) { ++ super("sw64", SW64Kind.QWORD, ByteOrder.LITTLE_ENDIAN, true, allRegisters, 0, 0, 0); ++ this.features = features; ++ this.flags = flags; ++ } ++ ++ public EnumSet getFeatures() { ++ return features; ++ } ++ ++ public EnumSet getFlags() { ++ return flags; ++ } ++ ++ @Override ++ public PlatformKind getPlatformKind(JavaKind javaKind) { ++ switch (javaKind) { ++ case Boolean: ++ case Byte: ++ return SW64Kind.BYTE; ++ case Short: ++ case Char: ++ return SW64Kind.WORD; ++ case Int: ++ return SW64Kind.DWORD; ++ case Long: ++ case Object: ++ return SW64Kind.QWORD; ++ case Float: ++ return SW64Kind.SINGLE; ++ case Double: ++ return SW64Kind.DOUBLE; ++ default: ++ return null; ++ } ++ } ++ ++ @Override ++ public boolean canStoreValue(RegisterCategory category, PlatformKind platformKind) { ++ SW64Kind kind = (SW64Kind) platformKind; ++ if (kind.isInteger()) { ++ return category.equals(CPU); ++ } else if (kind.isSIMD()) { ++ return category.equals(SIMD); ++ } ++ return false; ++ } ++ ++ @Override ++ public SW64Kind getLargestStorableKind(RegisterCategory category) { ++ if (category.equals(CPU)) { ++ return SW64Kind.QWORD; ++ } else if (category.equals(SIMD)) { ++ return SW64Kind.V128_QWORD; ++ } else { ++ return null; ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.sw64/src/jdk/vm/ci/sw64/SW64Kind.java b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.sw64/src/jdk/vm/ci/sw64/SW64Kind.java +new file mode 100644 +index 0000000000..ad98425e8c +--- /dev/null ++++ b/src/jdk.internal.vm.ci/share/classes/jdk.vm.ci.sw64/src/jdk/vm/ci/sw64/SW64Kind.java +@@ -0,0 +1,153 @@ ++/* ++ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++package jdk.vm.ci.sw64; ++ ++import jdk.vm.ci.meta.PlatformKind; ++ ++public enum SW64Kind implements PlatformKind { ++ ++ // scalar ++ BYTE(1), ++ WORD(2), ++ DWORD(4), ++ QWORD(8), ++ SINGLE(4), ++ DOUBLE(8), ++ ++ // SIMD ++ V32_BYTE(4, BYTE), ++ V32_WORD(4, WORD), ++ V64_BYTE(8, BYTE), ++ V64_WORD(8, WORD), ++ V64_DWORD(8, DWORD), ++ V128_BYTE(16, BYTE), ++ V128_WORD(16, WORD), ++ V128_DWORD(16, DWORD), ++ V128_QWORD(16, QWORD), ++ V128_SINGLE(16, SINGLE), ++ V128_DOUBLE(16, DOUBLE); ++ ++ private final int size; ++ private final int vectorLength; ++ ++ private final SW64Kind scalar; ++ private final EnumKey key = new EnumKey<>(this); ++ ++ SW64Kind(int size) { ++ this.size = size; ++ this.scalar = this; ++ this.vectorLength = 1; ++ } ++ ++ SW64Kind(int size, SW64Kind scalar) { ++ this.size = size; ++ this.scalar = scalar; ++ ++ assert size % scalar.size == 0; ++ this.vectorLength = size / scalar.size; ++ } ++ ++ public SW64Kind getScalar() { ++ return scalar; ++ } ++ ++ @Override ++ public int getSizeInBytes() { ++ return size; ++ } ++ ++ @Override ++ public int getVectorLength() { ++ return vectorLength; ++ } ++ ++ @Override ++ public Key getKey() { ++ return key; ++ } ++ ++ public boolean isInteger() { ++ switch (this) { ++ case BYTE: ++ case WORD: ++ case DWORD: ++ case QWORD: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ public boolean isSIMD() { ++ switch (this) { ++ case SINGLE: ++ case DOUBLE: ++ case V32_BYTE: ++ case V32_WORD: ++ case V64_BYTE: ++ case V64_WORD: ++ case V64_DWORD: ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ return true; ++ default: ++ return false; ++ } ++ } ++ ++ @Override ++ public char getTypeChar() { ++ switch (this) { ++ case BYTE: ++ return 'b'; ++ case WORD: ++ return 'w'; ++ case DWORD: ++ return 'd'; ++ case QWORD: ++ return 'q'; ++ case SINGLE: ++ return 'S'; ++ case DOUBLE: ++ return 'D'; ++ case V32_BYTE: ++ case V32_WORD: ++ case V64_BYTE: ++ case V64_WORD: ++ case V64_DWORD: ++ case V128_BYTE: ++ case V128_WORD: ++ case V128_DWORD: ++ case V128_QWORD: ++ case V128_SINGLE: ++ case V128_DOUBLE: ++ return 'v'; ++ default: ++ return '-'; ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.ci/share/classes/module-info.java b/src/jdk.internal.vm.ci/share/classes/module-info.java +index fed310d386..833237fa28 100644 +--- a/src/jdk.internal.vm.ci/share/classes/module-info.java ++++ b/src/jdk.internal.vm.ci/share/classes/module-info.java +@@ -37,6 +37,7 @@ module jdk.internal.vm.ci { + + provides jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory with + jdk.vm.ci.hotspot.aarch64.AArch64HotSpotJVMCIBackendFactory, ++ jdk.vm.ci.hotspot.sw64.SW64HotSpotJVMCIBackendFactory, + jdk.vm.ci.hotspot.amd64.AMD64HotSpotJVMCIBackendFactory, + 
jdk.vm.ci.hotspot.sparc.SPARCHotSpotJVMCIBackendFactory; + } +diff --git a/src/jdk.internal.vm.compiler/share/classes/module-info.java b/src/jdk.internal.vm.compiler/share/classes/module-info.java +index 8f6c8b5d6f..0ec0a90be4 100644 +--- a/src/jdk.internal.vm.compiler/share/classes/module-info.java ++++ b/src/jdk.internal.vm.compiler/share/classes/module-info.java +@@ -47,6 +47,7 @@ module jdk.internal.vm.compiler { + exports org.graalvm.compiler.api.replacements to jdk.aot; + exports org.graalvm.compiler.asm.amd64 to jdk.aot; + exports org.graalvm.compiler.asm.aarch64 to jdk.aot; ++ exports org.graalvm.compiler.asm.sw64 to jdk.aot; + exports org.graalvm.compiler.bytecode to jdk.aot; + exports org.graalvm.compiler.code to jdk.aot; + exports org.graalvm.compiler.core to jdk.aot; +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.sw64.test/src/org/graalvm/compiler/asm/sw64/test/SW64MacroAssemblerTest.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.sw64.test/src/org/graalvm/compiler/asm/sw64/test/SW64MacroAssemblerTest.java +new file mode 100644 +index 0000000000..bbafef0303 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.sw64.test/src/org/graalvm/compiler/asm/sw64/test/SW64MacroAssemblerTest.java +@@ -0,0 +1,286 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
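The module-info change above registers SW64HotSpotJVMCIBackendFactory as a service provider alongside the existing backends; at startup JVMCI picks the factory whose architecture matches the host. The snippet below is only a sketch of how such providers can be listed via ServiceLoader on a JDK built with this patch; it assumes the JVMCI module is resolvable and its packages are exported to the example (e.g. --add-modules jdk.internal.vm.ci and --add-exports jdk.internal.vm.ci/jdk.vm.ci.hotspot=ALL-UNNAMED), and the class name ListBackends is made up.

import java.util.ServiceLoader;

import jdk.vm.ci.hotspot.HotSpotJVMCIBackendFactory;

public class ListBackends {
    public static void main(String[] args) {
        // Lists every registered backend factory; on an SW64 build the output should include
        // jdk.vm.ci.hotspot.sw64.SW64HotSpotJVMCIBackendFactory added by this patch.
        for (HotSpotJVMCIBackendFactory factory : ServiceLoader.load(HotSpotJVMCIBackendFactory.class)) {
            System.out.println(factory.getClass().getName());
        }
    }
}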
++ */ ++ ++ ++ ++package org.graalvm.compiler.asm.sw64.test; ++ ++import static org.junit.Assert.assertArrayEquals; ++ ++import java.util.EnumSet; ++ ++import org.junit.Assert; ++import org.junit.Before; ++import org.junit.Test; ++ ++import org.graalvm.compiler.core.common.NumUtil; ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.asm.sw64.SW64Assembler; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler.AddressGenerationPlan; ++import org.graalvm.compiler.test.GraalTest; ++ ++import jdk.vm.ci.sw64.SW64; ++import jdk.vm.ci.sw64.SW64.CPUFeature; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.TargetDescription; ++ ++public class SW64MacroAssemblerTest extends GraalTest { ++ ++ private SW64MacroAssembler masm; ++ private TestProtectedAssembler asm; ++ private Register base; ++ private Register index; ++ private Register scratch; ++ ++ private static EnumSet computeFeatures() { ++ EnumSet features = EnumSet.noneOf(SW64.CPUFeature.class); ++ features.add(CPUFeature.FP); ++ return features; ++ } ++ ++ private static EnumSet computeFlags() { ++ EnumSet flags = EnumSet.noneOf(SW64.Flag.class); ++ return flags; ++ } ++ ++ private static TargetDescription createTarget() { ++ final int stackFrameAlignment = 16; ++ final int implicitNullCheckLimit = 4096; ++ final boolean inlineObjects = true; ++ Architecture arch = new SW64(computeFeatures(), computeFlags()); ++ return new TargetDescription(arch, true, stackFrameAlignment, implicitNullCheckLimit, inlineObjects); ++ } ++ ++ @Before ++ public void setupEnvironment() { ++ TargetDescription target = createTarget(); ++ masm = new SW64MacroAssembler(target); ++ asm = new TestProtectedAssembler(target); ++ base = SW64.r10; ++ index = SW64.r13; ++ scratch = SW64.r15; ++ } ++ ++ @Test ++ public void testGenerateAddressPlan() { ++ AddressGenerationPlan plan = SW64MacroAssembler.generateAddressPlan(NumUtil.getNbitNumberInt(8), false, 0); ++ Assert.assertTrue(plan.workPlan == AddressGenerationPlan.WorkPlan.NO_WORK && !plan.needsScratch && ++ (plan.addressingMode == SW64Address.AddressingMode.IMMEDIATE_SCALED || plan.addressingMode == SW64Address.AddressingMode.IMMEDIATE_UNSCALED)); ++ ++ plan = SW64MacroAssembler.generateAddressPlan(NumUtil.getNbitNumberInt(8), false, 1); ++ Assert.assertTrue(plan.workPlan == AddressGenerationPlan.WorkPlan.NO_WORK && !plan.needsScratch && ++ (plan.addressingMode == SW64Address.AddressingMode.IMMEDIATE_SCALED || plan.addressingMode == SW64Address.AddressingMode.IMMEDIATE_UNSCALED)); ++ ++ plan = SW64MacroAssembler.generateAddressPlan(-NumUtil.getNbitNumberInt(8) - 1, false, 0); ++ Assert.assertTrue(plan.workPlan == AddressGenerationPlan.WorkPlan.NO_WORK && !plan.needsScratch && plan.addressingMode == SW64Address.AddressingMode.IMMEDIATE_UNSCALED); ++ ++ plan = SW64MacroAssembler.generateAddressPlan(NumUtil.getNbitNumberInt(12), false, 1); ++ Assert.assertTrue(plan.workPlan == AddressGenerationPlan.WorkPlan.NO_WORK && !plan.needsScratch && plan.addressingMode == SW64Address.AddressingMode.IMMEDIATE_SCALED); ++ ++ plan = SW64MacroAssembler.generateAddressPlan(NumUtil.getNbitNumberInt(12) << 2, false, 4); ++ Assert.assertTrue(plan.workPlan == AddressGenerationPlan.WorkPlan.NO_WORK && !plan.needsScratch && plan.addressingMode == SW64Address.AddressingMode.IMMEDIATE_SCALED); ++ ++ plan = SW64MacroAssembler.generateAddressPlan(0, false, 8); ++ Assert.assertTrue(plan.workPlan == 
AddressGenerationPlan.WorkPlan.NO_WORK && !plan.needsScratch && plan.addressingMode == SW64Address.AddressingMode.REGISTER_OFFSET); ++ ++ plan = SW64MacroAssembler.generateAddressPlan(0, false, 0); ++ Assert.assertTrue(plan.workPlan == AddressGenerationPlan.WorkPlan.NO_WORK && !plan.needsScratch && plan.addressingMode == SW64Address.AddressingMode.REGISTER_OFFSET); ++ ++ plan = SW64MacroAssembler.generateAddressPlan(NumUtil.getNbitNumberInt(9), false, 0); ++ Assert.assertTrue(plan.workPlan == AddressGenerationPlan.WorkPlan.ADD_TO_BASE && !plan.needsScratch && plan.addressingMode == SW64Address.AddressingMode.REGISTER_OFFSET); ++ ++ plan = SW64MacroAssembler.generateAddressPlan(NumUtil.getNbitNumberInt(12), false, 8); ++ Assert.assertTrue(plan.workPlan == AddressGenerationPlan.WorkPlan.ADD_TO_BASE && !plan.needsScratch && plan.addressingMode == SW64Address.AddressingMode.REGISTER_OFFSET); ++ ++ plan = SW64MacroAssembler.generateAddressPlan(NumUtil.getNbitNumberInt(13), false, 8); ++ Assert.assertTrue(plan.workPlan == AddressGenerationPlan.WorkPlan.ADD_TO_BASE && plan.needsScratch && plan.addressingMode == SW64Address.AddressingMode.REGISTER_OFFSET); ++ ++ plan = SW64MacroAssembler.generateAddressPlan(-NumUtil.getNbitNumberInt(12), false, 8); ++ Assert.assertTrue(plan.workPlan == AddressGenerationPlan.WorkPlan.ADD_TO_BASE && !plan.needsScratch && plan.addressingMode == SW64Address.AddressingMode.REGISTER_OFFSET); ++ ++ plan = SW64MacroAssembler.generateAddressPlan(-(NumUtil.getNbitNumberInt(12) << 12), false, 8); ++ Assert.assertTrue(plan.workPlan == AddressGenerationPlan.WorkPlan.ADD_TO_BASE && !plan.needsScratch && plan.addressingMode == SW64Address.AddressingMode.REGISTER_OFFSET); ++ ++ plan = SW64MacroAssembler.generateAddressPlan(NumUtil.getNbitNumberInt(12), true, 8); ++ Assert.assertTrue(plan.workPlan == AddressGenerationPlan.WorkPlan.ADD_TO_BASE && !plan.needsScratch && plan.addressingMode == SW64Address.AddressingMode.REGISTER_OFFSET); ++ ++ plan = SW64MacroAssembler.generateAddressPlan(NumUtil.getNbitNumberInt(12) << 3, true, 8); ++ Assert.assertTrue(plan.workPlan == AddressGenerationPlan.WorkPlan.ADD_TO_INDEX && !plan.needsScratch && plan.addressingMode == SW64Address.AddressingMode.REGISTER_OFFSET); ++ ++ plan = SW64MacroAssembler.generateAddressPlan(NumUtil.getNbitNumberInt(13) << 3, true, 8); ++ Assert.assertTrue(plan.workPlan == AddressGenerationPlan.WorkPlan.ADD_TO_INDEX && plan.needsScratch && plan.addressingMode == SW64Address.AddressingMode.REGISTER_OFFSET); ++ } ++ ++ @Test ++ public void testMakeAddressNoAction() { ++ SW64Address address = masm.makeAddress(base, NumUtil.getNbitNumberInt(12) << 3, SW64.zr, false, 8, null, false); ++ Assert.assertTrue(address.isScaled() && address.getAddressingMode() == SW64Address.AddressingMode.IMMEDIATE_SCALED && address.getBase().equals(base) && ++ address.getOffset().equals(SW64.zr) && address.getImmediateRaw() == NumUtil.getNbitNumberInt(12)); ++ // No code generated. 
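The assertions above all revolve around two immediate forms: a scaled, unsigned 12-bit offset (a multiple of the transfer size) and an unscaled, signed 9-bit byte offset; offsets outside both ranges force the ADD_TO_BASE / ADD_TO_INDEX plans. The standalone sketch below merely restates those ranges; the helper names are made up and nothing here depends on the patch.

public class OffsetRanges {
    static boolean fitsScaled(long byteOffset, int transferSize) {
        // IMMEDIATE_SCALED: non-negative multiple of the transfer size, quotient < 2^12.
        return byteOffset >= 0
                && byteOffset % transferSize == 0
                && byteOffset / transferSize < (1 << 12);
    }

    static boolean fitsUnscaled(long byteOffset) {
        // IMMEDIATE_UNSCALED: signed 9-bit byte offset, i.e. [-256, 255].
        return byteOffset >= -(1 << 8) && byteOffset < (1 << 8);
    }

    public static void main(String[] args) {
        int qword = 8;                                       // 8-byte transfers, as in the tests above
        long scaledMax = ((1 << 12) - 1) * (long) qword;     // 32760: largest IMMEDIATE_SCALED offset
        System.out.println(fitsScaled(scaledMax, qword));          // true
        System.out.println(fitsScaled(scaledMax + qword, qword));  // false -> needs ADD_TO_BASE
        System.out.println(fitsUnscaled(255));                     // true  (signed 9-bit upper bound)
        System.out.println(fitsUnscaled(-256));                    // true  (signed 9-bit lower bound)
        System.out.println(fitsUnscaled(256));                     // false
    }
}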
++ compareAssembly(); ++ } ++ ++ @Test ++ public void testMakeAddressAddIndex() { ++ SW64Address address = masm.makeAddress(base, NumUtil.getNbitNumberInt(8) << 5, index, false, 8, null, true); ++ Assert.assertTrue(address.isScaled() && address.getAddressingMode() == SW64Address.AddressingMode.REGISTER_OFFSET && address.getBase().equals(base) && address.getOffset().equals(index)); ++ asm.add(64, index, index, NumUtil.getNbitNumberInt(8) << 2); ++ compareAssembly(); ++ } ++ ++ @Test ++ public void testMakeAddressAddIndexNoOverwrite() { ++ SW64Address address = masm.makeAddress(base, NumUtil.getNbitNumberInt(8) << 5, index, false, 8, scratch, false); ++ Assert.assertTrue(address.isScaled() && address.getAddressingMode() == SW64Address.AddressingMode.REGISTER_OFFSET && address.getBase().equals(base) && address.getOffset().equals(scratch)); ++ asm.add(64, scratch, index, NumUtil.getNbitNumberInt(8) << 2); ++ compareAssembly(); ++ } ++ ++ @Test ++ public void testMakeAddressAddBaseNoOverwrite() { ++ SW64Address address = masm.makeAddress(base, NumUtil.getNbitNumberInt(12), index, false, 8, scratch, false); ++ Assert.assertTrue(address.isScaled() && address.getAddressingMode() == SW64Address.AddressingMode.REGISTER_OFFSET && address.getBase().equals(scratch) && address.getOffset().equals(index)); ++ asm.add(64, scratch, base, NumUtil.getNbitNumberInt(12)); ++ compareAssembly(); ++ } ++ ++ @Test ++ public void testMakeAddressAddBase() { ++ SW64Address address = masm.makeAddress(base, NumUtil.getNbitNumberInt(12), index, false, 8, null, true); ++ Assert.assertTrue(address.isScaled() && address.getAddressingMode() == SW64Address.AddressingMode.REGISTER_OFFSET && address.getBase().equals(base) && address.getOffset().equals(index)); ++ asm.add(64, base, base, NumUtil.getNbitNumberInt(12)); ++ compareAssembly(); ++ } ++ ++ @Test ++ public void testMakeAddressAddIndexNoOverwriteExtend() { ++ SW64Address address = masm.makeAddress(base, NumUtil.getNbitNumberInt(8) << 5, index, true, 8, scratch, false); ++ Assert.assertTrue(address.isScaled() && address.getAddressingMode() == SW64Address.AddressingMode.EXTENDED_REGISTER_OFFSET && address.getBase().equals(base) && ++ address.getOffset().equals(scratch) && address.getExtendType() == SW64Assembler.ExtendType.SXTW); ++ asm.add(32, scratch, index, NumUtil.getNbitNumberInt(8) << 2); ++ compareAssembly(); ++ } ++ ++ @Test ++ public void testMakeAddressAddIndexExtend() { ++ SW64Address address = masm.makeAddress(base, NumUtil.getNbitNumberInt(8) << 5, index, true, 8, scratch, true); ++ Assert.assertTrue(address.isScaled() && address.getAddressingMode() == SW64Address.AddressingMode.EXTENDED_REGISTER_OFFSET && address.getBase().equals(base) && ++ address.getOffset().equals(index) && address.getExtendType() == SW64Assembler.ExtendType.SXTW); ++ asm.add(32, index, index, NumUtil.getNbitNumberInt(8) << 2); ++ compareAssembly(); ++ } ++ ++ @Test ++ public void testLoadAddressUnscaled() { ++ Register dst = SW64.r26; ++ SW64Address address = SW64Address.createUnscaledImmediateAddress(base, NumUtil.getNbitNumberInt(8)); ++ masm.loadAddress(dst, address, 8); ++ asm.add(64, dst, base, NumUtil.getNbitNumberInt(8)); ++ compareAssembly(); ++ } ++ ++ @Test ++ public void testLoadAddressUnscaled2() { ++ Register dst = SW64.r26; ++ SW64Address address = SW64Address.createUnscaledImmediateAddress(base, -NumUtil.getNbitNumberInt(8)); ++ masm.loadAddress(dst, address, 8); ++ asm.sub(64, dst, base, NumUtil.getNbitNumberInt(8)); ++ compareAssembly(); ++ } ++ ++ @Test ++ public void 
testLoadAddressScaled() { ++ Register dst = SW64.r26; ++ SW64Address address = SW64Address.createScaledImmediateAddress(base, NumUtil.getNbitNumberInt(12)); ++ masm.loadAddress(dst, address, 8); ++ asm.add(64, dst, base, NumUtil.getNbitNumberInt(9) << 3); ++ asm.add(64, dst, dst, NumUtil.getNbitNumberInt(3) << 12); ++ compareAssembly(); ++ } ++ ++ @Test ++ public void testLoadAddressScaledLowerOnly() { ++ Register dst = SW64.r26; ++ SW64Address address = SW64Address.createScaledImmediateAddress(base, NumUtil.getNbitNumberInt(5)); ++ masm.loadAddress(dst, address, 8); ++ asm.add(64, dst, base, NumUtil.getNbitNumberInt(5) << 3); ++ compareAssembly(); ++ } ++ ++ @Test ++ public void testLoadAddressScaledHigherOnly() { ++ Register dst = SW64.r26; ++ SW64Address address = SW64Address.createScaledImmediateAddress(base, 1 << 11); ++ masm.loadAddress(dst, address, 8); ++ asm.add(64, dst, base, 1 << 11 << 3); ++ compareAssembly(); ++ } ++ ++ @Test ++ public void testLoadAddressRegisterOffsetUnscaled() { ++ Register dst = SW64.r26; ++ SW64Address address = SW64Address.createRegisterOffsetAddress(base, index, false); ++ masm.loadAddress(dst, address, 4); ++ asm.add(64, dst, base, index, SW64Assembler.ShiftType.LSL, 0); ++ compareAssembly(); ++ } ++ ++ @Test ++ public void testLoadAddressRegisterOffsetScaled() { ++ Register dst = SW64.r26; ++ SW64Address address = SW64Address.createRegisterOffsetAddress(base, index, true); ++ masm.loadAddress(dst, address, 4); ++ asm.add(64, dst, base, index, SW64Assembler.ShiftType.LSL, 2); ++ compareAssembly(); ++ } ++ ++ @Test ++ public void testLoadAddressExtendedRegisterOffsetUnscaled() { ++ Register dst = SW64.r26; ++ SW64Address address = SW64Address.createExtendedRegisterOffsetAddress(base, index, false, SW64Assembler.ExtendType.SXTW); ++ masm.loadAddress(dst, address, 4); ++ asm.add(64, dst, base, index, SW64Assembler.ExtendType.SXTW, 0); ++ compareAssembly(); ++ } ++ ++ @Test ++ public void testLoadAddressExtendedRegisterOffsetScaled() { ++ Register dst = SW64.r26; ++ SW64Address address = SW64Address.createExtendedRegisterOffsetAddress(base, index, true, SW64Assembler.ExtendType.SXTW); ++ masm.loadAddress(dst, address, 4); ++ asm.add(64, dst, base, index, SW64Assembler.ExtendType.SXTW, 2); ++ compareAssembly(); ++ } ++ ++ /** ++ * Compares assembly generated by the macro assembler to the hand-generated assembly. ++ */ ++ private void compareAssembly() { ++ byte[] expected = asm.close(true); ++ byte[] actual = masm.close(true); ++ assertArrayEquals(expected, actual); ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.sw64.test/src/org/graalvm/compiler/asm/sw64/test/TestProtectedAssembler.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.sw64.test/src/org/graalvm/compiler/asm/sw64/test/TestProtectedAssembler.java +new file mode 100644 +index 0000000000..38f5956cae +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.sw64.test/src/org/graalvm/compiler/asm/sw64/test/TestProtectedAssembler.java +@@ -0,0 +1,552 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
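testLoadAddressScaled above expects an out-of-range scaled offset to be materialized with two add-immediate instructions, one carrying the low 12 bits of the byte offset and one the bits above them. The standalone arithmetic below re-derives that split; it is illustrative only, and the presumed reason for splitting at bit 12 (an add immediate carries a 12-bit value, optionally shifted left by 12) is an assumption based on the AArch64-style encoding this code mirrors.

public class ScaledImmediateSplit {
    public static void main(String[] args) {
        int imm12 = (1 << 12) - 1;         // NumUtil.getNbitNumberInt(12) = 4095
        int byteOffset = imm12 << 3;       // scaled by the 8-byte transfer size = 32760

        // The two add immediates the test expects loadAddress to emit:
        int low  = ((1 << 9) - 1) << 3;    // getNbitNumberInt(9) << 3  = 4088 (low 12 bits)
        int high = ((1 << 3) - 1) << 12;   // getNbitNumberInt(3) << 12 = 28672 (bits 12 and up)

        System.out.println(byteOffset);    // 32760
        System.out.println(low + high);    // 32760 -> the two adds cover the full offset
    }
}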
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++ ++package org.graalvm.compiler.asm.sw64.test; ++ ++import org.graalvm.compiler.asm.AbstractAddress; ++import org.graalvm.compiler.asm.Label; ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.asm.sw64.SW64Assembler; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.TargetDescription; ++ ++/** ++ * Cheat so that we can test protected functions of assembler. ++ */ ++class TestProtectedAssembler extends SW64Assembler { ++ ++ TestProtectedAssembler(TargetDescription target) { ++ super(target); ++ } ++ ++ @Override ++ protected void cbnz(int size, Register reg, int imm21, int pos) { ++ super.cbnz(size, reg, imm21, pos); ++ } ++ ++ @Override ++ protected void cbz(int size, Register reg, int imm21, int pos) { ++ super.cbz(size, reg, imm21, pos); ++ } ++ ++ @Override ++ public void ands(int size, Register dst, Register src, long bimm) { ++ super.ands(size, dst, src, bimm); ++ } ++ ++ @Override ++ protected void b(ConditionFlag condition, int imm21) { ++ super.b(condition, imm21); ++ } ++ ++ @Override ++ protected void b(ConditionFlag condition, int imm21, int pos) { ++ super.b(condition, imm21, pos); ++ } ++ ++ @Override ++ protected void cbnz(int size, Register reg, int imm21) { ++ super.cbnz(size, reg, imm21); ++ } ++ ++ @Override ++ protected void cbz(int size, Register reg, int imm21) { ++ super.cbz(size, reg, imm21); ++ } ++ ++ @Override ++ protected void b(int imm28) { ++ super.b(imm28); ++ } ++ ++ @Override ++ protected void b(int imm28, int pos) { ++ super.b(imm28, pos); ++ } ++ ++ @Override ++ public void bl(int imm28) { ++ super.bl(imm28); ++ } ++ ++ @Override ++ public void blr(Register reg) { ++ super.blr(reg); ++ } ++ ++ @Override ++ protected void br(Register reg) { ++ super.br(reg); ++ } ++ ++ @Override ++ public void ret(Register reg) { ++ super.ret(reg); ++ } ++ ++ @Override ++ public void ldr(int srcSize, Register rt, SW64Address address) { ++ super.ldr(srcSize, rt, address); ++ } ++ ++ @Override ++ public void ldrs(int targetSize, int srcSize, Register rt, SW64Address address) { ++ super.ldrs(targetSize, srcSize, rt, address); ++ } ++ ++ @Override ++ public void str(int destSize, Register rt, SW64Address address) { ++ super.str(destSize, rt, address); ++ } ++ ++ @Override ++ protected void ldxr(int size, Register rt, Register rn) { ++ super.ldxr(size, rt, rn); ++ } ++ ++ @Override ++ protected void stxr(int size, Register rs, Register rt, Register rn) { ++ super.stxr(size, rs, rt, rn); ++ } ++ ++ @Override ++ protected void ldar(int size, Register rt, Register rn) { ++ super.ldar(size, rt, rn); ++ } ++ ++ @Override ++ protected void stlr(int size, Register rt, Register rn) { ++ super.stlr(size, rt, rn); ++ } ++ ++ @Override ++ public void ldaxr(int size, Register rt, Register rn) { ++ 
super.ldaxr(size, rt, rn); ++ } ++ ++ @Override ++ public void stlxr(int size, Register rs, Register rt, Register rn) { ++ super.stlxr(size, rs, rt, rn); ++ } ++ ++ @Override ++ public void adr(Register dst, int imm21) { ++ super.adr(dst, imm21); ++ } ++ ++ @Override ++ protected void add(int size, Register dst, Register src, int aimm) { ++ super.add(size, dst, src, aimm); ++ } ++ ++ @Override ++ protected void adds(int size, Register dst, Register src, int aimm) { ++ super.adds(size, dst, src, aimm); ++ } ++ ++ @Override ++ protected void sub(int size, Register dst, Register src, int aimm) { ++ super.sub(size, dst, src, aimm); ++ } ++ ++ @Override ++ protected void subs(int size, Register dst, Register src, int aimm) { ++ super.subs(size, dst, src, aimm); ++ } ++ ++ @Override ++ public void and(int size, Register dst, Register src, long bimm) { ++ super.and(size, dst, src, bimm); ++ } ++ ++ @Override ++ public void eor(int size, Register dst, Register src, long bimm) { ++ super.eor(size, dst, src, bimm); ++ } ++ ++ @Override ++ protected void orr(int size, Register dst, Register src, long bimm) { ++ super.orr(size, dst, src, bimm); ++ } ++ ++ @Override ++ protected void movz(int size, Register dst, int uimm16, int shiftAmt) { ++ super.movz(size, dst, uimm16, shiftAmt); ++ } ++ ++ @Override ++ protected void movn(int size, Register dst, int uimm16, int shiftAmt) { ++ super.movn(size, dst, uimm16, shiftAmt); ++ } ++ ++ @Override ++ protected void movk(int size, Register dst, int uimm16, int pos) { ++ super.movk(size, dst, uimm16, pos); ++ } ++ ++ @Override ++ public void bfm(int size, Register dst, Register src, int r, int s) { ++ super.bfm(size, dst, src, r, s); ++ } ++ ++ @Override ++ public void ubfm(int size, Register dst, Register src, int r, int s) { ++ super.ubfm(size, dst, src, r, s); ++ } ++ ++ @Override ++ protected void sbfm(int size, Register dst, Register src, int r, int s) { ++ super.sbfm(size, dst, src, r, s); ++ } ++ ++ @Override ++ protected void extr(int size, Register dst, Register src1, Register src2, int lsb) { ++ super.extr(size, dst, src1, src2, lsb); ++ } ++ ++ @Override ++ public void adds(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) { ++ super.adds(size, dst, src1, src2, shiftType, imm); ++ } ++ ++ @Override ++ public void subs(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) { ++ super.subs(size, dst, src1, src2, shiftType, imm); ++ } ++ ++ @Override ++ protected void add(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) { ++ super.add(size, dst, src1, src2, shiftType, imm); ++ } ++ ++ @Override ++ protected void sub(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) { ++ super.sub(size, dst, src1, src2, shiftType, imm); ++ } ++ ++ @Override ++ public void add(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) { ++ super.add(size, dst, src1, src2, extendType, shiftAmt); ++ } ++ ++ @Override ++ protected void adds(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) { ++ super.adds(size, dst, src1, src2, extendType, shiftAmt); ++ } ++ ++ @Override ++ protected void sub(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) { ++ super.sub(size, dst, src1, src2, extendType, shiftAmt); ++ } ++ ++ @Override ++ public void subs(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) { 
++ super.subs(size, dst, src1, src2, extendType, shiftAmt); ++ } ++ ++ @Override ++ protected void and(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ super.and(size, dst, src1, src2, shiftType, shiftAmt); ++ } ++ ++ @Override ++ protected void ands(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ super.ands(size, dst, src1, src2, shiftType, shiftAmt); ++ } ++ ++ @Override ++ protected void bic(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ super.bic(size, dst, src1, src2, shiftType, shiftAmt); ++ } ++ ++ @Override ++ protected void bics(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ super.bics(size, dst, src1, src2, shiftType, shiftAmt); ++ } ++ ++ @Override ++ protected void eon(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ super.eon(size, dst, src1, src2, shiftType, shiftAmt); ++ } ++ ++ @Override ++ protected void eor(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ super.eor(size, dst, src1, src2, shiftType, shiftAmt); ++ } ++ ++ @Override ++ protected void orr(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ super.orr(size, dst, src1, src2, shiftType, shiftAmt); ++ } ++ ++ @Override ++ protected void orn(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ super.orn(size, dst, src1, src2, shiftType, shiftAmt); ++ } ++ ++ @Override ++ protected void asr(int size, Register dst, Register src1, Register src2) { ++ super.asr(size, dst, src1, src2); ++ } ++ ++ @Override ++ protected void lsl(int size, Register dst, Register src1, Register src2) { ++ super.lsl(size, dst, src1, src2); ++ } ++ ++ @Override ++ protected void lsr(int size, Register dst, Register src1, Register src2) { ++ super.lsr(size, dst, src1, src2); ++ } ++ ++ @Override ++ protected void ror(int size, Register dst, Register src1, Register src2) { ++ super.ror(size, dst, src1, src2); ++ } ++ ++ @Override ++ protected void cls(int size, Register dst, Register src) { ++ super.cls(size, dst, src); ++ } ++ ++ @Override ++ public void clz(int size, Register dst, Register src) { ++ super.clz(size, dst, src); ++ } ++ ++ @Override ++ public void rbit(int size, Register dst, Register src) { ++ super.rbit(size, dst, src); ++ } ++ ++ @Override ++ public void rev(int size, Register dst, Register src) { ++ super.rev(size, dst, src); ++ } ++ ++ @Override ++ protected void csel(int size, Register dst, Register src1, Register src2, ConditionFlag condition) { ++ super.csel(size, dst, src1, src2, condition); ++ } ++ ++ @Override ++ protected void csneg(int size, Register dst, Register src1, Register src2, ConditionFlag condition) { ++ super.csneg(size, dst, src1, src2, condition); ++ } ++ ++ @Override ++ protected void csinc(int size, Register dst, Register src1, Register src2, ConditionFlag condition) { ++ super.csinc(size, dst, src1, src2, condition); ++ } ++ ++ @Override ++ protected void madd(int size, Register dst, Register src1, Register src2, Register src3) { ++ super.madd(size, dst, src1, src2, src3); ++ } ++ ++ @Override ++ protected void msub(int size, Register dst, Register src1, Register src2, Register src3) { ++ super.msub(size, dst, src1, src2, src3); ++ } ++ ++ @Override ++ public void sdiv(int size, Register dst, Register src1, Register src2) { ++ super.sdiv(size, dst, 
src1, src2); ++ } ++ ++ @Override ++ public void udiv(int size, Register dst, Register src1, Register src2) { ++ super.udiv(size, dst, src1, src2); ++ } ++ ++ @Override ++ public void fldr(int size, Register rt, SW64Address address) { ++ super.fldr(size, rt, address); ++ } ++ ++ @Override ++ public void fstr(int size, Register rt, SW64Address address) { ++ super.fstr(size, rt, address); ++ } ++ ++ @Override ++ protected void fmov(int size, Register dst, Register src) { ++ super.fmov(size, dst, src); ++ } ++ ++ @Override ++ protected void fmovFpu2Cpu(int size, Register dst, Register src) { ++ super.fmovFpu2Cpu(size, dst, src); ++ } ++ ++ @Override ++ protected void fmovCpu2Fpu(int size, Register dst, Register src) { ++ super.fmovCpu2Fpu(size, dst, src); ++ } ++ ++ @Override ++ protected void fmov(int size, Register dst, double imm) { ++ super.fmov(size, dst, imm); ++ } ++ ++ @Override ++ public void fcvt(int srcSize, Register dst, Register src) { ++ super.fcvt(srcSize, dst, src); ++ } ++ ++ @Override ++ public void fcvtzs(int targetSize, int srcSize, Register dst, Register src) { ++ super.fcvtzs(targetSize, srcSize, dst, src); ++ } ++ ++ @Override ++ public void scvtf(int targetSize, int srcSize, Register dst, Register src) { ++ super.scvtf(targetSize, srcSize, dst, src); ++ } ++ ++ @Override ++ protected void frintz(int size, Register dst, Register src) { ++ super.frintz(size, dst, src); ++ } ++ ++ @Override ++ public void fabs(int size, Register dst, Register src) { ++ super.fabs(size, dst, src); ++ } ++ ++ @Override ++ public void fneg(int size, Register dst, Register src) { ++ super.fneg(size, dst, src); ++ } ++ ++ @Override ++ public void fsqrt(int size, Register dst, Register src) { ++ super.fsqrt(size, dst, src); ++ } ++ ++ @Override ++ public void fadd(int size, Register dst, Register src1, Register src2) { ++ super.fadd(size, dst, src1, src2); ++ } ++ ++ @Override ++ public void fsub(int size, Register dst, Register src1, Register src2) { ++ super.fsub(size, dst, src1, src2); ++ } ++ ++ @Override ++ public void fmul(int size, Register dst, Register src1, Register src2) { ++ super.fmul(size, dst, src1, src2); ++ } ++ ++ @Override ++ public void fdiv(int size, Register dst, Register src1, Register src2) { ++ super.fdiv(size, dst, src1, src2); ++ } ++ ++ @Override ++ protected void fmadd(int size, Register dst, Register src1, Register src2, Register src3) { ++ super.fmadd(size, dst, src1, src2, src3); ++ } ++ ++ @Override ++ protected void fmsub(int size, Register dst, Register src1, Register src2, Register src3) { ++ super.fmsub(size, dst, src1, src2, src3); ++ } ++ ++ @Override ++ public void fcmp(int size, Register src1, Register src2) { ++ super.fcmp(size, src1, src2); ++ } ++ ++ @Override ++ public void fccmp(int size, Register src1, Register src2, int uimm4, ConditionFlag condition) { ++ super.fccmp(size, src1, src2, uimm4, condition); ++ } ++ ++ @Override ++ public void fcmpZero(int size, Register src) { ++ super.fcmpZero(size, src); ++ } ++ ++ @Override ++ protected void fcsel(int size, Register dst, Register src1, Register src2, ConditionFlag condition) { ++ super.fcsel(size, dst, src1, src2, condition); ++ } ++ ++ @Override ++ protected void hlt(int uimm16) { ++ super.hlt(uimm16); ++ } ++ ++ @Override ++ protected void brk(int uimm16) { ++ super.brk(uimm16); ++ } ++ ++ @Override ++ protected void hint(SystemHint hint) { ++ super.hint(hint); ++ } ++ ++ @Override ++ protected void clrex() { ++ super.clrex(); ++ } ++ ++ @Override ++ public void dmb(BarrierKind barrierKind) { ++ 
super.dmb(barrierKind); ++ } ++ ++ @Override ++ public void align(int modulus) { ++ } ++ ++ @Override ++ public void jmp(Label l) { ++ } ++ ++ @Override ++ protected void patchJumpTarget(int branch, int jumpTarget) { ++ ++ } ++ ++ @Override ++ public AbstractAddress makeAddress(Register base, int displacement) { ++ throw new UnsupportedOperationException(); ++ } ++ ++ @Override ++ public AbstractAddress getPlaceholder(int instructionStartPosition) { ++ throw new UnsupportedOperationException(); ++ } ++ ++ @Override ++ public void ensureUniquePC() { ++ throw new UnsupportedOperationException(); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.sw64/src/org/graalvm/compiler/asm/sw64/SW64Address.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.sw64/src/org/graalvm/compiler/asm/sw64/SW64Address.java +new file mode 100644 +index 0000000000..a1b49b9dea +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.sw64/src/org/graalvm/compiler/asm/sw64/SW64Address.java +@@ -0,0 +1,389 @@ ++/* ++ * Copyright (c) 2013, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.asm.sw64; ++ ++import static jdk.vm.ci.sw64.SW64.zr; ++ ++import org.graalvm.compiler.asm.AbstractAddress; ++import org.graalvm.compiler.core.common.NumUtil; ++import org.graalvm.compiler.debug.GraalError; ++ ++import jdk.vm.ci.sw64.SW64; ++import jdk.vm.ci.code.Register; ++ ++/** ++ * Represents an address in target machine memory, specified using one of the different addressing ++ * modes of the SW64 ISA. - Base register only - Base register + immediate or register with ++ * shifted offset - Pre-indexed: base + immediate offset are written back to base register, value ++ * used in instruction is base + offset - Post-indexed: base + offset (immediate or register) are ++ * written back to base register, value used in instruction is base only - Literal: PC + 19-bit ++ * signed word aligned offset ++ *

++ * Not all addressing modes are supported for all instructions. ++ */ ++public final class SW64Address extends AbstractAddress { ++ // Placeholder for addresses that get patched later. ++ public static final SW64Address PLACEHOLDER = createPcLiteralAddress(0); ++ ++ public enum AddressingMode { ++ /** ++ * base + uimm12 << log2(memory_transfer_size). ++ */ ++ IMMEDIATE_SCALED, ++ /** ++ * base + imm9. ++ */ ++ IMMEDIATE_UNSCALED, ++ /** ++ * base. ++ */ ++ BASE_REGISTER_ONLY, ++ /** ++ * base + offset [<< log2(memory_transfer_size)]. ++ */ ++ REGISTER_OFFSET, ++ /** ++ * base + extend(offset) [<< log2(memory_transfer_size)]. ++ */ ++ EXTENDED_REGISTER_OFFSET, ++ /** ++ * PC + imm21 (word aligned). ++ */ ++ PC_LITERAL, ++ /** ++ * address = base. base is updated to base + imm9 ++ */ ++ IMMEDIATE_POST_INDEXED, ++ /** ++ * address = base + imm9. base is updated to base + imm9 ++ */ ++ IMMEDIATE_PRE_INDEXED, ++ AddressingMode, ++ } ++ ++ private final Register base; ++ private final Register offset; ++ private final int immediate; ++ /** ++ * Should register offset be scaled or not. ++ */ ++ private final boolean scaled; ++ private final SW64Assembler.ExtendType extendType; ++ private final AddressingMode addressingMode; ++ ++ /** ++ * General address generation mechanism. Accepted values for all parameters depend on the ++ * addressingMode. Null is never accepted for a register, if an addressMode doesn't use a ++ * register the register has to be the zero-register. extendType has to be null for every ++ * addressingMode except EXTENDED_REGISTER_OFFSET. ++ */ ++ public static SW64Address createAddress(AddressingMode addressingMode, Register base, Register offset, int immediate, boolean isScaled, SW64Assembler.ExtendType extendType) { ++ return new SW64Address(base, offset, immediate, isScaled, extendType, addressingMode); ++ } ++ ++ /** ++ * @param base may not be null or the zero-register. ++ * @param imm9 Signed 9-bit immediate value. ++ * @return an address specifying a post-indexed immediate address pointing to base. After ++ * ldr/str instruction, base is updated to point to base + imm9 ++ */ ++ public static SW64Address createPostIndexedImmediateAddress(Register base, int imm9) { ++ return new SW64Address(base, zr, imm9, false, null, AddressingMode.IMMEDIATE_POST_INDEXED); ++ } ++ ++ /** ++ * @param base may not be null or the zero-register. ++ * @param imm9 Signed 9-bit immediate value. ++ * @return an address specifying a pre-indexed immediate address pointing to base + imm9. After ++ * ldr/str instruction, base is updated to point to base + imm9 ++ */ ++ public static SW64Address createPreIndexedImmediateAddress(Register base, int imm9) { ++ return new SW64Address(base, zr, imm9, false, null, AddressingMode.IMMEDIATE_PRE_INDEXED); ++ } ++ ++ /** ++ * @param base may not be null or the zero-register. ++ * @param imm12 Unsigned 12-bit immediate value. This is scaled by the word access size. This ++ * means if this address is used to load/store a word, the immediate is shifted by 2 ++ * (log2Ceil(4)). ++ * @return an address specifying a signed address of the form base + imm12 << ++ * log2(memory_transfer_size). ++ */ ++ public static SW64Address createScaledImmediateAddress(Register base, int imm12) { ++ return new SW64Address(base, zr, imm12, true, null, AddressingMode.IMMEDIATE_SCALED); ++ } ++ ++ /** ++ * @param base may not be null or the zero-register. ++ * @param imm9 Signed 9-bit immediate value. 
++ * @return an address specifying an unscaled immediate address of the form base + imm9 ++ */ ++ public static SW64Address createUnscaledImmediateAddress(Register base, int imm9) { ++ return new SW64Address(base, zr, imm9, false, null, AddressingMode.IMMEDIATE_UNSCALED); ++ } ++ ++ /** ++ * @param base May not be null or the zero register. ++ * @return an address specifying the address pointed to by base. ++ */ ++ public static SW64Address createBaseRegisterOnlyAddress(Register base) { ++ return createRegisterOffsetAddress(base, zr, false); ++ } ++ ++ /** ++ * @param base may not be null or the zero-register. ++ * @param offset Register specifying some offset, optionally scaled by the memory_transfer_size. ++ * May not be null or the stackpointer. ++ * @param scaled Specifies whether offset should be scaled by memory_transfer_size or not. ++ * @return an address specifying a register offset address of the form base + offset [<< log2 ++ * (memory_transfer_size)] ++ */ ++ public static SW64Address createRegisterOffsetAddress(Register base, Register offset, boolean scaled) { ++ return new SW64Address(base, offset, 0, scaled, null, AddressingMode.REGISTER_OFFSET); ++ } ++ ++ /** ++ * @param base may not be null or the zero-register. ++ * @param imm7 Signed 7-bit immediate value. ++ * @return an address specifying an unscaled immediate address of the form base + imm7 ++ */ ++ public static SW64Address createPairUnscaledImmediateAddress(Register base, int imm7) { ++ return new SW64Address(base, zr, imm7, false, null, AddressingMode.IMMEDIATE_UNSCALED); ++ } ++ ++ /** ++ * @param base may not be null or the zero-register. ++ * @param offset Word register specifying some offset, optionally scaled by the ++ * memory_transfer_size. May not be null or the stackpointer. ++ * @param scaled Specifies whether offset should be scaled by memory_transfer_size or not. ++ * @param extendType Describes whether register is zero- or sign-extended. May not be null. ++ * @return an address specifying an extended register offset of the form base + ++ * extendType(offset) [<< log2(memory_transfer_size)] ++ */ ++ public static SW64Address createExtendedRegisterOffsetAddress(Register base, Register offset, boolean scaled, SW64Assembler.ExtendType extendType) { ++ return new SW64Address(base, offset, 0, scaled, extendType, AddressingMode.EXTENDED_REGISTER_OFFSET); ++ } ++ ++ /** ++ * @param imm21 Signed 21-bit offset, word aligned. 
++ * @return SW64Address specifying a PC-literal address of the form PC + offset ++ */ ++ public static SW64Address createPcLiteralAddress(int imm21) { ++ return new SW64Address(zr, zr, imm21, false, null, AddressingMode.PC_LITERAL); ++ } ++ ++ private SW64Address(Register base, Register offset, int immediate, boolean scaled, SW64Assembler.ExtendType extendType, AddressingMode addressingMode) { ++ this.base = base; ++ this.offset = offset; ++ if ((addressingMode == AddressingMode.REGISTER_OFFSET || addressingMode == AddressingMode.EXTENDED_REGISTER_OFFSET) && offset.equals(zr)) { ++ this.addressingMode = AddressingMode.BASE_REGISTER_ONLY; ++ } else { ++ this.addressingMode = addressingMode; ++ } ++ this.immediate = immediate; ++ this.scaled = scaled; ++ this.extendType = extendType; ++ assert verify(); ++ } ++ ++ private boolean verify() { ++ assert addressingMode != null; ++ assert base.getRegisterCategory().equals(SW64.CPU); ++ assert offset.getRegisterCategory().equals(SW64.CPU); ++ ++ switch (addressingMode) { ++ case IMMEDIATE_SCALED: ++ assert !base.equals(zr); ++ assert offset.equals(zr); ++ assert extendType == null; ++ assert NumUtil.isUnsignedNbit(12, immediate); ++ break; ++ case IMMEDIATE_UNSCALED: ++ assert !base.equals(zr); ++ assert offset.equals(zr); ++ assert extendType == null; ++ assert NumUtil.isSignedNbit(9, immediate); ++ break; ++ case BASE_REGISTER_ONLY: ++ assert !base.equals(zr); ++ assert offset.equals(zr); ++ assert extendType == null; ++ assert immediate == 0; ++ break; ++ case REGISTER_OFFSET: ++ assert !base.equals(zr); ++ assert offset.getRegisterCategory().equals(SW64.CPU); ++ assert extendType == null; ++ assert immediate == 0; ++ break; ++ case EXTENDED_REGISTER_OFFSET: ++ assert !base.equals(zr); ++ assert offset.getRegisterCategory().equals(SW64.CPU); ++ assert (extendType == SW64Assembler.ExtendType.SXTW || extendType == SW64Assembler.ExtendType.UXTW); ++ assert immediate == 0; ++ break; ++ case PC_LITERAL: ++ assert base.equals(zr); ++ assert offset.equals(zr); ++ assert extendType == null; ++ assert NumUtil.isSignedNbit(21, immediate); ++ assert ((immediate & 0x3) == 0); ++ break; ++ case IMMEDIATE_POST_INDEXED: ++ case IMMEDIATE_PRE_INDEXED: ++ assert !base.equals(zr); ++ assert offset.equals(zr); ++ assert extendType == null; ++ assert NumUtil.isSignedNbit(9, immediate); ++ break; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ ++ return true; ++ } ++ ++ public Register getBase() { ++ return base; ++ } ++ ++ public Register getOffset() { ++ return offset; ++ } ++ ++ /** ++ * @return immediate in correct representation for the given addressing mode. For example in ++ * case of addressingMode ==IMMEDIATE_UNSCALED the value will be returned ++ * as the 9-bit signed representation. ++ */ ++ public int getImmediate() { ++ switch (addressingMode) { ++ case IMMEDIATE_UNSCALED: ++ case IMMEDIATE_POST_INDEXED: ++ case IMMEDIATE_PRE_INDEXED: ++ // 9-bit signed value ++ assert NumUtil.isSignedNbit(9, immediate); ++ return immediate & NumUtil.getNbitNumberInt(9); ++ case IMMEDIATE_SCALED: ++ // Unsigned value can be returned as-is. ++ assert NumUtil.isUnsignedNbit(12, immediate); ++ return immediate; ++ case PC_LITERAL: ++ // 21-bit signed value, but lower 2 bits are always 0 and are shifted out. 
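getImmediate() returns the immediate in the bit pattern the instruction fields expect: signed 9-bit values keep only their low 9 bits, the unsigned 12-bit scaled form is returned unchanged, and word-aligned PC-literal offsets drop their two alignment bits before being masked to 19 bits. The standalone sketch below reproduces that masking with plain bit operations; the class and helper names are made up and nothing here depends on the patch.

public class ImmediateFields {
    // Mirrors NumUtil.getNbitNumberInt(n): a mask of the n low bits.
    static int mask(int n) {
        return (1 << n) - 1;
    }

    public static void main(String[] args) {
        // IMMEDIATE_UNSCALED / pre- and post-indexed: signed 9-bit value, stored as its low 9 bits.
        int imm9 = -8;
        System.out.println(Integer.toBinaryString(imm9 & mask(9)));   // 111111000 (two's complement, 9 bits)

        // IMMEDIATE_SCALED: unsigned 12-bit value, stored as-is.
        int imm12 = 4095;
        System.out.println(imm12 & mask(12));                         // 4095

        // PC_LITERAL: word-aligned signed 21-bit offset; the two zero bits are shifted out
        // and the remaining signed 19-bit value is stored.
        int imm21 = -4;                                               // one word behind the PC
        System.out.println(Integer.toBinaryString((imm21 >> 2) & mask(19))); // nineteen 1s: -1 in 19 bits
    }
}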
++ assert NumUtil.isSignedNbit(19, immediate >> 2); ++ return (immediate >> 2) & NumUtil.getNbitNumberInt(19); ++ default: ++ throw GraalError.shouldNotReachHere("Should only be called for addressing modes that use immediate values."); ++ } ++ } ++ ++ /** ++ * @return Raw immediate as a 32-bit signed value. ++ */ ++ public int getImmediateRaw() { ++ switch (addressingMode) { ++ case IMMEDIATE_UNSCALED: ++ case IMMEDIATE_SCALED: ++ case IMMEDIATE_POST_INDEXED: ++ case IMMEDIATE_PRE_INDEXED: ++ case PC_LITERAL: ++ return immediate; ++ default: ++ throw GraalError.shouldNotReachHere("Should only be called for addressing modes that use immediate values."); ++ } ++ } ++ ++ public boolean isScaled() { ++ return scaled; ++ } ++ ++ public SW64Assembler.ExtendType getExtendType() { ++ return extendType; ++ } ++ ++ public AddressingMode getAddressingMode() { ++ return addressingMode; ++ } ++ ++ public String toString(int log2TransferSize) { ++ int shiftVal = scaled ? log2TransferSize : 0; ++ switch (addressingMode) { ++ case IMMEDIATE_SCALED: ++ return String.format("[X%d, %d]", base.encoding, immediate << log2TransferSize); ++ case IMMEDIATE_UNSCALED: ++ return String.format("[X%d, %d]", base.encoding, immediate); ++ case BASE_REGISTER_ONLY: ++ return String.format("[X%d]", base.encoding); ++ case EXTENDED_REGISTER_OFFSET: ++ if (shiftVal != 0) { ++ return String.format("[X%d, W%d, %s %d]", base.encoding, offset.encoding, extendType.name(), shiftVal); ++ } else { ++ return String.format("[X%d, W%d, %s]", base.encoding, offset.encoding, extendType.name()); ++ } ++ case REGISTER_OFFSET: ++ if (shiftVal != 0) { ++ return String.format("[X%d, X%d, LSL %d]", base.encoding, offset.encoding, shiftVal); ++ } else { ++ // LSL 0 may be optional, but still encoded differently so we always leave it ++ // off ++ return String.format("[X%d, X%d]", base.encoding, offset.encoding); ++ } ++ case PC_LITERAL: ++ return String.format(".%s%d", immediate >= 0 ? "+" : "", immediate); ++ case IMMEDIATE_POST_INDEXED: ++ return String.format("[X%d],%d", base.encoding, immediate); ++ case IMMEDIATE_PRE_INDEXED: ++ return String.format("[X%d,%d]!", base.encoding, immediate); ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ } ++ ++ /** ++ * Loads an address into Register r. ++ * ++ * @param masm the macro assembler. ++ * @param r general purpose register. May not be null. ++ */ ++ public void lea(SW64MacroAssembler masm, Register r) { ++ switch (addressingMode) { ++ case IMMEDIATE_UNSCALED: ++ if (immediate == 0 && base.equals(r)) { // it's a nop ++ break; ++ } ++ masm.add(64, r, base, immediate); ++ break; ++ case REGISTER_OFFSET: ++ masm.add(64, r, base, offset); ++ break; ++ case PC_LITERAL: { ++ masm.mov(r, getImmediate()); ++ break; ++ } ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.sw64/src/org/graalvm/compiler/asm/sw64/SW64Assembler.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.sw64/src/org/graalvm/compiler/asm/sw64/SW64Assembler.java +new file mode 100644 +index 0000000000..38f18df82f +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.sw64/src/org/graalvm/compiler/asm/sw64/SW64Assembler.java +@@ -0,0 +1,2835 @@ ++/* ++ * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
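Two details of SW64Address above are easy to miss: the private constructor collapses a register-offset address whose offset is the zero register into BASE_REGISTER_ONLY, and toString() scales the stored 12-bit immediate by the transfer size when printing. The sketch below exercises both; it is illustrative only and assumes a JDK built with this patch, with the jdk.vm.ci.sw64 and org.graalvm.compiler.asm.sw64 packages made accessible to the example (the class name AddressModes is made up).

import jdk.vm.ci.sw64.SW64;
import org.graalvm.compiler.asm.sw64.SW64Address;

public class AddressModes {
    public static void main(String[] args) {
        // A register-offset address whose offset is the zero register collapses to
        // BASE_REGISTER_ONLY in the private constructor.
        SW64Address a = SW64Address.createRegisterOffsetAddress(SW64.r10, SW64.zr, false);
        System.out.println(a.getAddressingMode());   // BASE_REGISTER_ONLY

        // A scaled immediate address keeps the raw 12-bit immediate; toString() shows the
        // byte offset for the given transfer size (here 8 bytes, so log2 = 3).
        SW64Address b = SW64Address.createScaledImmediateAddress(SW64.r10, 16);
        System.out.println(b.getImmediateRaw());      // 16
        System.out.println(b.toString(3));            // [X10, 128]
    }
}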
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.asm.sw64; ++ ++import static jdk.vm.ci.sw64.SW64.cpuRegisters; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.ADD; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.ADDS; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.ADR; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.ADRP; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.AND; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.ANDS; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.ASRV; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.BFM; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.BIC; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.BICS; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.BLR; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.BR; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.BRK; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.CAS; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.CLREX; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.CLS; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.CLZ; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.CSEL; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.CSINC; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.CSNEG; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.DMB; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.EON; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.EOR; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.EXTR; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FABS; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FADD; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FCCMP; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FCMP; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FCMPZERO; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FCSEL; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FCVTDS; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FCVTSD; ++import 
static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FCVTZS; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FDIV; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FMADD; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FMOV; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FMSUB; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FMUL; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FNEG; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FRINTM; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FRINTN; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FRINTP; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FRINTZ; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FSQRT; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.FSUB; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.HINT; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.HLT; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.LDADD; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.LDAR; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.LDAXR; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.LDP; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.LDR; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.LDRS; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.LDXR; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.LSLV; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.LSRV; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.MADD; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.MOVK; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.MOVN; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.MOVZ; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.MSUB; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.ORN; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.ORR; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.RBIT; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.RET; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.REVW; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.REVX; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.RORV; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.SBFM; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.SCVTF; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.SDIV; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.STLR; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.STLXR; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.STP; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.STR; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.STXR; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.SUB; ++import static 
org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.SUBS; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.SWP; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.TBZ; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.TBNZ; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.UBFM; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.Instruction.UDIV; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.InstructionType.FP32; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.InstructionType.FP64; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.InstructionType.General32; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.InstructionType.General64; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.InstructionType.floatFromSize; ++import static org.graalvm.compiler.asm.sw64.SW64Assembler.InstructionType.generalFromSize; ++import static jdk.vm.ci.sw64.SW64.CPU; ++import static jdk.vm.ci.sw64.SW64.SIMD; ++import static jdk.vm.ci.sw64.SW64.r0; ++import static jdk.vm.ci.sw64.SW64.sp; ++import static jdk.vm.ci.sw64.SW64.zr; ++ ++import java.util.Arrays; ++ ++import org.graalvm.compiler.asm.Assembler; ++import org.graalvm.compiler.core.common.NumUtil; ++import org.graalvm.compiler.asm.sw64.SW64Address.AddressingMode; ++import org.graalvm.compiler.debug.GraalError; ++ ++import jdk.vm.ci.sw64.SW64; ++import jdk.vm.ci.sw64.SW64.CPUFeature; ++import jdk.vm.ci.sw64.SW64.Flag; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.TargetDescription; ++ ++public abstract class SW64Assembler extends Assembler { ++ ++ public static class LogicalImmediateTable { ++ ++ private static final Immediate[] IMMEDIATE_TABLE = buildImmediateTable(); ++ ++ private static final int ImmediateOffset = 10; ++ private static final int ImmediateRotateOffset = 16; ++ private static final int ImmediateSizeOffset = 22; ++ ++ /** ++ * Specifies whether immediate can be represented in all cases (YES), as a 64bit instruction ++ * (SIXTY_FOUR_BIT_ONLY) or not at all (NO). ++ */ ++ enum Representable { ++ YES, ++ SIXTY_FOUR_BIT_ONLY, ++ NO ++ } ++ ++ /** ++ * Tests whether an immediate can be encoded for logical instructions. ++ * ++ * @param is64bit if true immediate is considered a 64-bit pattern. If false we may use a ++ * 64-bit instruction to load the 32-bit pattern into a register. ++ * @return enum specifying whether immediate can be used for 32- and 64-bit logical ++ * instructions ({@code #Representable.YES}), for 64-bit instructions only ( ++ * {@link Representable#SIXTY_FOUR_BIT_ONLY}) or not at all ( ++ * {@link Representable#NO}). ++ */ ++ public static Representable isRepresentable(boolean is64bit, long immediate) { ++ int pos = getLogicalImmTablePos(is64bit, immediate); ++ if (pos < 0) { ++ // if 32bit instruction we can try again as 64bit immediate which may succeed. ++ // i.e. 0xffffffff fails as a 32bit immediate but works as 64bit one. ++ if (!is64bit) { ++ assert NumUtil.isUnsignedNbit(32, immediate); ++ pos = getLogicalImmTablePos(true, immediate); ++ return pos >= 0 ? Representable.SIXTY_FOUR_BIT_ONLY : Representable.NO; ++ } ++ return Representable.NO; ++ } ++ Immediate imm = IMMEDIATE_TABLE[pos]; ++ return imm.only64bit() ? 
Representable.SIXTY_FOUR_BIT_ONLY : Representable.YES; ++ } ++ ++ public static Representable isRepresentable(int immediate) { ++ return isRepresentable(false, immediate & 0xFFFF_FFFFL); ++ } ++ ++ public static int getLogicalImmEncoding(boolean is64bit, long value) { ++ int pos = getLogicalImmTablePos(is64bit, value); ++ assert pos >= 0 : "Value cannot be represented as logical immediate: " + value + ", is64bit=" + is64bit; ++ Immediate imm = IMMEDIATE_TABLE[pos]; ++ assert is64bit || !imm.only64bit() : "Immediate can only be represented for 64bit, but 32bit instruction specified"; ++ return IMMEDIATE_TABLE[pos].encoding; ++ } ++ ++ /** ++ * @param is64bit if true also allow 64-bit only encodings to be returned. ++ * @return If positive the return value is the position into the IMMEDIATE_TABLE for the ++ * given immediate, if negative the immediate cannot be encoded. ++ */ ++ private static int getLogicalImmTablePos(boolean is64bit, long value) { ++ Immediate imm; ++ if (!is64bit) { ++ // 32bit instructions can only have 32bit immediates. ++ if (!NumUtil.isUnsignedNbit(32, value)) { ++ return -1; ++ } ++ // If we have a 32bit instruction (and therefore immediate) we have to duplicate it ++ // across 64bit to find it in the table. ++ imm = new Immediate(value << 32 | value); ++ } else { ++ imm = new Immediate(value); ++ } ++ int pos = Arrays.binarySearch(IMMEDIATE_TABLE, imm); ++ if (pos < 0) { ++ return -1; ++ } ++ if (!is64bit && IMMEDIATE_TABLE[pos].only64bit()) { ++ return -1; ++ } ++ return pos; ++ } ++ ++ /** ++ * To quote 5.4.2: [..] an immediate is a 32 or 64 bit pattern viewed as a vector of ++ * identical elements of size e = 2, 4, 8, 16, 32 or (in the case of bimm64) 64 bits. Each ++ * element contains the same sub-pattern: a single run of 1 to e-1 non-zero bits, rotated by ++ * 0 to e-1 bits. It is encoded in the following: 10-16: rotation amount (6bit) starting ++ * from 1s in the LSB (i.e. 0111->1011->1101->1110) 16-22: This stores a combination of the ++ * number of set bits and the pattern size. The pattern size is encoded as follows (x is ++ * used to store the number of 1 bits - 1) e pattern 2 1111xx 4 1110xx 8 110xxx 16 10xxxx 32 ++ * 0xxxxx 64 xxxxxx 22: if set we have an instruction with 64bit pattern? ++ */ ++ private static final class Immediate implements Comparable { ++ public final long imm; ++ public final int encoding; ++ ++ Immediate(long imm, boolean is64, int s, int r) { ++ this.imm = imm; ++ this.encoding = computeEncoding(is64, s, r); ++ } ++ ++ // Used to be able to binary search for an immediate in the table. ++ Immediate(long imm) { ++ this(imm, false, 0, 0); ++ } ++ ++ /** ++ * Returns true if this pattern is only representable as 64bit. ++ */ ++ public boolean only64bit() { ++ return (encoding & (1 << ImmediateSizeOffset)) != 0; ++ } ++ ++ private static int computeEncoding(boolean is64, int s, int r) { ++ int sf = is64 ? 
1 : 0; ++ return sf << ImmediateSizeOffset | r << ImmediateRotateOffset | s << ImmediateOffset; ++ } ++ ++ @Override ++ public int compareTo(Immediate o) { ++ return Long.compare(imm, o.imm); ++ } ++ } ++ ++ private static Immediate[] buildImmediateTable() { ++ final int nrImmediates = 5334; ++ final Immediate[] table = new Immediate[nrImmediates]; ++ int nrImms = 0; ++ for (int logE = 1; logE <= 6; logE++) { ++ int e = 1 << logE; ++ long mask = NumUtil.getNbitNumberLong(e); ++ for (int nrOnes = 1; nrOnes < e; nrOnes++) { ++ long val = (1L << nrOnes) - 1; ++ // r specifies how much we rotate the value ++ for (int r = 0; r < e; r++) { ++ long immediate = (val >>> r | val << (e - r)) & mask; ++ // Duplicate pattern to fill whole 64bit range. ++ switch (logE) { ++ case 1: ++ immediate |= immediate << 2; ++ immediate |= immediate << 4; ++ immediate |= immediate << 8; ++ immediate |= immediate << 16; ++ immediate |= immediate << 32; ++ break; ++ case 2: ++ immediate |= immediate << 4; ++ immediate |= immediate << 8; ++ immediate |= immediate << 16; ++ immediate |= immediate << 32; ++ break; ++ case 3: ++ immediate |= immediate << 8; ++ immediate |= immediate << 16; ++ immediate |= immediate << 32; ++ break; ++ case 4: ++ immediate |= immediate << 16; ++ immediate |= immediate << 32; ++ break; ++ case 5: ++ immediate |= immediate << 32; ++ break; ++ } ++ // 5 - logE can underflow to -1, but we shift this bogus result ++ // out of the masked area. ++ int sizeEncoding = (1 << (5 - logE)) - 1; ++ int s = ((sizeEncoding << (logE + 1)) & 0x3f) | (nrOnes - 1); ++ table[nrImms++] = new Immediate(immediate, /* is64bit */e == 64, s, r); ++ } ++ } ++ } ++ Arrays.sort(table); ++ assert nrImms == nrImmediates : nrImms + " instead of " + nrImmediates + " in table."; ++ assert checkDuplicates(table) : "Duplicate values in table."; ++ return table; ++ } ++ ++ private static boolean checkDuplicates(Immediate[] table) { ++ for (int i = 0; i < table.length - 1; i++) { ++ if (table[i].imm >= table[i + 1].imm) { ++ return false; ++ } ++ } ++ return true; ++ } ++ } ++ ++ private static final int RdOffset = 0; ++ private static final int Rs1Offset = 5; ++ private static final int Rs2Offset = 16; ++ private static final int Rs3Offset = 10; ++ private static final int RtOffset = 0; ++ private static final int RnOffset = 5; ++ private static final int Rt2Offset = 10; ++ ++ /* Helper functions */ ++ private static int rd(Register reg) { ++ return reg.encoding << RdOffset; ++ } ++ ++ private static int rs1(Register reg) { ++ return reg.encoding << Rs1Offset; ++ } ++ ++ private static int rs2(Register reg) { ++ return reg.encoding << Rs2Offset; ++ } ++ ++ private static int rs3(Register reg) { ++ return reg.encoding << Rs3Offset; ++ } ++ ++ private static int rt(Register reg) { ++ return reg.encoding << RtOffset; ++ } ++ ++ private static int rt2(Register reg) { ++ return reg.encoding << Rt2Offset; ++ } ++ ++ private static int rn(Register reg) { ++ return reg.encoding << RnOffset; ++ } ++ ++ private static int maskField(int sizeInBits, int n) { ++ assert NumUtil.isSignedNbit(sizeInBits, n); ++ return n & NumUtil.getNbitNumberInt(sizeInBits); ++ } ++ ++ /** ++ * Enumeration of all different instruction kinds: General32/64 are the general instructions ++ * (integer, branch, etc.), for 32-, respectively 64-bit operands. 
FP32/64 is the encoding for ++ * the 32/64bit float operations ++ */ ++ protected enum InstructionType { ++ General32(0b00 << 30, 32, true), ++ General64(0b10 << 30, 64, true), ++ FP32(0x00000000, 32, false), ++ FP64(0x00400000, 64, false); ++ ++ public final int encoding; ++ public final int width; ++ public final boolean isGeneral; ++ ++ InstructionType(int encoding, int width, boolean isGeneral) { ++ this.encoding = encoding; ++ this.width = width; ++ this.isGeneral = isGeneral; ++ } ++ ++ public static InstructionType generalFromSize(int size) { ++ assert size == 32 || size == 64; ++ return size == 32 ? General32 : General64; ++ } ++ ++ public static InstructionType floatFromSize(int size) { ++ assert size == 32 || size == 64; ++ return size == 32 ? FP32 : FP64; ++ } ++ ++ } ++ ++ private static final int ImmediateOffset = 10; ++ private static final int ImmediateRotateOffset = 16; ++ private static final int ImmediateSizeOffset = 22; ++ private static final int ExtendTypeOffset = 13; ++ ++ private static final int AddSubImmOp = 0x11000000; ++ private static final int AddSubShift12 = 0b01 << 22; ++ private static final int AddSubSetFlag = 0x20000000; ++ ++ private static final int LogicalImmOp = 0x12000000; ++ ++ private static final int MoveWideImmOp = 0x12800000; ++ private static final int MoveWideImmOffset = 5; ++ private static final int MoveWideShiftOffset = 21; ++ ++ private static final int BitfieldImmOp = 0x13000000; ++ ++ private static final int AddSubShiftedOp = 0x0B000000; ++ private static final int ShiftTypeOffset = 22; ++ ++ private static final int AddSubExtendedOp = 0x0B200000; ++ ++ private static final int MulOp = 0x1B000000; ++ private static final int DataProcessing1SourceOp = 0x5AC00000; ++ private static final int DataProcessing2SourceOp = 0x1AC00000; ++ ++ private static final int Fp1SourceOp = 0x1E204000; ++ private static final int Fp2SourceOp = 0x1E200800; ++ private static final int Fp3SourceOp = 0x1F000000; ++ ++ private static final int FpConvertOp = 0x1E200000; ++ private static final int FpImmOp = 0x1E201000; ++ private static final int FpImmOffset = 13; ++ ++ private static final int FpCmpOp = 0x1E202000; ++ ++ private static final int PcRelImmHiOffset = 5; ++ private static final int PcRelImmLoOffset = 29; ++ ++ private static final int PcRelImmOp = 0x10000000; ++ ++ private static final int UnconditionalBranchImmOp = 0x14000000; ++ private static final int UnconditionalBranchRegOp = 0xD6000000; ++ private static final int CompareBranchOp = 0x34000000; ++ ++ private static final int ConditionalBranchImmOffset = 5; ++ ++ private static final int ConditionalSelectOp = 0x1A800000; ++ private static final int ConditionalConditionOffset = 12; ++ ++ private static final int LoadStoreScaledOp = 0b111_0_01_00 << 22; ++ private static final int LoadStoreUnscaledOp = 0b111_0_00_00 << 22; ++ ++ private static final int LoadStoreRegisterOp = 0b111_0_00_00_1 << 21 | 0b10 << 10; ++ ++ private static final int LoadLiteralOp = 0x18000000; ++ ++ private static final int LoadStorePostIndexedOp = 0b111_0_00_00_0 << 21 | 0b01 << 10; ++ private static final int LoadStorePreIndexedOp = 0b111_0_00_00_0 << 21 | 0b11 << 10; ++ ++ private static final int LoadStoreUnscaledImmOffset = 12; ++ private static final int LoadStoreScaledImmOffset = 10; ++ private static final int LoadStoreScaledRegOffset = 12; ++ private static final int LoadStoreIndexedImmOffset = 12; ++ private static final int LoadStoreTransferSizeOffset = 30; ++ private static final int LoadStoreFpFlagOffset = 
26; ++ private static final int LoadLiteralImmeOffset = 5; ++ ++ private static final int LoadStorePairOp = 0b101_0 << 26; ++ @SuppressWarnings("unused") private static final int LoadStorePairPostIndexOp = 0b101_0_001 << 23; ++ @SuppressWarnings("unused") private static final int LoadStorePairPreIndexOp = 0b101_0_011 << 23; ++ private static final int LoadStorePairImm7Offset = 15; ++ ++ private static final int LogicalShiftOp = 0x0A000000; ++ ++ private static final int ExceptionOp = 0xD4000000; ++ private static final int SystemImmediateOffset = 5; ++ ++ @SuppressWarnings("unused") private static final int SimdImmediateOffset = 16; ++ ++ private static final int BarrierOp = 0xD503301F; ++ private static final int BarrierKindOffset = 8; ++ ++ private static final int CASAcquireOffset = 22; ++ private static final int CASReleaseOffset = 15; ++ ++ private static final int LDADDAcquireOffset = 23; ++ private static final int LDADDReleaseOffset = 22; ++ ++ /** ++ * Encoding for all instructions. ++ */ ++ public enum Instruction { ++ BCOND(0x54000000), ++ CBNZ(0x01000000), ++ CBZ(0x00000000), ++ TBZ(0x36000000), ++ TBNZ(0x37000000), ++ ++ B(0x00000000), ++ BL(0x80000000), ++ BR(0x001F0000), ++ BLR(0x003F0000), ++ RET(0x005F0000), ++ ++ LDR(0x00000000), ++ LDRS(0x00800000), ++ LDXR(0x081f7c00), ++ LDAR(0x8dffc00), ++ LDAXR(0x85ffc00), ++ ++ STR(0x00000000), ++ STXR(0x08007c00), ++ STLR(0x089ffc00), ++ STLXR(0x0800fc00), ++ ++ LDP(0b1 << 22), ++ STP(0b0 << 22), ++ ++ CAS(0x08A07C00), ++ LDADD(0x38200000), ++ SWP(0x38208000), ++ ++ ADR(0x00000000), ++ ADRP(0x80000000), ++ ++ ADD(0x00000000), ++ ADDS(ADD.encoding | AddSubSetFlag), ++ SUB(0x40000000), ++ SUBS(SUB.encoding | AddSubSetFlag), ++ ++ NOT(0x00200000), ++ AND(0x00000000), ++ BIC(AND.encoding | NOT.encoding), ++ ORR(0x20000000), ++ ORN(ORR.encoding | NOT.encoding), ++ EOR(0x40000000), ++ EON(EOR.encoding | NOT.encoding), ++ ANDS(0x60000000), ++ BICS(ANDS.encoding | NOT.encoding), ++ ++ ASRV(0x00002800), ++ RORV(0x00002C00), ++ LSRV(0x00002400), ++ LSLV(0x00002000), ++ ++ CLS(0x00001400), ++ CLZ(0x00001000), ++ RBIT(0x00000000), ++ REVX(0x00000C00), ++ REVW(0x00000800), ++ ++ MOVN(0x00000000), ++ MOVZ(0x40000000), ++ MOVK(0x60000000), ++ ++ CSEL(0x00000000), ++ CSNEG(0x40000400), ++ CSINC(0x00000400), ++ ++ BFM(0x20000000), ++ SBFM(0x00000000), ++ UBFM(0x40000000), ++ EXTR(0x13800000), ++ ++ MADD(0x00000000), ++ MSUB(0x00008000), ++ SDIV(0x00000C00), ++ UDIV(0x00000800), ++ ++ FMOV(0x00000000), ++ FMOVCPU2FPU(0x00070000), ++ FMOVFPU2CPU(0x00060000), ++ ++ FCVTDS(0x00028000), ++ FCVTSD(0x00020000), ++ ++ FCVTZS(0x00180000), ++ SCVTF(0x00020000), ++ ++ FABS(0x00008000), ++ FSQRT(0x00018000), ++ FNEG(0x00010000), ++ ++ FRINTM(0x00050000), ++ FRINTN(0x00040000), ++ FRINTP(0x00048000), ++ FRINTZ(0x00058000), ++ ++ FADD(0x00002000), ++ FSUB(0x00003000), ++ FMUL(0x00000000), ++ FDIV(0x00001000), ++ FMAX(0x00004000), ++ FMIN(0x00005000), ++ ++ FMADD(0x00000000), ++ FMSUB(0x00008000), ++ ++ FCMP(0x00000000), ++ FCMPZERO(0x00000008), ++ FCCMP(0x1E200400), ++ FCSEL(0x1E200C00), ++ ++ INS(0x4e081c00), ++ UMOV(0x4e083c00), ++ ++ CNT(0xe205800), ++ USRA(0x6f001400), ++ ++ HLT(0x00400000), ++ BRK(0x00200000), ++ ++ CLREX(0xd5033f5f), ++ HINT(0xD503201F), ++ DMB(0x000000A0), ++ ++ BLR_NATIVE(0xc0000000); ++ ++ public final int encoding; ++ ++ Instruction(int encoding) { ++ this.encoding = encoding; ++ } ++ ++ } ++ ++ public enum ShiftType { ++ LSL(0), ++ LSR(1), ++ ASR(2), ++ ROR(3); ++ ++ public final int encoding; ++ ++ ShiftType(int encoding) { ++ 
this.encoding = encoding; ++ } ++ } ++ ++ public enum ExtendType { ++ UXTB(0), ++ UXTH(1), ++ UXTW(2), ++ UXTX(3), ++ SXTB(4), ++ SXTH(5), ++ SXTW(6), ++ SXTX(7); ++ ++ public final int encoding; ++ ++ ExtendType(int encoding) { ++ this.encoding = encoding; ++ } ++ } ++ ++ /** ++ * Condition Flags for branches. See 4.3 ++ */ ++ public enum ConditionFlag { ++ // Integer | Floating-point meanings ++ /** Equal | Equal. */ ++ EQ(0x0), ++ ++ /** Not Equal | Not equal or unordered. */ ++ NE(0x1), ++ ++ /** Unsigned Higher or Same | Greater than, equal or unordered. */ ++ HS(0x2), ++ ++ /** Unsigned lower | less than. */ ++ LO(0x3), ++ ++ /** Minus (negative) | less than. */ ++ MI(0x4), ++ ++ /** Plus (positive or zero) | greater than, equal or unordered. */ ++ PL(0x5), ++ ++ /** Overflow set | unordered. */ ++ VS(0x6), ++ ++ /** Overflow clear | ordered. */ ++ VC(0x7), ++ ++ /** Unsigned higher | greater than or unordered. */ ++ HI(0x8), ++ ++ /** Unsigned lower or same | less than or equal. */ ++ LS(0x9), ++ ++ /** Signed greater than or equal | greater than or equal. */ ++ GE(0xA), ++ ++ /** Signed less than | less than or unordered. */ ++ LT(0xB), ++ ++ /** Signed greater than | greater than. */ ++ GT(0xC), ++ ++ /** Signed less than or equal | less than, equal or unordered. */ ++ LE(0xD), ++ ++ /** Always | always. */ ++ AL(0xE), ++ ++ /** Always | always (identical to AL, just to have valid 0b1111 encoding). */ ++ NV(0xF); ++ ++ public final int encoding; ++ ++ ConditionFlag(int encoding) { ++ this.encoding = encoding; ++ } ++ ++ /** ++ * @return ConditionFlag specified by decoding. ++ */ ++ public static ConditionFlag fromEncoding(int encoding) { ++ return values()[encoding]; ++ } ++ ++ public ConditionFlag negate() { ++ switch (this) { ++ case EQ: ++ return NE; ++ case NE: ++ return EQ; ++ case HS: ++ return LO; ++ case LO: ++ return HS; ++ case MI: ++ return PL; ++ case PL: ++ return MI; ++ case VS: ++ return VC; ++ case VC: ++ return VS; ++ case HI: ++ return LS; ++ case LS: ++ return HI; ++ case GE: ++ return LT; ++ case LT: ++ return GE; ++ case GT: ++ return LE; ++ case LE: ++ return GT; ++ case AL: ++ case NV: ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ } ++ } ++ ++ public SW64Assembler(TargetDescription target) { ++ super(target); ++ } ++ ++ public boolean supports(CPUFeature feature) { ++ return ((SW64) target.arch).getFeatures().contains(feature); ++ } ++ ++ public boolean isFlagSet(Flag flag) { ++ return ((SW64) target.arch).getFlags().contains(flag); ++ } ++ ++ /* Conditional Branch (5.2.1) */ ++ ++ /** ++ * Branch conditionally. ++ * ++ * @param condition may not be null. ++ * @param imm21 Signed 21-bit offset, has to be word aligned. ++ */ ++ protected void b(ConditionFlag condition, int imm21) { ++ b(condition, imm21, -1); ++ } ++ ++ /** ++ * Branch conditionally. Inserts instruction into code buffer at pos. ++ * ++ * @param condition may not be null. ++ * @param imm21 Signed 21-bit offset, has to be word aligned. ++ * @param pos Position at which instruction is inserted into buffer. -1 means insert at end. ++ */ ++ protected void b(ConditionFlag condition, int imm21, int pos) { ++ if (pos == -1) { ++ emitInt(Instruction.BCOND.encoding | getConditionalBranchImm(imm21) | condition.encoding); ++ } else { ++ emitInt(Instruction.BCOND.encoding | getConditionalBranchImm(imm21) | condition.encoding, pos); ++ } ++ } ++ ++ /** ++ * Compare register and branch if non-zero. ++ * ++ * @param reg general purpose register. May not be null, zero-register or stackpointer. 
++ * @param size Instruction size in bits. Should be either 32 or 64. ++ * @param imm21 Signed 21-bit offset, has to be word aligned. ++ */ ++ protected void cbnz(int size, Register reg, int imm21) { ++ conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBNZ, -1); ++ } ++ ++ /** ++ * Compare register and branch if non-zero. ++ * ++ * @param reg general purpose register. May not be null, zero-register or stackpointer. ++ * @param size Instruction size in bits. Should be either 32 or 64. ++ * @param imm21 Signed 21-bit offset, has to be word aligned. ++ * @param pos Position at which instruction is inserted into buffer. -1 means insert at end. ++ */ ++ protected void cbnz(int size, Register reg, int imm21, int pos) { ++ conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBNZ, pos); ++ } ++ ++ /** ++ * Compare and branch if zero. ++ * ++ * @param reg general purpose register. May not be null, zero-register or stackpointer. ++ * @param size Instruction size in bits. Should be either 32 or 64. ++ * @param imm21 Signed 21-bit offset, has to be word aligned. ++ */ ++ protected void cbz(int size, Register reg, int imm21) { ++ conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBZ, -1); ++ } ++ ++ /** ++ * Compare register and branch if zero. ++ * ++ * @param reg general purpose register. May not be null, zero-register or stackpointer. ++ * @param size Instruction size in bits. Should be either 32 or 64. ++ * @param imm21 Signed 21-bit offset, has to be word aligned. ++ * @param pos Position at which instruction is inserted into buffer. -1 means insert at end. ++ */ ++ protected void cbz(int size, Register reg, int imm21, int pos) { ++ conditionalBranchInstruction(reg, imm21, generalFromSize(size), Instruction.CBZ, pos); ++ } ++ ++ /** ++ * Test a single bit and branch if the bit is nonzero. ++ * ++ * @param reg general purpose register. May not be null, zero-register or stackpointer. ++ * @param uimm6 Unsigned 6-bit bit index. ++ * @param imm16 signed 16 bit offset ++ */ ++ protected void tbnz(Register reg, int uimm6, int imm16) { ++ tbnz(reg, uimm6, imm16, -1); ++ } ++ ++ /** ++ * Test a single bit and branch if the bit is zero. ++ * ++ * @param reg general purpose register. May not be null, zero-register or stackpointer. ++ * @param uimm6 Unsigned 6-bit bit index. ++ * @param imm16 signed 16 bit offset ++ */ ++ protected void tbz(Register reg, int uimm6, int imm16) { ++ tbz(reg, uimm6, imm16, -1); ++ } ++ ++ /** ++ * Test a single bit and branch if the bit is nonzero. ++ * ++ * @param reg general purpose register. May not be null, zero-register or stackpointer. ++ * @param uimm6 Unsigned 6-bit bit index. ++ * @param imm16 signed 16 bit offset ++ * @param pos Position at which instruction is inserted into buffer. -1 means insert at end. ++ */ ++ protected void tbnz(Register reg, int uimm6, int imm16, int pos) { ++ assert reg.getRegisterCategory().equals(CPU); ++ assert NumUtil.isUnsignedNbit(6, uimm6); ++ assert NumUtil.isSignedNbit(18, imm16); ++ assert (imm16 & 3) == 0; ++ // size bit is overloaded as top bit of uimm6 bit index ++ int size = (((uimm6 >> 5) & 1) == 0 ? 
32 : 64); ++ // remaining 5 bits are encoded lower down ++ int uimm5 = uimm6 >> 1; ++ int offset = (imm16 & NumUtil.getNbitNumberInt(16)) >> 2; ++ InstructionType type = generalFromSize(size); ++ int encoding = type.encoding | TBNZ.encoding | (uimm5 << 19) | (offset << 5) | rd(reg); ++ if (pos == -1) { ++ emitInt(encoding); ++ } else { ++ emitInt(encoding, pos); ++ } ++ } ++ ++ /** ++ * Test a single bit and branch if the bit is zero. ++ * ++ * @param reg general purpose register. May not be null, zero-register or stackpointer. ++ * @param uimm6 Unsigned 6-bit bit index. ++ * @param imm16 signed 16 bit offset ++ * @param pos Position at which instruction is inserted into buffer. -1 means insert at end. ++ */ ++ protected void tbz(Register reg, int uimm6, int imm16, int pos) { ++ assert reg.getRegisterCategory().equals(CPU); ++ assert NumUtil.isUnsignedNbit(6, uimm6); ++ assert NumUtil.isSignedNbit(18, imm16); ++ assert (imm16 & 3) == 0; ++ // size bit is overloaded as top bit of uimm6 bit index ++ int size = (((uimm6 >> 5) & 1) == 0 ? 32 : 64); ++ // remaining 5 bits are encoded lower down ++ int uimm5 = uimm6 >> 1; ++ int offset = (imm16 & NumUtil.getNbitNumberInt(16)) >> 2; ++ InstructionType type = generalFromSize(size); ++ int encoding = type.encoding | TBZ.encoding | (uimm5 << 19) | (offset << 5) | rd(reg); ++ if (pos == -1) { ++ emitInt(encoding); ++ } else { ++ emitInt(encoding, pos); ++ } ++ } ++ ++ private void conditionalBranchInstruction(Register reg, int imm21, InstructionType type, Instruction instr, int pos) { ++ assert reg.getRegisterCategory().equals(CPU); ++ int instrEncoding = instr.encoding | CompareBranchOp; ++ if (pos == -1) { ++ emitInt(type.encoding | instrEncoding | getConditionalBranchImm(imm21) | rd(reg)); ++ } else { ++ emitInt(type.encoding | instrEncoding | getConditionalBranchImm(imm21) | rd(reg), pos); ++ } ++ } ++ ++ private static int getConditionalBranchImm(int imm21) { ++ assert NumUtil.isSignedNbit(21, imm21) && (imm21 & 0x3) == 0 : "Immediate has to be 21bit signed number and word aligned"; ++ int imm = (imm21 & NumUtil.getNbitNumberInt(21)) >> 2; ++ return imm << ConditionalBranchImmOffset; ++ } ++ ++ /* Unconditional Branch (immediate) (5.2.2) */ ++ ++ /** ++ * @param imm28 Signed 28-bit offset, has to be word aligned. ++ */ ++ protected void b(int imm28) { ++ unconditionalBranchImmInstruction(imm28, Instruction.B, -1); ++ } ++ ++ /** ++ * ++ * @param imm28 Signed 28-bit offset, has to be word aligned. ++ * @param pos Position where instruction is inserted into code buffer. ++ */ ++ protected void b(int imm28, int pos) { ++ unconditionalBranchImmInstruction(imm28, Instruction.B, pos); ++ } ++ ++ /** ++ * Branch and link return address to register X30. ++ * ++ * @param imm28 Signed 28-bit offset, has to be word aligned. ++ */ ++ public void bl(int imm28) { ++ unconditionalBranchImmInstruction(imm28, Instruction.BL, -1); ++ } ++ ++ private void unconditionalBranchImmInstruction(int imm28, Instruction instr, int pos) { ++ assert NumUtil.isSignedNbit(28, imm28) && (imm28 & 0x3) == 0 : "Immediate has to be 28bit signed number and word aligned"; ++ int imm = (imm28 & NumUtil.getNbitNumberInt(28)) >> 2; ++ int instrEncoding = instr.encoding | UnconditionalBranchImmOp; ++ if (pos == -1) { ++ emitInt(instrEncoding | imm); ++ } else { ++ emitInt(instrEncoding | imm, pos); ++ } ++ } ++ ++ /* Unconditional Branch (register) (5.2.3) */ ++ ++ /** ++ * Branches to address in register and writes return address into register X30. 
++ * ++ * @param reg general purpose register. May not be null, zero-register or stackpointer. ++ */ ++ public void blr(Register reg) { ++ unconditionalBranchRegInstruction(BLR, reg); ++ } ++ ++ /** ++ * Branches to address in register. ++ * ++ * @param reg general purpose register. May not be null, zero-register or stackpointer. ++ */ ++ protected void br(Register reg) { ++ unconditionalBranchRegInstruction(BR, reg); ++ } ++ ++ /** ++ * Return to address in register. ++ * ++ * @param reg general purpose register. May not be null, zero-register or stackpointer. ++ */ ++ public void ret(Register reg) { ++ unconditionalBranchRegInstruction(RET, reg); ++ } ++ ++ private void unconditionalBranchRegInstruction(Instruction instr, Register reg) { ++ assert reg.getRegisterCategory().equals(CPU); ++ assert !reg.equals(zr); ++ assert !reg.equals(sp); ++ emitInt(instr.encoding | UnconditionalBranchRegOp | rs1(reg)); ++ } ++ ++ /* Load-Store Single Register (5.3.1) */ ++ ++ /** ++ * Loads a srcSize value from address into rt zero-extending it. ++ * ++ * @param srcSize size of memory read in bits. Must be 8, 16, 32 or 64. ++ * @param rt general purpose register. May not be null or stackpointer. ++ * @param address all addressing modes allowed. May not be null. ++ */ ++ public void ldr(int srcSize, Register rt, SW64Address address) { ++ assert rt.getRegisterCategory().equals(CPU); ++ assert srcSize == 8 || srcSize == 16 || srcSize == 32 || srcSize == 64; ++ int transferSize = NumUtil.log2Ceil(srcSize / 8); ++ loadStoreInstruction(LDR, rt, address, General32, transferSize); ++ } ++ ++ /** ++ * Loads a srcSize value from address into rt sign-extending it. ++ * ++ * @param targetSize size of target register in bits. Must be 32 or 64. ++ * @param srcSize size of memory read in bits. Must be 8, 16 or 32, but may not be equivalent to ++ * targetSize. ++ * @param rt general purpose register. May not be null or stackpointer. ++ * @param address all addressing modes allowed. May not be null. 
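++ * For example, {@code ldrs(64, 8, rt, address)} loads one byte from this address and sign-extends it into all 64 bits of rt.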
++ */ ++ protected void ldrs(int targetSize, int srcSize, Register rt, SW64Address address) { ++ assert rt.getRegisterCategory().equals(CPU); ++ assert (srcSize == 8 || srcSize == 16 || srcSize == 32) && srcSize != targetSize; ++ int transferSize = NumUtil.log2Ceil(srcSize / 8); ++ loadStoreInstruction(LDRS, rt, address, generalFromSize(targetSize), transferSize); ++ } ++ ++ public enum PrefetchMode { ++ PLDL1KEEP(0b00000), ++ PLDL1STRM(0b00001), ++ PLDL2KEEP(0b00010), ++ PLDL2STRM(0b00011), ++ PLDL3KEEP(0b00100), ++ PLDL3STRM(0b00101), ++ ++ PLIL1KEEP(0b01000), ++ PLIL1STRM(0b01001), ++ PLIL2KEEP(0b01010), ++ PLIL2STRM(0b01011), ++ PLIL3KEEP(0b01100), ++ PLIL3STRM(0b01101), ++ ++ PSTL1KEEP(0b10000), ++ PSTL1STRM(0b10001), ++ PSTL2KEEP(0b10010), ++ PSTL2STRM(0b10011), ++ PSTL3KEEP(0b10100), ++ PSTL3STRM(0b10101); ++ ++ private final int encoding; ++ ++ PrefetchMode(int encoding) { ++ this.encoding = encoding; ++ } ++ ++ private static PrefetchMode[] modes = { ++ PLDL1KEEP, ++ PLDL1STRM, ++ PLDL2KEEP, ++ PLDL2STRM, ++ PLDL3KEEP, ++ PLDL3STRM, ++ ++ null, ++ null, ++ ++ PLIL1KEEP, ++ PLIL1STRM, ++ PLIL2KEEP, ++ PLIL2STRM, ++ PLIL3KEEP, ++ PLIL3STRM, ++ ++ null, ++ null, ++ ++ PSTL1KEEP, ++ PSTL1STRM, ++ PSTL2KEEP, ++ PSTL2STRM, ++ PSTL3KEEP, ++ PSTL3STRM ++ }; ++ ++ public static PrefetchMode lookup(int enc) { ++ assert enc >= 00 && enc < modes.length; ++ return modes[enc]; ++ } ++ ++ public Register toRegister() { ++ return cpuRegisters.get(encoding); ++ } ++ } ++ ++ /* ++ * implements a prefetch at a 64-bit aligned address using a scaled 12 bit or unscaled 9 bit ++ * displacement addressing mode ++ * ++ * @param rt general purpose register. May not be null, zr or stackpointer. ++ * ++ * @param address only displacement addressing modes allowed. May not be null. ++ */ ++ public void prfm(SW64Address address, PrefetchMode mode) { ++ assert (address.getAddressingMode() == AddressingMode.IMMEDIATE_SCALED || ++ address.getAddressingMode() == AddressingMode.IMMEDIATE_UNSCALED || ++ address.getAddressingMode() == AddressingMode.REGISTER_OFFSET); ++ assert mode != null; ++ final int srcSize = 64; ++ final int transferSize = NumUtil.log2Ceil(srcSize / 8); ++ final Register rt = mode.toRegister(); ++ // this looks weird but that's because loadStoreInstruction is weird ++ // instruction select fields are size [31:30], v [26] and opc [25:24] ++ // prfm requires size == 0b11, v == 0b0 and opc == 0b11 ++ // passing LDRS ensures opc[1] == 0b1 ++ // (n.b. passing LDR/STR makes no difference to opc[1:0]!!) ++ // passing General64 ensures opc[0] == 0b1 and v = 0b0 ++ // (n.b. passing General32 ensures opc[0] == 0b0 and v = 0b0) ++ // srcSize 64 ensures size == 0b11 ++ loadStoreInstruction(LDRS, rt, address, General64, transferSize); ++ } ++ ++ /** ++ * Stores register rt into memory pointed by address. ++ * ++ * @param destSize number of bits written to memory. Must be 8, 16, 32 or 64. ++ * @param rt general purpose register. May not be null or stackpointer. ++ * @param address all addressing modes allowed. May not be null. 
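++ * For example, {@code str(8, rt, address)} writes only the least significant byte of rt to this address.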
++ */ ++ public void str(int destSize, Register rt, SW64Address address) { ++ assert rt.getRegisterCategory().equals(CPU); ++ assert destSize == 8 || destSize == 16 || destSize == 32 || destSize == 64; ++ int transferSize = NumUtil.log2Ceil(destSize / 8); ++ loadStoreInstruction(STR, rt, address, General64, transferSize); ++ } ++ ++ private void loadStoreInstruction(Instruction instr, Register reg, SW64Address address, InstructionType type, int log2TransferSize) { ++ assert log2TransferSize >= 0 && log2TransferSize < 4; ++ int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset; ++ int is32Bit = type.width == 32 ? 1 << ImmediateSizeOffset : 0; ++ int isFloat = !type.isGeneral ? 1 << LoadStoreFpFlagOffset : 0; ++ int memop = instr.encoding | transferSizeEncoding | is32Bit | isFloat | rt(reg); ++ switch (address.getAddressingMode()) { ++ case IMMEDIATE_SCALED: ++ emitInt(memop | LoadStoreScaledOp | address.getImmediate() << LoadStoreScaledImmOffset | rs1(address.getBase())); ++ break; ++ case IMMEDIATE_UNSCALED: ++ emitInt(memop | LoadStoreUnscaledOp | address.getImmediate() << LoadStoreUnscaledImmOffset | rs1(address.getBase())); ++ break; ++ case BASE_REGISTER_ONLY: ++ emitInt(memop | LoadStoreScaledOp | rs1(address.getBase())); ++ break; ++ case EXTENDED_REGISTER_OFFSET: ++ case REGISTER_OFFSET: ++ ExtendType extendType = address.getAddressingMode() == AddressingMode.EXTENDED_REGISTER_OFFSET ? address.getExtendType() : ExtendType.UXTX; ++ boolean shouldScale = address.isScaled() && log2TransferSize != 0; ++ emitInt(memop | LoadStoreRegisterOp | rs2(address.getOffset()) | extendType.encoding << ExtendTypeOffset | (shouldScale ? 1 : 0) << LoadStoreScaledRegOffset | rs1(address.getBase())); ++ break; ++ case PC_LITERAL: ++ assert log2TransferSize >= 2 : "PC literal loads only works for load/stores of 32-bit and larger"; ++ transferSizeEncoding = (log2TransferSize - 2) << LoadStoreTransferSizeOffset; ++ emitInt(transferSizeEncoding | isFloat | LoadLiteralOp | rd(reg) | address.getImmediate() << LoadLiteralImmeOffset); ++ break; ++ case IMMEDIATE_POST_INDEXED: ++ emitInt(memop | LoadStorePostIndexedOp | rs1(address.getBase()) | address.getImmediate() << LoadStoreIndexedImmOffset); ++ break; ++ case IMMEDIATE_PRE_INDEXED: ++ emitInt(memop | LoadStorePreIndexedOp | rs1(address.getBase()) | address.getImmediate() << LoadStoreIndexedImmOffset); ++ break; ++ default: ++ throw GraalError.shouldNotReachHere("Unhandled addressing mode: " + address.getAddressingMode()); ++ } ++ } ++ ++ /** ++ * Load Pair of Registers calculates an address from a base register value and an immediate ++ * offset, and stores two 32-bit words or two 64-bit doublewords to the calculated address, from ++ * two registers. ++ */ ++ public void ldp(int size, Register rt, Register rt2, SW64Address address) { ++ assert size == 32 || size == 64; ++ loadStorePairInstruction(LDP, rt, rt2, address, generalFromSize(size)); ++ } ++ ++ /** ++ * Store Pair of Registers calculates an address from a base register value and an immediate ++ * offset, and stores two 32-bit words or two 64-bit doublewords to the calculated address, from ++ * two registers. 
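++ * The immediate offset of the address must fit the signed 7-bit scaled form used by LDP/STP (see loadStorePairInstruction).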
++ */ ++ public void stp(int size, Register rt, Register rt2, SW64Address address) { ++ assert size == 32 || size == 64; ++ loadStorePairInstruction(STP, rt, rt2, address, generalFromSize(size)); ++ } ++ ++ private void loadStorePairInstruction(Instruction instr, Register rt, Register rt2, SW64Address address, InstructionType type) { ++ int scaledOffset = maskField(7, address.getImmediateRaw()); // LDP/STP use a 7-bit scaled ++ // offset ++ int memop = type.encoding | instr.encoding | scaledOffset << LoadStorePairImm7Offset | rt2(rt2) | rn(address.getBase()) | rt(rt); ++ switch (address.getAddressingMode()) { ++ case IMMEDIATE_SCALED: ++ emitInt(memop | LoadStorePairOp | (0b010 << 23)); ++ break; ++ case IMMEDIATE_POST_INDEXED: ++ emitInt(memop | LoadStorePairOp | (0b001 << 23)); ++ break; ++ case IMMEDIATE_PRE_INDEXED: ++ emitInt(memop | LoadStorePairOp | (0b011 << 23)); ++ break; ++ default: ++ throw GraalError.shouldNotReachHere("Unhandled addressing mode: " + address.getAddressingMode()); ++ } ++ } ++ ++ /* Load-Store Exclusive (5.3.6) */ ++ ++ /** ++ * Load address exclusive. Natural alignment of address is required. ++ * ++ * @param size size of memory read in bits. Must be 8, 16, 32 or 64. ++ * @param rt general purpose register. May not be null or stackpointer. ++ * @param rn general purpose register. ++ */ ++ protected void ldxr(int size, Register rt, Register rn) { ++ assert size == 8 || size == 16 || size == 32 || size == 64; ++ int transferSize = NumUtil.log2Ceil(size / 8); ++ exclusiveLoadInstruction(LDXR, rt, rn, transferSize); ++ } ++ ++ /** ++ * Store address exclusive. Natural alignment of address is required. rs and rt may not point to ++ * the same register. ++ * ++ * @param size size of bits written to memory. Must be 8, 16, 32 or 64. ++ * @param rs general purpose register. Set to exclusive access status. 0 means success, ++ * everything else failure. May not be null, or stackpointer. ++ * @param rt general purpose register. May not be null or stackpointer. ++ * @param rn general purpose register. ++ */ ++ protected void stxr(int size, Register rs, Register rt, Register rn) { ++ assert size == 8 || size == 16 || size == 32 || size == 64; ++ int transferSize = NumUtil.log2Ceil(size / 8); ++ exclusiveStoreInstruction(STXR, rs, rt, rn, transferSize); ++ } ++ ++ /* Load-Acquire/Store-Release (5.3.7) */ ++ ++ /* non exclusive access */ ++ /** ++ * Load acquire. Natural alignment of address is required. ++ * ++ * @param size size of memory read in bits. Must be 8, 16, 32 or 64. ++ * @param rt general purpose register. May not be null or stackpointer. ++ * @param rn general purpose register. ++ */ ++ protected void ldar(int size, Register rt, Register rn) { ++ assert size == 8 || size == 16 || size == 32 || size == 64; ++ int transferSize = NumUtil.log2Ceil(size / 8); ++ exclusiveLoadInstruction(LDAR, rt, rn, transferSize); ++ } ++ ++ /** ++ * Store-release. Natural alignment of address is required. ++ * ++ * @param size size of bits written to memory. Must be 8, 16, 32 or 64. ++ * @param rt general purpose register. May not be null or stackpointer. ++ * @param rn general purpose register. ++ */ ++ protected void stlr(int size, Register rt, Register rn) { ++ assert size == 8 || size == 16 || size == 32 || size == 64; ++ int transferSize = NumUtil.log2Ceil(size / 8); ++ // Hack: Passing the zero-register means it is ignored when building the encoding. 
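++ // (The STLR opcode constant above already has the Rs field bits [20:16] set to ones, so the value
++ // OR-ed in by rs2() cannot change them and the register passed here never reaches the emitted instruction.)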
++ exclusiveStoreInstruction(STLR, r0, rt, rn, transferSize); ++ } ++ ++ /* exclusive access */ ++ /** ++ * Load acquire exclusive. Natural alignment of address is required. ++ * ++ * @param size size of memory read in bits. Must be 8, 16, 32 or 64. ++ * @param rt general purpose register. May not be null or stackpointer. ++ * @param rn general purpose register. ++ */ ++ public void ldaxr(int size, Register rt, Register rn) { ++ assert size == 8 || size == 16 || size == 32 || size == 64; ++ int transferSize = NumUtil.log2Ceil(size / 8); ++ exclusiveLoadInstruction(LDAXR, rt, rn, transferSize); ++ } ++ ++ /** ++ * Store-release exclusive. Natural alignment of address is required. rs and rt may not point to ++ * the same register. ++ * ++ * @param size size of bits written to memory. Must be 8, 16, 32 or 64. ++ * @param rs general purpose register. Set to exclusive access status. 0 means success, ++ * everything else failure. May not be null, or stackpointer. ++ * @param rt general purpose register. May not be null or stackpointer. ++ * @param rn general purpose register. ++ */ ++ public void stlxr(int size, Register rs, Register rt, Register rn) { ++ assert size == 8 || size == 16 || size == 32 || size == 64; ++ int transferSize = NumUtil.log2Ceil(size / 8); ++ exclusiveStoreInstruction(STLXR, rs, rt, rn, transferSize); ++ } ++ ++ private void exclusiveLoadInstruction(Instruction instr, Register reg, Register rn, int log2TransferSize) { ++ assert log2TransferSize >= 0 && log2TransferSize < 4; ++ assert reg.getRegisterCategory().equals(CPU); ++ int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset; ++ emitInt(transferSizeEncoding | instr.encoding | 1 << ImmediateSizeOffset | rn(rn) | rt(reg)); ++ } ++ ++ /** ++ * Stores data from rt into address and sets rs to the returned exclusive access status. ++ * ++ * @param rs general purpose register into which the exclusive access status is written. May not ++ * be null. ++ * @param rt general purpose register containing data to be written to memory at address. May ++ * not be null ++ * @param rn general purpose register containing the address specifying where rt is written to. ++ * @param log2TransferSize log2Ceil of memory transfer size. ++ */ ++ private void exclusiveStoreInstruction(Instruction instr, Register rs, Register rt, Register rn, int log2TransferSize) { ++ assert log2TransferSize >= 0 && log2TransferSize < 4; ++ assert rt.getRegisterCategory().equals(CPU) && rs.getRegisterCategory().equals(CPU) && !rs.equals(rt); ++ int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset; ++ emitInt(transferSizeEncoding | instr.encoding | rs2(rs) | rn(rn) | rt(rt)); ++ } ++ ++ /** ++ * Compare And Swap word or doubleword in memory. This reads a value from an address rn, ++ * compares it against a given value rs, and, if equal, stores the value rt to memory. The value ++ * read from address rn is stored in register rs. ++ * ++ * @param size size of bits read from memory. Must be 32 or 64. ++ * @param rs general purpose register to be compared and loaded. May not be null. ++ * @param rt general purpose register to be conditionally stored. May not be null. ++ * @param rn general purpose register containing the address from which to read. ++ * @param acquire boolean value signifying if the load should use acquire semantics. ++ * @param release boolean value signifying if the store should use release semantics. 
++ */ ++ public void cas(int size, Register rs, Register rt, Register rn, boolean acquire, boolean release) { ++ assert size == 32 || size == 64; ++ int transferSize = NumUtil.log2Ceil(size / 8); ++ compareAndSwapInstruction(CAS, rs, rt, rn, transferSize, acquire, release); ++ } ++ ++ private void compareAndSwapInstruction(Instruction instr, Register rs, Register rt, Register rn, int log2TransferSize, boolean acquire, boolean release) { ++ assert log2TransferSize >= 0 && log2TransferSize < 4; ++ assert rt.getRegisterCategory().equals(CPU) && rs.getRegisterCategory().equals(CPU) && !rs.equals(rt); ++ int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset; ++ emitInt(transferSizeEncoding | instr.encoding | rs2(rs) | rn(rn) | rt(rt) | (acquire ? 1 : 0) << CASAcquireOffset | (release ? 1 : 0) << CASReleaseOffset); ++ } ++ ++ /** ++ * Atomic add. This reads a value from an address rn, stores the value in rt, and adds the value ++ * in rs to it, and stores the result back at address rn. The initial value read from memory is ++ * stored in rt. ++ * ++ * @param size size of operand to read from memory. Must be 8, 16, 32, or 64. ++ * @param rs general purpose register to be added to contents. May not be null. ++ * @param rt general purpose register to be loaded. May not be null. ++ * @param rn general purpose register or stack pointer holding an address from which to load. ++ * @param acquire boolean value signifying if the load should use acquire semantics. ++ * @param release boolean value signifying if the store should use release semantics. ++ */ ++ public void ldadd(int size, Register rs, Register rt, Register rn, boolean acquire, boolean release) { ++ assert size == 8 || size == 16 || size == 32 || size == 64; ++ int transferSize = NumUtil.log2Ceil(size / 8); ++ loadAndAddInstruction(LDADD, rs, rt, rn, transferSize, acquire, release); ++ } ++ ++ private void loadAndAddInstruction(Instruction instr, Register rs, Register rt, Register rn, int log2TransferSize, boolean acquire, boolean release) { ++ assert log2TransferSize >= 0 && log2TransferSize < 4; ++ assert rt.getRegisterCategory().equals(CPU) && rs.getRegisterCategory().equals(CPU) && !rs.equals(rt); ++ int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset; ++ emitInt(transferSizeEncoding | instr.encoding | rs2(rs) | rn(rn) | rt(rt) | (acquire ? 1 : 0) << LDADDAcquireOffset | (release ? 1 : 0) << LDADDReleaseOffset); ++ } ++ ++ /** ++ * Atomic swap. This reads a value from an address rn, stores the value in rt, and then stores ++ * the value in rs back at address rn. ++ * ++ * @param size size of operand to read from memory. Must be 8, 16, 32, or 64. ++ * @param rs general purpose register to be stored. May not be null. ++ * @param rt general purpose register to be loaded. May not be null. ++ * @param rn general purpose register or stack pointer holding an address from which to load. ++ * @param acquire boolean value signifying if the load should use acquire semantics. ++ * @param release boolean value signifying if the store should use release semantics. 
++ */ ++ public void swp(int size, Register rs, Register rt, Register rn, boolean acquire, boolean release) { ++ assert size == 8 || size == 16 || size == 32 || size == 64; ++ int transferSize = NumUtil.log2Ceil(size / 8); ++ swapInstruction(SWP, rs, rt, rn, transferSize, acquire, release); ++ } ++ ++ private void swapInstruction(Instruction instr, Register rs, Register rt, Register rn, int log2TransferSize, boolean acquire, boolean release) { ++ assert log2TransferSize >= 0 && log2TransferSize < 4; ++ assert rt.getRegisterCategory().equals(CPU) && rs.getRegisterCategory().equals(CPU) && !rs.equals(rt); ++ int transferSizeEncoding = log2TransferSize << LoadStoreTransferSizeOffset; ++ emitInt(transferSizeEncoding | instr.encoding | rs2(rs) | rn(rn) | rt(rt) | (acquire ? 1 : 0) << LDADDAcquireOffset | (release ? 1 : 0) << LDADDReleaseOffset); ++ } ++ ++ /* PC-relative Address Calculation (5.4.4) */ ++ ++ /** ++ * Address of page: sign extends 21-bit offset, shifts if left by 12 and adds it to the value of ++ * the PC with its bottom 12-bits cleared, writing the result to dst. No offset is emitted; the ++ * instruction will be patched later. ++ * ++ * @param dst general purpose register. May not be null, zero-register or stackpointer. ++ */ ++ public void adrp(Register dst) { ++ emitInt(ADRP.encoding | PcRelImmOp | rd(dst)); ++ } ++ ++ /** ++ * Adds a 21-bit signed offset to the program counter and writes the result to dst. ++ * ++ * @param dst general purpose register. May not be null, zero-register or stackpointer. ++ * @param imm21 Signed 21-bit offset. ++ */ ++ public void adr(Register dst, int imm21) { ++ emitInt(ADR.encoding | PcRelImmOp | rd(dst) | getPcRelativeImmEncoding(imm21)); ++ } ++ ++ /** ++ * Adds a 21-bit signed offset to the program counter and writes the result to dst. ++ * ++ * @param dst general purpose register. May not be null, zero-register or stackpointer. ++ * @param imm21 Signed 21-bit offset. ++ * @param pos the position in the code that the instruction is emitted. ++ */ ++ public void adr(Register dst, int imm21, int pos) { ++ emitInt(ADR.encoding | PcRelImmOp | rd(dst) | getPcRelativeImmEncoding(imm21), pos); ++ } ++ ++ private static int getPcRelativeImmEncoding(int imm21) { ++ assert NumUtil.isSignedNbit(21, imm21); ++ int imm = imm21 & NumUtil.getNbitNumberInt(21); ++ // higher 19 bit ++ int immHi = (imm >> 2) << PcRelImmHiOffset; ++ // lower 2 bit ++ int immLo = (imm & 0x3) << PcRelImmLoOffset; ++ return immHi | immLo; ++ } ++ ++ /* Arithmetic (Immediate) (5.4.1) */ ++ ++ /** ++ * dst = src + aimm. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or zero-register. ++ * @param src general purpose register. May not be null or zero-register. ++ * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with ++ * the lower 12-bit cleared. ++ */ ++ protected void add(int size, Register dst, Register src, int aimm) { ++ assert !dst.equals(zr); ++ assert !src.equals(zr); ++ addSubImmInstruction(ADD, dst, src, aimm, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src + aimm and sets condition flags. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src general purpose register. May not be null or zero-register. ++ * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with ++ * the lower 12-bit cleared. 
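++ * For example, 0xFFF and 0xFFF000 can be encoded, while 0x1001 cannot (see isAimm/encodeAimm).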
++ */ ++ protected void adds(int size, Register dst, Register src, int aimm) { ++ assert !dst.equals(sp); ++ assert !src.equals(zr); ++ addSubImmInstruction(ADDS, dst, src, aimm, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src - aimm. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or zero-register. ++ * @param src general purpose register. May not be null or zero-register. ++ * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with ++ * the lower 12-bit cleared. ++ */ ++ protected void sub(int size, Register dst, Register src, int aimm) { ++ assert !dst.equals(zr); ++ assert !src.equals(zr); ++ addSubImmInstruction(SUB, dst, src, aimm, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src - aimm and sets condition flags. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src general purpose register. May not be null or zero-register. ++ * @param aimm arithmetic immediate. Either unsigned 12-bit value or unsigned 24-bit value with ++ * the lower 12-bit cleared. ++ */ ++ protected void subs(int size, Register dst, Register src, int aimm) { ++ assert !dst.equals(sp); ++ assert !src.equals(zr); ++ addSubImmInstruction(SUBS, dst, src, aimm, generalFromSize(size)); ++ } ++ ++ private void addSubImmInstruction(Instruction instr, Register dst, Register src, int aimm, InstructionType type) { ++ emitInt(type.encoding | instr.encoding | AddSubImmOp | encodeAimm(aimm) | rd(dst) | rs1(src)); ++ } ++ ++ /** ++ * Encodes arithmetic immediate. ++ * ++ * @param imm Immediate has to be either an unsigned 12-bit value or an unsigned 24-bit value ++ * with the lower 12 bits zero. ++ * @return Representation of immediate for use with arithmetic instructions. ++ */ ++ private static int encodeAimm(int imm) { ++ assert isAimm(imm) : "Immediate has to be legal arithmetic immediate value " + imm; ++ if (NumUtil.isUnsignedNbit(12, imm)) { ++ return imm << ImmediateOffset; ++ } else { ++ // First 12-bit are zero, so shift immediate 12-bit and set flag to indicate ++ // shifted immediate value. ++ return (imm >>> 12 << ImmediateOffset) | AddSubShift12; ++ } ++ } ++ ++ /** ++ * Checks whether immediate can be encoded as an arithmetic immediate. ++ * ++ * @param imm Immediate has to be either an unsigned 12bit value or un unsigned 24bit value with ++ * the lower 12 bits 0. ++ * @return true if valid arithmetic immediate, false otherwise. ++ */ ++ protected static boolean isAimm(int imm) { ++ return NumUtil.isUnsignedNbit(12, imm) || NumUtil.isUnsignedNbit(12, imm >>> 12) && (imm & 0xfff) == 0; ++ } ++ ++ /* Logical (immediate) (5.4.2) */ ++ ++ /** ++ * dst = src & bimm. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or zero-register. ++ * @param src general purpose register. May not be null or stack-pointer. ++ * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition. ++ */ ++ public void and(int size, Register dst, Register src, long bimm) { ++ assert !dst.equals(zr); ++ assert !src.equals(sp); ++ logicalImmInstruction(AND, dst, src, bimm, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src & bimm and sets condition flags. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stack-pointer. ++ * @param src general purpose register. 
May not be null or stack-pointer. ++ * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition. ++ */ ++ public void ands(int size, Register dst, Register src, long bimm) { ++ assert !dst.equals(sp); ++ assert !src.equals(sp); ++ logicalImmInstruction(ANDS, dst, src, bimm, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src ^ bimm. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or zero-register. ++ * @param src general purpose register. May not be null or stack-pointer. ++ * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition. ++ */ ++ public void eor(int size, Register dst, Register src, long bimm) { ++ assert !dst.equals(zr); ++ assert !src.equals(sp); ++ logicalImmInstruction(EOR, dst, src, bimm, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src | bimm. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or zero-register. ++ * @param src general purpose register. May not be null or stack-pointer. ++ * @param bimm logical immediate. See {@link LogicalImmediateTable} for exact definition. ++ */ ++ protected void orr(int size, Register dst, Register src, long bimm) { ++ assert !dst.equals(zr); ++ assert !src.equals(sp); ++ logicalImmInstruction(ORR, dst, src, bimm, generalFromSize(size)); ++ } ++ ++ private void logicalImmInstruction(Instruction instr, Register dst, Register src, long bimm, InstructionType type) { ++ // Mask higher bits off, since we always pass longs around even for the 32-bit instruction. ++ long bimmValue; ++ if (type == General32) { ++ assert (bimm >> 32) == 0 || (bimm >> 32) == -1L : "Higher order bits for 32-bit instruction must either all be 0 or 1."; ++ bimmValue = bimm & NumUtil.getNbitNumberLong(32); ++ } else { ++ bimmValue = bimm; ++ } ++ int immEncoding = LogicalImmediateTable.getLogicalImmEncoding(type == General64, bimmValue); ++ emitInt(type.encoding | instr.encoding | LogicalImmOp | immEncoding | rd(dst) | rs1(src)); ++ } ++ ++ /* Move (wide immediate) (5.4.3) */ ++ ++ /** ++ * dst = uimm16 << shiftAmt. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null, stackpointer or zero-register. ++ * @param uimm16 16-bit unsigned immediate ++ * @param shiftAmt amount by which uimm16 is left shifted. Can be any multiple of 16 smaller ++ * than size. ++ */ ++ protected void movz(int size, Register dst, int uimm16, int shiftAmt) { ++ moveWideImmInstruction(MOVZ, dst, uimm16, shiftAmt, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = ~(uimm16 << shiftAmt). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null, stackpointer or zero-register. ++ * @param uimm16 16-bit unsigned immediate ++ * @param shiftAmt amount by which uimm16 is left shifted. Can be any multiple of 16 smaller ++ * than size. ++ */ ++ protected void movn(int size, Register dst, int uimm16, int shiftAmt) { ++ moveWideImmInstruction(MOVN, dst, uimm16, shiftAmt, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = uimm16. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null, stackpointer or zero-register. ++ * @param uimm16 16-bit unsigned immediate ++ * @param pos position into which uimm16 is inserted. Can be any multiple of 16 smaller than ++ * size. 
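++ * As the move-wide-with-keep form, this is expected to leave bits of dst outside the selected 16-bit field unchanged.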
++ */ ++ protected void movk(int size, Register dst, int uimm16, int pos) { ++ moveWideImmInstruction(MOVK, dst, uimm16, pos, generalFromSize(size)); ++ } ++ ++ private void moveWideImmInstruction(Instruction instr, Register dst, int uimm16, int shiftAmt, InstructionType type) { ++ assert dst.getRegisterCategory().equals(CPU); ++ assert NumUtil.isUnsignedNbit(16, uimm16) : "Immediate has to be unsigned 16bit"; ++ assert shiftAmt == 0 || shiftAmt == 16 || (type == InstructionType.General64 && (shiftAmt == 32 || shiftAmt == 48)) : "Invalid shift amount: " + shiftAmt; ++ int shiftValue = shiftAmt >> 4; ++ emitInt(type.encoding | instr.encoding | MoveWideImmOp | rd(dst) | uimm16 << MoveWideImmOffset | shiftValue << MoveWideShiftOffset); ++ } ++ ++ /* Bitfield Operations (5.4.5) */ ++ ++ /** ++ * Bitfield move. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null, stackpointer or zero-register. ++ * @param src general purpose register. May not be null, stackpointer or zero-register. ++ * @param r must be in the range 0 to size - 1 ++ * @param s must be in the range 0 to size - 1 ++ */ ++ public void bfm(int size, Register dst, Register src, int r, int s) { ++ bitfieldInstruction(BFM, dst, src, r, s, generalFromSize(size)); ++ } ++ ++ /** ++ * Unsigned bitfield move. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null, stackpointer or zero-register. ++ * @param src general purpose register. May not be null, stackpointer or zero-register. ++ * @param r must be in the range 0 to size - 1 ++ * @param s must be in the range 0 to size - 1 ++ */ ++ public void ubfm(int size, Register dst, Register src, int r, int s) { ++ bitfieldInstruction(UBFM, dst, src, r, s, generalFromSize(size)); ++ } ++ ++ /** ++ * Signed bitfield move. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null, stackpointer or zero-register. ++ * @param src general purpose register. May not be null, stackpointer or zero-register. ++ * @param r must be in the range 0 to size - 1 ++ * @param s must be in the range 0 to size - 1 ++ */ ++ protected void sbfm(int size, Register dst, Register src, int r, int s) { ++ bitfieldInstruction(SBFM, dst, src, r, s, generalFromSize(size)); ++ } ++ ++ private void bitfieldInstruction(Instruction instr, Register dst, Register src, int r, int s, InstructionType type) { ++ assert !dst.equals(sp) && !dst.equals(zr); ++ assert !src.equals(sp) && !src.equals(zr); ++ assert s >= 0 && s < type.width && r >= 0 && r < type.width; ++ int sf = type == General64 ? 1 << ImmediateSizeOffset : 0; ++ emitInt(type.encoding | instr.encoding | BitfieldImmOp | sf | r << ImmediateRotateOffset | s << ImmediateOffset | rd(dst) | rs1(src)); ++ } ++ ++ /* Extract (Immediate) (5.4.6) */ ++ ++ /** ++ * Extract. dst = src1:src2 ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param lsb must be in range 0 to size - 1. 
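++ * The instruction is expected to write the size-bit field of the concatenation src1:src2 starting at this bit into dst.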
++ */ ++ protected void extr(int size, Register dst, Register src1, Register src2, int lsb) { ++ assert !dst.equals(sp); ++ assert !src1.equals(sp); ++ assert !src2.equals(sp); ++ InstructionType type = generalFromSize(size); ++ assert lsb >= 0 && lsb < type.width; ++ int sf = type == General64 ? 1 << ImmediateSizeOffset : 0; ++ emitInt(type.encoding | EXTR.encoding | sf | lsb << ImmediateOffset | rd(dst) | rs1(src1) | rs2(src2)); ++ } ++ ++ /* Arithmetic (shifted register) (5.5.1) */ ++ ++ /** ++ * dst = src1 + shiftType(src2, imm). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param shiftType any type but ROR. ++ * @param imm must be in range 0 to size - 1. ++ */ ++ protected void add(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) { ++ addSubShiftedInstruction(ADD, dst, src1, src2, shiftType, imm, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 + shiftType(src2, imm) and sets condition flags. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param shiftType any type but ROR. ++ * @param imm must be in range 0 to size - 1. ++ */ ++ public void adds(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) { ++ addSubShiftedInstruction(ADDS, dst, src1, src2, shiftType, imm, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 - shiftType(src2, imm). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param shiftType any type but ROR. ++ * @param imm must be in range 0 to size - 1. ++ */ ++ protected void sub(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) { ++ addSubShiftedInstruction(SUB, dst, src1, src2, shiftType, imm, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 - shiftType(src2, imm) and sets condition flags. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param shiftType any type but ROR. ++ * @param imm must be in range 0 to size - 1. 
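extr above selects a 64-bit (or 32-bit) window out of the concatenation src1:src2, starting at bit lsb of src2; with both sources equal it degenerates to a rotate right. A small model under that reading (names are mine, not part of the patch):

    // Model of the 64-bit extract: bits lsb..lsb+63 of the 128-bit value src1:src2.
    final class ExtrModel {
        static long extr64(long src1, long src2, int lsb) {
            if (lsb == 0) {
                return src2;   // Java would treat a shift by 64 as a shift by 0, so handle this case separately
            }
            return (src2 >>> lsb) | (src1 << (64 - lsb));
        }

        public static void main(String[] args) {
            long x = 0x0123_4567_89AB_CDEFL;
            // extr with src1 == src2 is a rotate right by lsb.
            assert extr64(x, x, 16) == Long.rotateRight(x, 16);
        }
    }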
++ */ ++ public void subs(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int imm) { ++ addSubShiftedInstruction(SUBS, dst, src1, src2, shiftType, imm, generalFromSize(size)); ++ } ++ ++ private void addSubShiftedInstruction(Instruction instr, Register dst, Register src1, Register src2, ShiftType shiftType, int imm, InstructionType type) { ++ assert shiftType != ShiftType.ROR; ++ assert imm >= 0 && imm < type.width; ++ emitInt(type.encoding | instr.encoding | AddSubShiftedOp | imm << ImmediateOffset | shiftType.encoding << ShiftTypeOffset | rd(dst) | rs1(src1) | rs2(src2)); ++ } ++ ++ /* Arithmetic (extended register) (5.5.2) */ ++ /** ++ * dst = src1 + extendType(src2) << imm. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or zero-register.. ++ * @param src1 general purpose register. May not be null or zero-register. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param extendType defines how src2 is extended to the same size as src1. ++ * @param shiftAmt must be in range 0 to 4. ++ */ ++ public void add(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) { ++ assert !dst.equals(zr); ++ assert !src1.equals(zr); ++ assert !src2.equals(sp); ++ addSubExtendedInstruction(ADD, dst, src1, src2, extendType, shiftAmt, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 + extendType(src2) << imm and sets condition flags. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer.. ++ * @param src1 general purpose register. May not be null or zero-register. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param extendType defines how src2 is extended to the same size as src1. ++ * @param shiftAmt must be in range 0 to 4. ++ */ ++ protected void adds(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) { ++ assert !dst.equals(sp); ++ assert !src1.equals(zr); ++ assert !src2.equals(sp); ++ addSubExtendedInstruction(ADDS, dst, src1, src2, extendType, shiftAmt, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 - extendType(src2) << imm. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or zero-register.. ++ * @param src1 general purpose register. May not be null or zero-register. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param extendType defines how src2 is extended to the same size as src1. ++ * @param shiftAmt must be in range 0 to 4. ++ */ ++ protected void sub(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) { ++ assert !dst.equals(zr); ++ assert !src1.equals(zr); ++ assert !src2.equals(sp); ++ addSubExtendedInstruction(SUB, dst, src1, src2, extendType, shiftAmt, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 - extendType(src2) << imm and sets flags. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer.. ++ * @param src1 general purpose register. May not be null or zero-register. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param extendType defines how src2 is extended to the same size as src1. ++ * @param shiftAmt must be in range 0 to 4. 
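The extended-register arithmetic forms documented above build their second operand in two steps: src2 is first narrowed and zero- or sign-extended according to ExtendType, then shifted left by 0 to 4 bits before the add or subtract. A sketch of that operand computation for a few representative extend kinds (the enum and helper here are my own, not the patch's ExtendType):

    // How an extended-register operand is formed: extend src2, then shift by 0..4.
    final class ExtendedOperand {
        enum Ext { UXTB, UXTW, SXTW }

        static long operand(long src2, Ext ext, int shiftAmt) {
            assert shiftAmt >= 0 && shiftAmt <= 4;
            long extended;
            switch (ext) {
                case UXTB: extended = src2 & 0xFFL; break;          // zero-extend the low byte
                case UXTW: extended = src2 & 0xFFFF_FFFFL; break;   // zero-extend the low 32 bits
                case SXTW: extended = (long) (int) src2; break;     // sign-extend the low 32 bits
                default: throw new AssertionError(ext);
            }
            return extended << shiftAmt;
        }

        public static void main(String[] args) {
            // e.g. add(64, dst, base, index, SXTW, 3) adds base + ((long) (int) index << 3).
            assert operand(0xFFFF_FFFF_8000_0000L, Ext.SXTW, 3) == ((long) Integer.MIN_VALUE) << 3;
        }
    }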
++ */ ++ public void subs(int size, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt) { ++ assert !dst.equals(sp); ++ assert !src1.equals(zr); ++ assert !src2.equals(sp); ++ addSubExtendedInstruction(SUBS, dst, src1, src2, extendType, shiftAmt, generalFromSize(size)); ++ } ++ ++ private void addSubExtendedInstruction(Instruction instr, Register dst, Register src1, Register src2, ExtendType extendType, int shiftAmt, InstructionType type) { ++ assert shiftAmt >= 0 && shiftAmt <= 4; ++ emitInt(type.encoding | instr.encoding | AddSubExtendedOp | shiftAmt << ImmediateOffset | extendType.encoding << ExtendTypeOffset | rd(dst) | rs1(src1) | rs2(src2)); ++ } ++ ++ /* Logical (shifted register) (5.5.3) */ ++ /** ++ * dst = src1 & shiftType(src2, imm). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param shiftType all types allowed, may not be null. ++ * @param shiftAmt must be in range 0 to size - 1. ++ */ ++ protected void and(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ logicalRegInstruction(AND, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 & shiftType(src2, imm) and sets condition flags. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param shiftType all types allowed, may not be null. ++ * @param shiftAmt must be in range 0 to size - 1. ++ */ ++ protected void ands(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ logicalRegInstruction(ANDS, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 & ~(shiftType(src2, imm)). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param shiftType all types allowed, may not be null. ++ * @param shiftAmt must be in range 0 to size - 1. ++ */ ++ protected void bic(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ logicalRegInstruction(BIC, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 & ~(shiftType(src2, imm)) and sets condition flags. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param shiftType all types allowed, may not be null. ++ * @param shiftAmt must be in range 0 to size - 1. ++ */ ++ protected void bics(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ logicalRegInstruction(BICS, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 ^ ~(shiftType(src2, imm)). ++ * ++ * @param size register size. Has to be 32 or 64. 
++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param shiftType all types allowed, may not be null. ++ * @param shiftAmt must be in range 0 to size - 1. ++ */ ++ protected void eon(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ logicalRegInstruction(EON, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 ^ shiftType(src2, imm). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param shiftType all types allowed, may not be null. ++ * @param shiftAmt must be in range 0 to size - 1. ++ */ ++ protected void eor(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ logicalRegInstruction(EOR, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 | shiftType(src2, imm). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param shiftType all types allowed, may not be null. ++ * @param shiftAmt must be in range 0 to size - 1. ++ */ ++ protected void orr(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ logicalRegInstruction(ORR, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 | ~(shiftType(src2, imm)). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param shiftType all types allowed, may not be null. ++ * @param shiftAmt must be in range 0 to size - 1. ++ */ ++ protected void orn(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ logicalRegInstruction(ORN, dst, src1, src2, shiftType, shiftAmt, generalFromSize(size)); ++ } ++ ++ private void logicalRegInstruction(Instruction instr, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt, InstructionType type) { ++ assert !dst.equals(sp); ++ assert !src1.equals(sp); ++ assert !src2.equals(sp); ++ assert shiftAmt >= 0 && shiftAmt < type.width; ++ emitInt(type.encoding | instr.encoding | LogicalShiftOp | shiftAmt << ImmediateOffset | shiftType.encoding << ShiftTypeOffset | rd(dst) | rs1(src1) | rs2(src2)); ++ } ++ ++ /* Variable Shift (5.5.4) */ ++ /** ++ * dst = src1 >> (src2 & log2(size)). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. 
++ */ ++ protected void asr(int size, Register dst, Register src1, Register src2) { ++ dataProcessing2SourceOp(ASRV, dst, src1, src2, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 << (src2 & log2(size)). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ */ ++ protected void lsl(int size, Register dst, Register src1, Register src2) { ++ dataProcessing2SourceOp(LSLV, dst, src1, src2, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 >>> (src2 & log2(size)). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ */ ++ protected void lsr(int size, Register dst, Register src1, Register src2) { ++ dataProcessing2SourceOp(LSRV, dst, src1, src2, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = rotateRight(src1, (src2 & log2(size))). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ */ ++ protected void ror(int size, Register dst, Register src1, Register src2) { ++ dataProcessing2SourceOp(RORV, dst, src1, src2, generalFromSize(size)); ++ } ++ ++ /* Bit Operations (5.5.5) */ ++ ++ /** ++ * Counts leading sign bits. Sets Wd to the number of consecutive bits following the topmost bit ++ * in dst, that are the same as the topmost bit. The count does not include the topmost bit ++ * itself , so the result will be in the range 0 to size-1 inclusive. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null, zero-register or the stackpointer. ++ * @param src source register. May not be null, zero-register or the stackpointer. ++ */ ++ protected void cls(int size, Register dst, Register src) { ++ dataProcessing1SourceOp(CLS, dst, src, generalFromSize(size)); ++ } ++ ++ /** ++ * Counts leading zeros. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null, zero-register or the stackpointer. ++ * @param src source register. May not be null, zero-register or the stackpointer. ++ */ ++ public void clz(int size, Register dst, Register src) { ++ dataProcessing1SourceOp(CLZ, dst, src, generalFromSize(size)); ++ } ++ ++ /** ++ * Reverses bits. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null, zero-register or the stackpointer. ++ * @param src source register. May not be null, zero-register or the stackpointer. ++ */ ++ public void rbit(int size, Register dst, Register src) { ++ dataProcessing1SourceOp(RBIT, dst, src, generalFromSize(size)); ++ } ++ ++ /** ++ * Reverses bytes. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param src source register. May not be null or the stackpointer. 
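The variable-shift forms just defined take their shift amount from a register and, as on AArch64 (which this port appears to follow), only the low log2(size) bits of src2 are used, so the amount is effectively reduced modulo the register size. Java's own shift operators behave the same way, which allows a compact sanity check:

    // Java shifts already reduce the shift amount modulo the operand width,
    // matching the ASRV/LSLV/LSRV/RORV behaviour described above.
    final class VarShiftDemo {
        public static void main(String[] args) {
            long x = 0x8000_0000_0000_0000L;
            int amount = 65;                      // 65 & 63 == 1 is what actually applies
            assert (x >> amount) == (x >> 1);
            assert (x >>> amount) == (x >>> 1);
            assert (x << amount) == (x << 1);
            assert Long.rotateRight(x, amount) == Long.rotateRight(x, 1);
        }
    }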
++ */ ++ public void rev(int size, Register dst, Register src) { ++ if (size == 64) { ++ dataProcessing1SourceOp(REVX, dst, src, generalFromSize(size)); ++ } else { ++ assert size == 32; ++ dataProcessing1SourceOp(REVW, dst, src, generalFromSize(size)); ++ } ++ } ++ ++ /* Conditional Data Processing (5.5.6) */ ++ ++ /** ++ * Conditional select. dst = src1 if condition else src2. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param src1 general purpose register. May not be null or the stackpointer. ++ * @param src2 general purpose register. May not be null or the stackpointer. ++ * @param condition any condition flag. May not be null. ++ */ ++ protected void csel(int size, Register dst, Register src1, Register src2, ConditionFlag condition) { ++ conditionalSelectInstruction(CSEL, dst, src1, src2, condition, generalFromSize(size)); ++ } ++ ++ /** ++ * Conditional select negate. dst = src1 if condition else -src2. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param src1 general purpose register. May not be null or the stackpointer. ++ * @param src2 general purpose register. May not be null or the stackpointer. ++ * @param condition any condition flag. May not be null. ++ */ ++ protected void csneg(int size, Register dst, Register src1, Register src2, ConditionFlag condition) { ++ conditionalSelectInstruction(CSNEG, dst, src1, src2, condition, generalFromSize(size)); ++ } ++ ++ /** ++ * Conditional increase. dst = src1 if condition else src2 + 1. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param src1 general purpose register. May not be null or the stackpointer. ++ * @param src2 general purpose register. May not be null or the stackpointer. ++ * @param condition any condition flag. May not be null. ++ */ ++ protected void csinc(int size, Register dst, Register src1, Register src2, ConditionFlag condition) { ++ conditionalSelectInstruction(CSINC, dst, src1, src2, condition, generalFromSize(size)); ++ } ++ ++ private void conditionalSelectInstruction(Instruction instr, Register dst, Register src1, Register src2, ConditionFlag condition, InstructionType type) { ++ assert !dst.equals(sp); ++ assert !src1.equals(sp); ++ assert !src2.equals(sp); ++ emitInt(type.encoding | instr.encoding | ConditionalSelectOp | rd(dst) | rs1(src1) | rs2(src2) | condition.encoding << ConditionalConditionOffset); ++ } ++ ++ /* Integer Multiply/Divide (5.6) */ ++ ++ /** ++ * dst = src1 * src2 + src3. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param src1 general purpose register. May not be null or the stackpointer. ++ * @param src2 general purpose register. May not be null or the stackpointer. ++ * @param src3 general purpose register. May not be null or the stackpointer. ++ */ ++ protected void madd(int size, Register dst, Register src1, Register src2, Register src3) { ++ mulInstruction(MADD, dst, src1, src2, src3, generalFromSize(size)); ++ } ++ ++ /** ++ * dst = src3 - src1 * src2. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param src1 general purpose register. May not be null or the stackpointer. ++ * @param src2 general purpose register. 
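For intuition, the bit operations above (clz, rbit, rev and cls) line up with java.lang.Long utilities; cls has no direct counterpart but follows from clz via a small identity. This is only an analogy for the documented semantics, not part of the assembler API:

    // Java analogues of the 64-bit bit operations documented above.
    final class BitOpAnalogues {
        // Leading sign bits, excluding the topmost bit itself: result is 0..63.
        static int cls64(long x) {
            return Long.numberOfLeadingZeros(x ^ (x >> 1)) - 1;
        }

        public static void main(String[] args) {
            assert Long.numberOfLeadingZeros(1L) == 63;                                 // clz
            assert Long.reverse(1L) == 0x8000_0000_0000_0000L;                          // rbit
            assert Long.reverseBytes(0x0102_0304_0506_0708L) == 0x0807_0605_0403_0201L; // rev
            assert cls64(0L) == 63 && cls64(-1L) == 63 && cls64(0x4000_0000_0000_0000L) == 0; // cls
        }
    }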
May not be null or the stackpointer. ++ * @param src3 general purpose register. May not be null or the stackpointer. ++ */ ++ protected void msub(int size, Register dst, Register src1, Register src2, Register src3) { ++ mulInstruction(MSUB, dst, src1, src2, src3, generalFromSize(size)); ++ } ++ ++ /** ++ * Signed multiply high. dst = (src1 * src2)[127:64] ++ * ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param src1 general purpose register. May not be null or the stackpointer. ++ * @param src2 general purpose register. May not be null or the stackpointer. ++ */ ++ protected void smulh(Register dst, Register src1, Register src2) { ++ assert !dst.equals(sp); ++ assert !src1.equals(sp); ++ assert !src2.equals(sp); ++ emitInt(0b10011011010 << 21 | dst.encoding | rs1(src1) | rs2(src2) | 0b011111 << ImmediateOffset); ++ } ++ ++ /** ++ * unsigned multiply high. dst = (src1 * src2)[127:64] ++ * ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param src1 general purpose register. May not be null or the stackpointer. ++ * @param src2 general purpose register. May not be null or the stackpointer. ++ */ ++ protected void umulh(Register dst, Register src1, Register src2) { ++ assert !dst.equals(sp); ++ assert !src1.equals(sp); ++ assert !src2.equals(sp); ++ emitInt(0b10011011110 << 21 | dst.encoding | rs1(src1) | rs2(src2) | 0b011111 << ImmediateOffset); ++ } ++ ++ /** ++ * unsigned multiply add-long. xDst = xSrc3 + (wSrc1 * wSrc2) ++ * ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param src1 general purpose register. May not be null or the stackpointer. ++ * @param src2 general purpose register. May not be null or the stackpointer. ++ * @param src3 general purpose register. May not be null or the stackpointer. ++ */ ++ protected void umaddl(Register dst, Register src1, Register src2, Register src3) { ++ assert !dst.equals(sp); ++ assert !src1.equals(sp); ++ assert !src2.equals(sp); ++ assert !src3.equals(sp); ++ emitInt(0b10011011101 << 21 | dst.encoding | rs1(src1) | rs2(src2) | 0b011111 << ImmediateOffset); ++ } ++ ++ /** ++ * signed multiply add-long. xDst = xSrc3 + (wSrc1 * wSrc2) ++ * ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param src1 general purpose register. May not be null or the stackpointer. ++ * @param src2 general purpose register. May not be null or the stackpointer. ++ * @param src3 general purpose register. May not be null or the stackpointer. ++ */ ++ public void smaddl(Register dst, Register src1, Register src2, Register src3) { ++ assert !dst.equals(sp); ++ assert !src1.equals(sp); ++ assert !src2.equals(sp); ++ assert !src3.equals(sp); ++ emitInt(0b10011011001 << 21 | dst.encoding | rs1(src1) | rs2(src2) | rs3(src3)); ++ } ++ ++ private void mulInstruction(Instruction instr, Register dst, Register src1, Register src2, Register src3, InstructionType type) { ++ assert !dst.equals(sp); ++ assert !src1.equals(sp); ++ assert !src2.equals(sp); ++ assert !src3.equals(sp); ++ emitInt(type.encoding | instr.encoding | MulOp | rd(dst) | rs1(src1) | rs2(src2) | rs3(src3)); ++ } ++ ++ /** ++ * Signed divide. dst = src1 / src2. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param src1 general purpose register. May not be null or the stackpointer. ++ * @param src2 general purpose register. May not be null or the stackpointer. 
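smulh and umulh above return the upper 64 bits of the full 128-bit product, which ordinary Java multiplication throws away. Math.multiplyHigh (JDK 9+) is the signed equivalent, and the unsigned variant can be derived from it; a BigInteger cross-check keeps the sketch honest (the umulh helper name is mine):

    import java.math.BigInteger;

    // The high 64 bits of a 64x64-bit product, i.e. what smulh/umulh compute.
    final class MulHighDemo {
        static long umulh(long a, long b) {
            // Unsigned high word derived from the signed one; JDK 18+ has Math.unsignedMultiplyHigh.
            return Math.multiplyHigh(a, b) + ((a >> 63) & b) + ((b >> 63) & a);
        }

        public static void main(String[] args) {
            long a = 0xDEAD_BEEF_1234_5678L;
            long b = 0x0FED_CBA9_8765_4321L;
            BigInteger ua = new BigInteger(Long.toUnsignedString(a));
            BigInteger ub = new BigInteger(Long.toUnsignedString(b));
            assert umulh(a, b) == ua.multiply(ub).shiftRight(64).longValue();
            assert Math.multiplyHigh(a, b) == BigInteger.valueOf(a).multiply(BigInteger.valueOf(b)).shiftRight(64).longValue();
        }
    }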
++ */ ++ public void sdiv(int size, Register dst, Register src1, Register src2) { ++ dataProcessing2SourceOp(SDIV, dst, src1, src2, generalFromSize(size)); ++ } ++ ++ /** ++ * Unsigned divide. dst = src1 / src2. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param src1 general purpose register. May not be null or the stackpointer. ++ * @param src2 general purpose register. May not be null or the stackpointer. ++ */ ++ public void udiv(int size, Register dst, Register src1, Register src2) { ++ dataProcessing2SourceOp(UDIV, dst, src1, src2, generalFromSize(size)); ++ } ++ ++ private void dataProcessing1SourceOp(Instruction instr, Register dst, Register src, InstructionType type) { ++ emitInt(type.encoding | instr.encoding | DataProcessing1SourceOp | rd(dst) | rs1(src)); ++ } ++ ++ private void dataProcessing2SourceOp(Instruction instr, Register dst, Register src1, Register src2, InstructionType type) { ++ assert !dst.equals(sp); ++ assert !src1.equals(sp); ++ assert !src2.equals(sp); ++ emitInt(type.encoding | instr.encoding | DataProcessing2SourceOp | rd(dst) | rs1(src1) | rs2(src2)); ++ } ++ ++ /* Floating point operations */ ++ ++ /* Load-Store Single FP register (5.7.1.1) */ ++ /** ++ * Floating point load. ++ * ++ * @param size number of bits read from memory into rt. Must be 32 or 64. ++ * @param rt floating point register. May not be null. ++ * @param address all addressing modes allowed. May not be null. ++ */ ++ public void fldr(int size, Register rt, SW64Address address) { ++ assert rt.getRegisterCategory().equals(SIMD); ++ assert size == 32 || size == 64; ++ int transferSize = NumUtil.log2Ceil(size / 8); ++ loadStoreInstruction(LDR, rt, address, InstructionType.FP32, transferSize); ++ } ++ ++ /** ++ * Floating point store. ++ * ++ * @param size number of bits read from memory into rt. Must be 32 or 64. ++ * @param rt floating point register. May not be null. ++ * @param address all addressing modes allowed. May not be null. ++ */ ++ public void fstr(int size, Register rt, SW64Address address) { ++ assert rt.getRegisterCategory().equals(SIMD); ++ assert size == 32 || size == 64; ++ int transferSize = NumUtil.log2Ceil(size / 8); ++ loadStoreInstruction(STR, rt, address, InstructionType.FP64, transferSize); ++ } ++ ++ /* Floating-point Move (register) (5.7.2) */ ++ ++ /** ++ * Floating point move. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst floating point register. May not be null. ++ * @param src floating point register. May not be null. ++ */ ++ protected void fmov(int size, Register dst, Register src) { ++ fpDataProcessing1Source(FMOV, dst, src, floatFromSize(size)); ++ } ++ ++ /** ++ * Move size bits from floating point register unchanged to general purpose register. ++ * ++ * @param size number of bits read from memory into rt. Must be 32 or 64. ++ * @param dst general purpose register. May not be null, stack-pointer or zero-register ++ * @param src floating point register. May not be null. ++ */ ++ protected void fmovFpu2Cpu(int size, Register dst, Register src) { ++ assert dst.getRegisterCategory().equals(CPU); ++ assert src.getRegisterCategory().equals(SIMD); ++ fmovCpuFpuInstruction(dst, src, size == 64, Instruction.FMOVFPU2CPU); ++ } ++ ++ /** ++ * Move size bits from general purpose register unchanged to floating point register. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst floating point register. May not be null. 
++ * @param src general purpose register. May not be null or stack-pointer. ++ */ ++ protected void fmovCpu2Fpu(int size, Register dst, Register src) { ++ assert dst.getRegisterCategory().equals(SIMD); ++ assert src.getRegisterCategory().equals(CPU); ++ fmovCpuFpuInstruction(dst, src, size == 64, Instruction.FMOVCPU2FPU); ++ } ++ ++ private void fmovCpuFpuInstruction(Register dst, Register src, boolean is64bit, Instruction instr) { ++ int sf = is64bit ? FP64.encoding | General64.encoding : FP32.encoding | General32.encoding; ++ emitInt(sf | instr.encoding | FpConvertOp | rd(dst) | rs1(src)); ++ } ++ ++ /* Floating-point Move (immediate) (5.7.3) */ ++ ++ /** ++ * Move immediate into register. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst floating point register. May not be null. ++ * @param imm immediate that is loaded into dst. If size is 32 only float immediates can be ++ * loaded, i.e. (float) imm == imm must be true. In all cases ++ * {@code isFloatImmediate}, respectively {@code #isDoubleImmediate} must be true ++ * depending on size. ++ */ ++ protected void fmov(int size, Register dst, double imm) { ++ assert dst.getRegisterCategory().equals(SIMD); ++ InstructionType type = floatFromSize(size); ++ int immEncoding; ++ if (type == FP64) { ++ immEncoding = getDoubleImmediate(imm); ++ } else { ++ assert imm == (float) imm : "float mov must use an immediate that can be represented using a float."; ++ immEncoding = getFloatImmediate((float) imm); ++ } ++ emitInt(type.encoding | FMOV.encoding | FpImmOp | immEncoding | rd(dst)); ++ } ++ ++ private static int getDoubleImmediate(double imm) { ++ assert isDoubleImmediate(imm); ++ // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000 ++ // 0000.0000.0000.0000.0000.0000.0000.0000 ++ long repr = Double.doubleToRawLongBits(imm); ++ int a = (int) (repr >>> 63) << 7; ++ int b = (int) ((repr >>> 61) & 0x1) << 6; ++ int cToH = (int) (repr >>> 48) & 0x3f; ++ return (a | b | cToH) << FpImmOffset; ++ } ++ ++ protected static boolean isDoubleImmediate(double imm) { ++ // Valid values will have the form: ++ // aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000 ++ // 0000.0000.0000.0000.0000.0000.0000.0000 ++ long bits = Double.doubleToRawLongBits(imm); ++ // lower 48 bits are cleared ++ if ((bits & NumUtil.getNbitNumberLong(48)) != 0) { ++ return false; ++ } ++ // bits[61..54] are all set or all cleared. ++ long pattern = (bits >> 54) & NumUtil.getNbitNumberLong(7); ++ if (pattern != 0 && pattern != NumUtil.getNbitNumberLong(7)) { ++ return false; ++ } ++ // bits[62] and bits[61] are opposites. ++ return ((bits ^ (bits << 1)) & (1L << 62)) != 0; ++ } ++ ++ private static int getFloatImmediate(float imm) { ++ assert isFloatImmediate(imm); ++ // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000 ++ int repr = Float.floatToRawIntBits(imm); ++ int a = (repr >>> 31) << 7; ++ int b = ((repr >>> 29) & 0x1) << 6; ++ int cToH = (repr >>> 19) & NumUtil.getNbitNumberInt(6); ++ return (a | b | cToH) << FpImmOffset; ++ } ++ ++ protected static boolean isFloatImmediate(float imm) { ++ // Valid values will have the form: ++ // aBbb.bbbc.defg.h000.0000.0000.0000.0000 ++ int bits = Float.floatToRawIntBits(imm); ++ // lower 20 bits are cleared. 
++ if ((bits & NumUtil.getNbitNumberInt(19)) != 0) { ++ return false; ++ } ++ // bits[29..25] are all set or all cleared ++ int pattern = (bits >> 25) & NumUtil.getNbitNumberInt(5); ++ if (pattern != 0 && pattern != NumUtil.getNbitNumberInt(5)) { ++ return false; ++ } ++ // bits[29] and bits[30] have to be opposite ++ return ((bits ^ (bits << 1)) & (1 << 30)) != 0; ++ } ++ ++ /* Convert Floating-point Precision (5.7.4.1) */ ++ /* Converts float to double and vice-versa */ ++ ++ /** ++ * Convert float to double and vice-versa. ++ * ++ * @param srcSize size of source register in bits. ++ * @param dst floating point register. May not be null. ++ * @param src floating point register. May not be null. ++ */ ++ public void fcvt(int srcSize, Register dst, Register src) { ++ if (srcSize == 32) { ++ fpDataProcessing1Source(FCVTDS, dst, src, floatFromSize(srcSize)); ++ } else { ++ fpDataProcessing1Source(FCVTSD, dst, src, floatFromSize(srcSize)); ++ } ++ } ++ ++ /* Convert to Integer (5.7.4.2) */ ++ ++ /** ++ * Convert floating point to integer. Rounds towards zero. ++ * ++ * @param targetSize size of integer register. 32 or 64. ++ * @param srcSize size of floating point register. 32 or 64. ++ * @param dst general purpose register. May not be null, the zero-register or the stackpointer. ++ * @param src floating point register. May not be null. ++ */ ++ public void fcvtzs(int targetSize, int srcSize, Register dst, Register src) { ++ assert !dst.equals(zr) && !dst.equals(sp); ++ assert src.getRegisterCategory().equals(SIMD); ++ fcvtCpuFpuInstruction(FCVTZS, dst, src, generalFromSize(targetSize), floatFromSize(srcSize)); ++ } ++ ++ /* Convert from Integer (5.7.4.2) */ ++ /** ++ * Converts integer to floating point. Uses rounding mode defined by FCPR. ++ * ++ * @param targetSize size of floating point register. 32 or 64. ++ * @param srcSize size of integer register. 32 or 64. ++ * @param dst floating point register. May not be null. ++ * @param src general purpose register. May not be null or the stackpointer. ++ */ ++ public void scvtf(int targetSize, int srcSize, Register dst, Register src) { ++ assert dst.getRegisterCategory().equals(SIMD); ++ assert !src.equals(sp); ++ fcvtCpuFpuInstruction(SCVTF, dst, src, floatFromSize(targetSize), generalFromSize(srcSize)); ++ } ++ ++ private void fcvtCpuFpuInstruction(Instruction instr, Register dst, Register src, InstructionType type1, InstructionType type2) { ++ emitInt(type1.encoding | type2.encoding | instr.encoding | FpConvertOp | rd(dst) | rs1(src)); ++ } ++ ++ /* Floating-point Round to Integral (5.7.5) */ ++ ++ /** ++ * Rounds floating-point to integral. Rounds towards zero. ++ * ++ * @param size register size. ++ * @param dst floating point register. May not be null. ++ * @param src floating point register. May not be null. ++ */ ++ protected void frintz(int size, Register dst, Register src) { ++ fpDataProcessing1Source(FRINTZ, dst, src, floatFromSize(size)); ++ } ++ ++ /** ++ * Rounds floating-point to integral. Rounds towards nearest with ties to even. ++ * ++ * @param size register size. ++ * @param dst floating point register. May not be null. ++ * @param src floating point register. May not be null. ++ */ ++ public void frintn(int size, Register dst, Register src) { ++ fpDataProcessing1Source(FRINTN, dst, src, floatFromSize(size)); ++ } ++ ++ /** ++ * Rounds floating-point to integral. Rounds towards minus infinity. ++ * ++ * @param size register size. ++ * @param dst floating point register. May not be null. 
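fmov(size, dst, imm) above only accepts the narrow family of constants whose bit layout isDoubleImmediate and isFloatImmediate describe: an 8-bit a:B:b...b:cdefgh pattern expanded into the exponent and top fraction bits. Decoding that pattern back into a double makes the restriction concrete; the decoder below is my own inversion of the layout in the comments above, not code from the patch:

    // Rebuild a double from the 8-bit FP immediate layout described above:
    // bit 63 = a, bit 62 = NOT b, bits 61..54 = b repeated, bits 53..48 = cdefgh, rest 0.
    final class Fp8ImmDemo {
        static double decode(int imm8) {
            long a = (imm8 >>> 7) & 1;
            long b = (imm8 >>> 6) & 1;
            long cdefgh = imm8 & 0x3F;
            long bits = (a << 63) | ((b ^ 1L) << 62) | ((b == 1 ? 0xFFL : 0x00L) << 54) | (cdefgh << 48);
            return Double.longBitsToDouble(bits);
        }

        public static void main(String[] args) {
            assert decode(0x70) == 1.0;    // a=0, b=1, cdefgh=110000
            assert decode(0x00) == 2.0;    // a=0, b=0, cdefgh=000000
            assert decode(0xF0) == -1.0;   // sign bit set, otherwise as 1.0
        }
    }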
++ * @param src floating point register. May not be null. ++ */ ++ public void frintm(int size, Register dst, Register src) { ++ fpDataProcessing1Source(FRINTM, dst, src, floatFromSize(size)); ++ } ++ ++ /** ++ * Rounds floating-point to integral. Rounds towards plus infinity. ++ * ++ * @param size register size. ++ * @param dst floating point register. May not be null. ++ * @param src floating point register. May not be null. ++ */ ++ public void frintp(int size, Register dst, Register src) { ++ fpDataProcessing1Source(FRINTP, dst, src, floatFromSize(size)); ++ } ++ ++ /* Floating-point Arithmetic (1 source) (5.7.6) */ ++ ++ /** ++ * dst = |src|. ++ * ++ * @param size register size. ++ * @param dst floating point register. May not be null. ++ * @param src floating point register. May not be null. ++ */ ++ public void fabs(int size, Register dst, Register src) { ++ fpDataProcessing1Source(FABS, dst, src, floatFromSize(size)); ++ } ++ ++ /** ++ * dst = -neg. ++ * ++ * @param size register size. ++ * @param dst floating point register. May not be null. ++ * @param src floating point register. May not be null. ++ */ ++ public void fneg(int size, Register dst, Register src) { ++ fpDataProcessing1Source(FNEG, dst, src, floatFromSize(size)); ++ } ++ ++ /** ++ * dst = Sqrt(src). ++ * ++ * @param size register size. ++ * @param dst floating point register. May not be null. ++ * @param src floating point register. May not be null. ++ */ ++ public void fsqrt(int size, Register dst, Register src) { ++ fpDataProcessing1Source(FSQRT, dst, src, floatFromSize(size)); ++ } ++ ++ private void fpDataProcessing1Source(Instruction instr, Register dst, Register src, InstructionType type) { ++ assert dst.getRegisterCategory().equals(SIMD); ++ assert src.getRegisterCategory().equals(SIMD); ++ emitInt(type.encoding | instr.encoding | Fp1SourceOp | rd(dst) | rs1(src)); ++ } ++ ++ /* Floating-point Arithmetic (2 source) (5.7.7) */ ++ ++ /** ++ * dst = src1 + src2. ++ * ++ * @param size register size. ++ * @param dst floating point register. May not be null. ++ * @param src1 floating point register. May not be null. ++ * @param src2 floating point register. May not be null. ++ */ ++ public void fadd(int size, Register dst, Register src1, Register src2) { ++ fpDataProcessing2Source(FADD, dst, src1, src2, floatFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 - src2. ++ * ++ * @param size register size. ++ * @param dst floating point register. May not be null. ++ * @param src1 floating point register. May not be null. ++ * @param src2 floating point register. May not be null. ++ */ ++ public void fsub(int size, Register dst, Register src1, Register src2) { ++ fpDataProcessing2Source(FSUB, dst, src1, src2, floatFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 * src2. ++ * ++ * @param size register size. ++ * @param dst floating point register. May not be null. ++ * @param src1 floating point register. May not be null. ++ * @param src2 floating point register. May not be null. ++ */ ++ public void fmul(int size, Register dst, Register src1, Register src2) { ++ fpDataProcessing2Source(FMUL, dst, src1, src2, floatFromSize(size)); ++ } ++ ++ /** ++ * dst = src1 / src2. ++ * ++ * @param size register size. ++ * @param dst floating point register. May not be null. ++ * @param src1 floating point register. May not be null. ++ * @param src2 floating point register. May not be null. 
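The frint family differs only in rounding direction, and Java's math library happens to offer the same four policies, which is an easy way to keep them apart (analogy only; the cast-based truncation stands in for round-toward-zero within long range):

    // The rounding policies behind frintn/frintm/frintp/frintz, expressed in Java.
    final class RoundingDemo {
        public static void main(String[] args) {
            double x = 2.5, y = -2.5;
            assert Math.rint(x) == 2.0 && Math.rint(y) == -2.0;             // frintn: nearest, ties to even
            assert Math.floor(x) == 2.0 && Math.floor(y) == -3.0;           // frintm: toward minus infinity
            assert Math.ceil(x) == 3.0 && Math.ceil(y) == -2.0;             // frintp: toward plus infinity
            assert (double) (long) x == 2.0 && (double) (long) y == -2.0;   // frintz: toward zero
        }
    }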
++ */ ++ public void fdiv(int size, Register dst, Register src1, Register src2) { ++ fpDataProcessing2Source(FDIV, dst, src1, src2, floatFromSize(size)); ++ } ++ ++ private void fpDataProcessing2Source(Instruction instr, Register dst, Register src1, Register src2, InstructionType type) { ++ assert dst.getRegisterCategory().equals(SIMD); ++ assert src1.getRegisterCategory().equals(SIMD); ++ assert src2.getRegisterCategory().equals(SIMD); ++ emitInt(type.encoding | instr.encoding | Fp2SourceOp | rd(dst) | rs1(src1) | rs2(src2)); ++ } ++ ++ /* Floating-point Multiply-Add (5.7.9) */ ++ ++ /** ++ * dst = src1 * src2 + src3. ++ * ++ * @param size register size. ++ * @param dst floating point register. May not be null. ++ * @param src1 floating point register. May not be null. ++ * @param src2 floating point register. May not be null. ++ * @param src3 floating point register. May not be null. ++ */ ++ protected void fmadd(int size, Register dst, Register src1, Register src2, Register src3) { ++ fpDataProcessing3Source(FMADD, dst, src1, src2, src3, floatFromSize(size)); ++ } ++ ++ /** ++ * dst = src3 - src1 * src2. ++ * ++ * @param size register size. ++ * @param dst floating point register. May not be null. ++ * @param src1 floating point register. May not be null. ++ * @param src2 floating point register. May not be null. ++ * @param src3 floating point register. May not be null. ++ */ ++ protected void fmsub(int size, Register dst, Register src1, Register src2, Register src3) { ++ fpDataProcessing3Source(FMSUB, dst, src1, src2, src3, floatFromSize(size)); ++ } ++ ++ private void fpDataProcessing3Source(Instruction instr, Register dst, Register src1, Register src2, Register src3, InstructionType type) { ++ assert dst.getRegisterCategory().equals(SIMD); ++ assert src1.getRegisterCategory().equals(SIMD); ++ assert src2.getRegisterCategory().equals(SIMD); ++ assert src3.getRegisterCategory().equals(SIMD); ++ emitInt(type.encoding | instr.encoding | Fp3SourceOp | rd(dst) | rs1(src1) | rs2(src2) | rs3(src3)); ++ } ++ ++ /* Floating-point Comparison (5.7.10) */ ++ ++ /** ++ * Compares src1 to src2. ++ * ++ * @param size register size. ++ * @param src1 floating point register. May not be null. ++ * @param src2 floating point register. May not be null. ++ */ ++ public void fcmp(int size, Register src1, Register src2) { ++ assert src1.getRegisterCategory().equals(SIMD); ++ assert src2.getRegisterCategory().equals(SIMD); ++ InstructionType type = floatFromSize(size); ++ emitInt(type.encoding | FCMP.encoding | FpCmpOp | rs1(src1) | rs2(src2)); ++ } ++ ++ /** ++ * Conditional compare. NZCV = fcmp(src1, src2) if condition else uimm4. ++ * ++ * @param size register size. ++ * @param src1 floating point register. May not be null. ++ * @param src2 floating point register. May not be null. ++ * @param uimm4 condition flags that are used if condition is false. ++ * @param condition every condition allowed. May not be null. ++ */ ++ public void fccmp(int size, Register src1, Register src2, int uimm4, ConditionFlag condition) { ++ assert NumUtil.isUnsignedNbit(4, uimm4); ++ assert src1.getRegisterCategory().equals(SIMD); ++ assert src2.getRegisterCategory().equals(SIMD); ++ InstructionType type = floatFromSize(size); ++ emitInt(type.encoding | FCCMP.encoding | uimm4 | condition.encoding << ConditionalConditionOffset | rs1(src1) | rs2(src2)); ++ } ++ ++ /** ++ * Compare register to 0.0 . ++ * ++ * @param size register size. ++ * @param src floating point register. May not be null. 
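fmadd and fmsub above are fused operations: the product feeds the addition at full precision and only the final result is rounded. Math.fma (JDK 9+) has the same single-rounding behaviour, so the "src3 - src1 * src2" shape of fmsub corresponds to fma(-a, b, c); the constants below are chosen so the intermediate rounding is visible:

    // Fused multiply-add: one rounding for the whole expression, as in fmadd/fmsub.
    final class FmaDemo {
        public static void main(String[] args) {
            double a = 100000001.0, b = 99999999.0;   // exact product is 1e16 - 1
            double fused = Math.fma(a, b, -1e16);     // exact product, then a single rounding
            double unfused = a * b - 1e16;            // product rounds to 1e16 first
            assert fused == -1.0 && unfused == 0.0;
            assert Math.fma(-a, b, 1e16) == 1.0;      // fmsub-style: src3 - src1 * src2
        }
    }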
++ */ ++ public void fcmpZero(int size, Register src) { ++ assert src.getRegisterCategory().equals(SIMD); ++ InstructionType type = floatFromSize(size); ++ emitInt(type.encoding | FCMPZERO.encoding | FpCmpOp | rs1(src)); ++ } ++ ++ /* Floating-point Conditional Select (5.7.11) */ ++ ++ /** ++ * Conditional select. dst = src1 if condition else src2. ++ * ++ * @param size register size. ++ * @param dst floating point register. May not be null. ++ * @param src1 floating point register. May not be null. ++ * @param src2 floating point register. May not be null. ++ * @param condition every condition allowed. May not be null. ++ */ ++ protected void fcsel(int size, Register dst, Register src1, Register src2, ConditionFlag condition) { ++ assert dst.getRegisterCategory().equals(SIMD); ++ assert src1.getRegisterCategory().equals(SIMD); ++ assert src2.getRegisterCategory().equals(SIMD); ++ InstructionType type = floatFromSize(size); ++ emitInt(type.encoding | FCSEL.encoding | rd(dst) | rs1(src1) | rs2(src2) | condition.encoding << ConditionalConditionOffset); ++ } ++ ++ /* Debug exceptions (5.9.1.2) */ ++ ++ /** ++ * Halting mode software breakpoint: Enters halting mode debug state if enabled, else treated as ++ * UNALLOCATED instruction. ++ * ++ * @param uimm16 Arbitrary 16-bit unsigned payload. ++ */ ++ protected void hlt(int uimm16) { ++ exceptionInstruction(HLT, uimm16); ++ } ++ ++ /** ++ * Monitor mode software breakpoint: exception routed to a debug monitor executing in a higher ++ * exception level. ++ * ++ * @param uimm16 Arbitrary 16-bit unsigned payload. ++ */ ++ protected void brk(int uimm16) { ++ exceptionInstruction(BRK, uimm16); ++ } ++ ++ private void exceptionInstruction(Instruction instr, int uimm16) { ++ assert NumUtil.isUnsignedNbit(16, uimm16); ++ emitInt(instr.encoding | ExceptionOp | uimm16 << SystemImmediateOffset); ++ } ++ ++ /* Architectural hints (5.9.4) */ ++ public enum SystemHint { ++ NOP(0x0), ++ YIELD(0x1), ++ WFE(0x2), ++ WFI(0x3), ++ SEV(0x4), ++ SEVL(0x5); ++ ++ private final int encoding; ++ ++ SystemHint(int encoding) { ++ this.encoding = encoding; ++ } ++ } ++ ++ /** ++ * Architectural hints. ++ * ++ * @param hint Can be any of the defined hints. May not be null. ++ */ ++ protected void hint(SystemHint hint) { ++ emitInt(HINT.encoding | hint.encoding << SystemImmediateOffset); ++ } ++ ++ /** ++ * Clear Exclusive: clears the local record of the executing processor that an address has had a ++ * request for an exclusive access. ++ */ ++ protected void clrex() { ++ emitInt(CLREX.encoding); ++ } ++ ++ /** ++ * Possible barrier definitions for Aarch64. LOAD_LOAD and LOAD_STORE map to the same underlying ++ * barrier. ++ * ++ * We only need synchronization across the inner shareable domain (see B2-90 in the Reference ++ * documentation). ++ */ ++ public enum BarrierKind { ++ LOAD_LOAD(0x9, "ISHLD"), ++ LOAD_STORE(0x9, "ISHLD"), ++ STORE_STORE(0xA, "ISHST"), ++ ANY_ANY(0xB, "ISH"); ++ ++ public final int encoding; ++ public final String optionName; ++ ++ BarrierKind(int encoding, String optionName) { ++ this.encoding = encoding; ++ this.optionName = optionName; ++ } ++ } ++ ++ /** ++ * Data Memory Barrier. ++ * ++ * @param barrierKind barrier that is issued. May not be null. 
++ */ ++ public void dmb(BarrierKind barrierKind) { ++ emitInt(DMB.encoding | BarrierOp | barrierKind.encoding << BarrierKindOffset); ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.sw64/src/org/graalvm/compiler/asm/sw64/SW64MacroAssembler.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.sw64/src/org/graalvm/compiler/asm/sw64/SW64MacroAssembler.java +new file mode 100644 +index 0000000000..65a940f747 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.asm.sw64/src/org/graalvm/compiler/asm/sw64/SW64MacroAssembler.java +@@ -0,0 +1,1604 @@ ++/* ++ * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++ ++package org.graalvm.compiler.asm.sw64; ++ ++import static org.graalvm.compiler.asm.sw64.SW64Address.AddressingMode.BASE_REGISTER_ONLY; ++import static org.graalvm.compiler.asm.sw64.SW64Address.AddressingMode.EXTENDED_REGISTER_OFFSET; ++import static org.graalvm.compiler.asm.sw64.SW64Address.AddressingMode.IMMEDIATE_SCALED; ++import static org.graalvm.compiler.asm.sw64.SW64Address.AddressingMode.IMMEDIATE_UNSCALED; ++import static org.graalvm.compiler.asm.sw64.SW64Address.AddressingMode.REGISTER_OFFSET; ++import static org.graalvm.compiler.asm.sw64.SW64MacroAssembler.AddressGenerationPlan.WorkPlan.ADD_TO_BASE; ++import static org.graalvm.compiler.asm.sw64.SW64MacroAssembler.AddressGenerationPlan.WorkPlan.ADD_TO_INDEX; ++import static org.graalvm.compiler.asm.sw64.SW64MacroAssembler.AddressGenerationPlan.WorkPlan.NO_WORK; ++import static jdk.vm.ci.sw64.SW64.CPU; ++import static jdk.vm.ci.sw64.SW64.r8; ++import static jdk.vm.ci.sw64.SW64.r9; ++import static jdk.vm.ci.sw64.SW64.sp; ++import static jdk.vm.ci.sw64.SW64.zr; ++ ++import org.graalvm.compiler.asm.Label; ++import org.graalvm.compiler.core.common.NumUtil; ++import org.graalvm.compiler.debug.GraalError; ++ ++import jdk.vm.ci.sw64.SW64; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.TargetDescription; ++ ++public class SW64MacroAssembler extends SW64Assembler { ++ ++ private final ScratchRegister[] scratchRegister = new ScratchRegister[]{new ScratchRegister(r8), new ScratchRegister(r9)}; ++ ++ // Points to the next free scratch register ++ private int nextFreeScratchRegister = 0; ++ ++ public SW64MacroAssembler(TargetDescription target) { ++ super(target); ++ } ++ ++ public class ScratchRegister implements AutoCloseable { ++ private final Register register; ++ ++ public ScratchRegister(Register 
register) { ++ this.register = register; ++ } ++ ++ public Register getRegister() { ++ return register; ++ } ++ ++ @Override ++ public void close() { ++ assert nextFreeScratchRegister > 0 : "Close called too often"; ++ nextFreeScratchRegister--; ++ } ++ } ++ ++ public ScratchRegister getScratchRegister() { ++ return scratchRegister[nextFreeScratchRegister++]; ++ } ++ ++ /** ++ * Specifies what actions have to be taken to turn an arbitrary address of the form ++ * {@code base + displacement [+ index [<< scale]]} into a valid SW64Address. ++ */ ++ public static class AddressGenerationPlan { ++ public final WorkPlan workPlan; ++ public final SW64Address.AddressingMode addressingMode; ++ public final boolean needsScratch; ++ ++ public enum WorkPlan { ++ /** ++ * Can be used as-is without extra work. ++ */ ++ NO_WORK, ++ /** ++ * Add scaled displacement to index register. ++ */ ++ ADD_TO_INDEX, ++ /** ++ * Add unscaled displacement to base register. ++ */ ++ ADD_TO_BASE, ++ } ++ ++ /** ++ * @param workPlan Work necessary to generate a valid address. ++ * @param addressingMode Addressing mode of generated address. ++ * @param needsScratch True if generating address needs a scatch register, false otherwise. ++ */ ++ public AddressGenerationPlan(WorkPlan workPlan, SW64Address.AddressingMode addressingMode, boolean needsScratch) { ++ this.workPlan = workPlan; ++ this.addressingMode = addressingMode; ++ this.needsScratch = needsScratch; ++ } ++ } ++ ++ /** ++ * Generates an addressplan for an address of the form ++ * {@code base + displacement [+ index [<< log2(transferSize)]]} with the index register and ++ * scaling being optional. ++ * ++ * @param displacement an arbitrary displacement. ++ * @param hasIndexRegister true if the address uses an index register, false otherwise. non null ++ * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much ++ * the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8. ++ * @return AddressGenerationPlan that specifies the actions necessary to generate a valid ++ * SW64Address for the given parameters. ++ */ ++ public static AddressGenerationPlan generateAddressPlan(long displacement, boolean hasIndexRegister, int transferSize) { ++ assert transferSize == 0 || transferSize == 1 || transferSize == 2 || transferSize == 4 || transferSize == 8; ++ boolean indexScaled = transferSize != 0; ++ int log2Scale = NumUtil.log2Ceil(transferSize); ++ long scaledDisplacement = displacement >> log2Scale; ++ boolean displacementScalable = indexScaled && (displacement & (transferSize - 1)) == 0; ++ if (displacement == 0) { ++ // register offset without any work beforehand. 
++ return new AddressGenerationPlan(NO_WORK, REGISTER_OFFSET, false); ++ } else { ++ if (hasIndexRegister) { ++ if (displacementScalable) { ++ boolean needsScratch = !isArithmeticImmediate(scaledDisplacement); ++ return new AddressGenerationPlan(ADD_TO_INDEX, REGISTER_OFFSET, needsScratch); ++ } else { ++ boolean needsScratch = !isArithmeticImmediate(displacement); ++ return new AddressGenerationPlan(ADD_TO_BASE, REGISTER_OFFSET, needsScratch); ++ } ++ } else { ++ if (displacementScalable && NumUtil.isUnsignedNbit(12, scaledDisplacement)) { ++ return new AddressGenerationPlan(NO_WORK, IMMEDIATE_SCALED, false); ++ } else if (NumUtil.isSignedNbit(9, displacement)) { ++ return new AddressGenerationPlan(NO_WORK, IMMEDIATE_UNSCALED, false); ++ } else { ++ boolean needsScratch = !isArithmeticImmediate(displacement); ++ return new AddressGenerationPlan(ADD_TO_BASE, REGISTER_OFFSET, needsScratch); ++ } ++ } ++ } ++ } ++ ++ /** ++ * Returns an SW64Address pointing to ++ * {@code base + displacement + index << log2(transferSize)}. ++ * ++ * @param base general purpose register. May not be null or the zero register. ++ * @param displacement arbitrary displacement added to base. ++ * @param index general purpose register. May not be null or the stack pointer. ++ * @param signExtendIndex if true consider index register a word register that should be ++ * sign-extended before being added. ++ * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much ++ * the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8. ++ * @param additionalReg additional register used either as a scratch register or as part of the ++ * final address, depending on whether allowOverwrite is true or not. May not be null ++ * or stackpointer. ++ * @param allowOverwrite if true allows to change value of base or index register to generate ++ * address. ++ * @return SW64Address pointing to memory at ++ * {@code base + displacement + index << log2(transferSize)}. ++ */ ++ public SW64Address makeAddress(Register base, long displacement, Register index, boolean signExtendIndex, int transferSize, Register additionalReg, boolean allowOverwrite) { ++ AddressGenerationPlan plan = generateAddressPlan(displacement, !index.equals(zr), transferSize); ++ assert allowOverwrite || !zr.equals(additionalReg) || plan.workPlan == NO_WORK; ++ assert !plan.needsScratch || !zr.equals(additionalReg); ++ int log2Scale = NumUtil.log2Ceil(transferSize); ++ long scaledDisplacement = displacement >> log2Scale; ++ Register newIndex = index; ++ Register newBase = base; ++ int immediate; ++ switch (plan.workPlan) { ++ case NO_WORK: ++ if (plan.addressingMode == IMMEDIATE_SCALED) { ++ immediate = (int) scaledDisplacement; ++ } else { ++ immediate = (int) displacement; ++ } ++ break; ++ case ADD_TO_INDEX: ++ newIndex = allowOverwrite ? index : additionalReg; ++ assert !newIndex.equals(sp) && !newIndex.equals(zr); ++ if (plan.needsScratch) { ++ mov(additionalReg, scaledDisplacement); ++ add(signExtendIndex ? 32 : 64, newIndex, index, additionalReg); ++ } else { ++ add(signExtendIndex ? 32 : 64, newIndex, index, (int) scaledDisplacement); ++ } ++ immediate = 0; ++ break; ++ case ADD_TO_BASE: ++ newBase = allowOverwrite ? 
base : additionalReg; ++ assert !newBase.equals(sp) && !newBase.equals(zr); ++ if (plan.needsScratch) { ++ mov(additionalReg, displacement); ++ add(64, newBase, base, additionalReg); ++ } else { ++ add(64, newBase, base, (int) displacement); ++ } ++ immediate = 0; ++ break; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ SW64Address.AddressingMode addressingMode = plan.addressingMode; ++ ExtendType extendType = null; ++ if (addressingMode == REGISTER_OFFSET) { ++ if (newIndex.equals(zr)) { ++ addressingMode = BASE_REGISTER_ONLY; ++ } else if (signExtendIndex) { ++ addressingMode = EXTENDED_REGISTER_OFFSET; ++ extendType = ExtendType.SXTW; ++ } ++ } ++ return SW64Address.createAddress(addressingMode, newBase, newIndex, immediate, transferSize != 0, extendType); ++ } ++ ++ /** ++ * Returns an SW64Address pointing to {@code base + displacement}. Specifies the memory ++ * transfer size to allow some optimizations when building the address. ++ * ++ * @param base general purpose register. May not be null or the zero register. ++ * @param displacement arbitrary displacement added to base. ++ * @param transferSize the memory transfer size in bytes. ++ * @param additionalReg additional register used either as a scratch register or as part of the ++ * final address, depending on whether allowOverwrite is true or not. May not be ++ * null, zero register or stackpointer. ++ * @param allowOverwrite if true allows to change value of base or index register to generate ++ * address. ++ * @return SW64Address pointing to memory at {@code base + displacement}. ++ */ ++ public SW64Address makeAddress(Register base, long displacement, Register additionalReg, int transferSize, boolean allowOverwrite) { ++ assert additionalReg.getRegisterCategory().equals(CPU); ++ return makeAddress(base, displacement, zr, /* sign-extend */false, transferSize, additionalReg, allowOverwrite); ++ } ++ ++ /** ++ * Returns an SW64Address pointing to {@code base + displacement}. Fails if address cannot be ++ * represented without overwriting base register or using a scratch register. ++ * ++ * @param base general purpose register. May not be null or the zero register. ++ * @param displacement arbitrary displacement added to base. ++ * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much ++ * the index register is scaled. If 0 no scaling is assumed. Can be 0, 1, 2, 4 or 8. ++ * @return SW64Address pointing to memory at {@code base + displacement}. ++ */ ++ public SW64Address makeAddress(Register base, long displacement, int transferSize) { ++ return makeAddress(base, displacement, zr, /* signExtend */false, transferSize, zr, /* allowOverwrite */false); ++ } ++ ++ /** ++ * Loads memory address into register. ++ * ++ * @param dst general purpose register. May not be null, zero-register or stackpointer. ++ * @param address address whose value is loaded into dst. May not be null, ++ * {@link org.graalvm.compiler.asm.sw64.SW64Address.AddressingMode#IMMEDIATE_POST_INDEXED ++ * POST_INDEXED} or ++ * {@link org.graalvm.compiler.asm.sw64.SW64Address.AddressingMode#IMMEDIATE_PRE_INDEXED ++ * IMMEDIATE_PRE_INDEXED} ++ * @param transferSize the memory transfer size in bytes. The log2 of this specifies how much ++ * the index register is scaled. Can be 1, 2, 4 or 8. 
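Stripped of register bookkeeping, generateAddressPlan above classifies the displacement: zero needs no work, a suitably aligned displacement that fits the unsigned scaled 12-bit field becomes IMMEDIATE_SCALED, a signed 9-bit one becomes IMMEDIATE_UNSCALED, and anything else must first be added into a register. A standalone restatement for the no-index-register case (enum and method names are mine):

    // Restatement of the displacement classification in generateAddressPlan
    // for the case without an index register.
    final class DisplacementClass {
        enum Kind { REGISTER_OFFSET, IMMEDIATE_SCALED, IMMEDIATE_UNSCALED, NEEDS_ADD }

        static Kind classify(long displacement, int transferSize) {
            if (displacement == 0) {
                return Kind.REGISTER_OFFSET;
            }
            boolean scalable = transferSize != 0 && (displacement & (transferSize - 1)) == 0;
            long scaled = scalable ? displacement / transferSize : displacement;
            if (scalable && scaled >= 0 && scaled < (1 << 12)) {
                return Kind.IMMEDIATE_SCALED;    // fits the unsigned, scaled 12-bit offset field
            }
            if (displacement >= -(1 << 8) && displacement < (1 << 8)) {
                return Kind.IMMEDIATE_UNSCALED;  // fits the signed 9-bit offset field
            }
            return Kind.NEEDS_ADD;               // displacement must be materialised into a register first
        }

        public static void main(String[] args) {
            assert classify(0, 8) == Kind.REGISTER_OFFSET;
            assert classify(8 * 100, 8) == Kind.IMMEDIATE_SCALED;
            assert classify(-17, 8) == Kind.IMMEDIATE_UNSCALED;
            assert classify(1 << 20, 8) == Kind.NEEDS_ADD;
        }
    }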
++ */ ++ public void loadAddress(Register dst, SW64Address address, int transferSize) { ++ assert transferSize == 1 || transferSize == 2 || transferSize == 4 || transferSize == 8; ++ assert dst.getRegisterCategory().equals(CPU); ++ int shiftAmt = NumUtil.log2Ceil(transferSize); ++ switch (address.getAddressingMode()) { ++ case IMMEDIATE_SCALED: ++ int scaledImmediate = address.getImmediateRaw() << shiftAmt; ++ int lowerBits = scaledImmediate & NumUtil.getNbitNumberInt(12); ++ int higherBits = scaledImmediate & ~NumUtil.getNbitNumberInt(12); ++ boolean firstAdd = true; ++ if (lowerBits != 0) { ++ add(64, dst, address.getBase(), lowerBits); ++ firstAdd = false; ++ } ++ if (higherBits != 0) { ++ Register src = firstAdd ? address.getBase() : dst; ++ add(64, dst, src, higherBits); ++ } ++ break; ++ case IMMEDIATE_UNSCALED: ++ int immediate = address.getImmediateRaw(); ++ add(64, dst, address.getBase(), immediate); ++ break; ++ case REGISTER_OFFSET: ++ add(64, dst, address.getBase(), address.getOffset(), ShiftType.LSL, address.isScaled() ? shiftAmt : 0); ++ break; ++ case EXTENDED_REGISTER_OFFSET: ++ add(64, dst, address.getBase(), address.getOffset(), address.getExtendType(), address.isScaled() ? shiftAmt : 0); ++ break; ++ case PC_LITERAL: { ++ addressOf(dst); ++ break; ++ } ++ case BASE_REGISTER_ONLY: ++ movx(dst, address.getBase()); ++ break; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ } ++ ++ public void movx(Register dst, Register src) { ++ mov(64, dst, src); ++ } ++ ++ public void mov(int size, Register dst, Register src) { ++ if (dst.equals(sp) || src.equals(sp)) { ++ add(size, dst, src, 0); ++ } else { ++ or(size, dst, zr, src); ++ } ++ } ++ ++ /** ++ * Generates a 64-bit immediate move code sequence. ++ * ++ * @param dst general purpose register. May not be null, stackpointer or zero-register. ++ * @param imm ++ */ ++ private void mov64(Register dst, long imm) { ++ // We have to move all non zero parts of the immediate in 16-bit chunks ++ boolean firstMove = true; ++ for (int offset = 0; offset < 64; offset += 16) { ++ int chunk = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16); ++ if (chunk == 0) { ++ continue; ++ } ++ if (firstMove) { ++ movz(64, dst, chunk, offset); ++ firstMove = false; ++ } else { ++ movk(64, dst, chunk, offset); ++ } ++ } ++ assert !firstMove; ++ } ++ ++ /** ++ * Loads immediate into register. ++ * ++ * @param dst general purpose register. May not be null, zero-register or stackpointer. ++ * @param imm immediate loaded into register. ++ */ ++ public void mov(Register dst, long imm) { ++ assert dst.getRegisterCategory().equals(CPU); ++ if (imm == 0L) { ++ movx(dst, zr); ++ } else if (LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO) { ++ or(64, dst, zr, imm); ++ } else if (imm >> 32 == -1L && (int) imm < 0 && LogicalImmediateTable.isRepresentable((int) imm) != LogicalImmediateTable.Representable.NO) { ++ // If the higher 32-bit are 1s and the sign bit of the lower 32-bits is set *and* we can ++ // represent the lower 32 bits as a logical immediate we can create the lower 32-bit and ++ // then sign extend ++ // them. This allows us to cover immediates like ~1L with 2 instructions. ++ mov(dst, (int) imm); ++ sxt(64, 32, dst, dst); ++ } else { ++ mov64(dst, imm); ++ } ++ } ++ ++ /** ++ * Loads immediate into register. ++ * ++ * @param dst general purpose register. May not be null, zero-register or stackpointer. ++ * @param imm immediate loaded into register. 
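++     * <p>
++     * The value is treated as an unsigned 32-bit quantity and delegated to
++     * {@link #mov(Register, long)}, so for example {@code mov(dst, -1)} materialises
++     * {@code 0xFFFF_FFFFL} rather than sign-extending to {@code -1L}.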
++ */ ++ public void mov(Register dst, int imm) { ++ mov(dst, imm & 0xFFFF_FFFFL); ++ } ++ ++ /** ++ * Generates a 48-bit immediate move code sequence. The immediate may later be updated by ++ * HotSpot. ++ * ++ * In SW64 mode the virtual address space is 48-bits in size, so we only need three ++ * instructions to create a patchable instruction sequence that can reach anywhere. ++ * ++ * @param dst general purpose register. May not be null, stackpointer or zero-register. ++ * @param imm ++ */ ++ public void movNativeAddress(Register dst, long imm) { ++ assert (imm & 0xFFFF_0000_0000_0000L) == 0; ++ // We have to move all non zero parts of the immediate in 16-bit chunks ++ boolean firstMove = true; ++ for (int offset = 0; offset < 48; offset += 16) { ++ int chunk = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16); ++ if (firstMove) { ++ movz(64, dst, chunk, offset); ++ firstMove = false; ++ } else { ++ movk(64, dst, chunk, offset); ++ } ++ } ++ assert !firstMove; ++ } ++ ++ /** ++ * Generates a 32-bit immediate move code sequence. The immediate may later be updated by ++ * HotSpot. ++ * ++ * @param dst general purpose register. May not be null, stackpointer or zero-register. ++ * @param imm ++ */ ++ public void movNarrowAddress(Register dst, long imm) { ++ assert (imm & 0xFFFF_FFFF_0000_0000L) == 0; ++ movz(64, dst, (int) (imm >>> 16), 16); ++ movk(64, dst, (int) (imm & 0xffff), 0); ++ } ++ ++ /** ++ * @return Number of instructions necessary to load immediate into register. ++ */ ++ public static int nrInstructionsToMoveImmediate(long imm) { ++ if (imm == 0L || LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO) { ++ return 1; ++ } ++ if (imm >> 32 == -1L && (int) imm < 0 && LogicalImmediateTable.isRepresentable((int) imm) != LogicalImmediateTable.Representable.NO) { ++ // If the higher 32-bit are 1s and the sign bit of the lower 32-bits is set *and* we can ++ // represent the lower 32 bits as a logical immediate we can create the lower 32-bit and ++ // then sign extend ++ // them. This allows us to cover immediates like ~1L with 2 instructions. ++ return 2; ++ } ++ int nrInstructions = 0; ++ for (int offset = 0; offset < 64; offset += 16) { ++ int part = (int) (imm >> offset) & NumUtil.getNbitNumberInt(16); ++ if (part != 0) { ++ nrInstructions++; ++ } ++ } ++ return nrInstructions; ++ } ++ ++ /** ++ * Loads a srcSize value from address into rt sign-extending it if necessary. ++ * ++ * @param targetSize size of target register in bits. Must be 32 or 64. ++ * @param srcSize size of memory read in bits. Must be 8, 16 or 32 and smaller or equal to ++ * targetSize. ++ * @param rt general purpose register. May not be null or stackpointer. ++ * @param address all addressing modes allowed. May not be null. ++ */ ++ @Override ++ public void ldrs(int targetSize, int srcSize, Register rt, SW64Address address) { ++ assert targetSize == 32 || targetSize == 64; ++ assert srcSize <= targetSize; ++ if (targetSize == srcSize) { ++ super.ldr(srcSize, rt, address); ++ } else { ++ super.ldrs(targetSize, srcSize, rt, address); ++ } ++ } ++ ++ /** ++ * Loads a srcSize value from address into rt zero-extending it if necessary. ++ * ++ * @param srcSize size of memory read in bits. Must be 8, 16 or 32 and smaller or equal to ++ * targetSize. ++ * @param rt general purpose register. May not be null or stackpointer. ++ * @param address all addressing modes allowed. May not be null. 
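++     * <p>
++     * Contrast with {@link #ldrs(int, int, Register, SW64Address)}: loading a byte with
++     * {@code ldr(8, rt, address)} zero-extends it, while {@code ldrs(32, 8, rt, address)}
++     * sign-extends the same byte into a 32-bit result.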
++ */ ++ @Override ++ public void ldr(int srcSize, Register rt, SW64Address address) { ++ super.ldr(srcSize, rt, address); ++ } ++ ++ /** ++ * Conditional move. dst = src1 if condition else src2. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param result general purpose register. May not be null or the stackpointer. ++ * @param trueValue general purpose register. May not be null or the stackpointer. ++ * @param falseValue general purpose register. May not be null or the stackpointer. ++ * @param cond any condition flag. May not be null. ++ */ ++ public void cmov(int size, Register result, Register trueValue, Register falseValue, ConditionFlag cond) { ++ super.csel(size, result, trueValue, falseValue, cond); ++ } ++ ++ /** ++ * Conditional set. dst = 1 if condition else 0. ++ * ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param condition any condition. May not be null. ++ */ ++ public void cset(Register dst, ConditionFlag condition) { ++ super.csinc(32, dst, zr, zr, condition.negate()); ++ } ++ ++ /** ++ * dst = src1 + src2. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null. ++ * @param src1 general purpose register. May not be null. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ */ ++ public void add(int size, Register dst, Register src1, Register src2) { ++ if (dst.equals(sp) || src1.equals(sp)) { ++ super.add(size, dst, src1, src2, ExtendType.UXTX, 0); ++ } else { ++ super.add(size, dst, src1, src2, ShiftType.LSL, 0); ++ } ++ } ++ ++ /** ++ * dst = src1 + src2 and sets condition flags. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null. ++ * @param src1 general purpose register. May not be null. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ */ ++ public void adds(int size, Register dst, Register src1, Register src2) { ++ if (dst.equals(sp) || src1.equals(sp)) { ++ super.adds(size, dst, src1, src2, ExtendType.UXTX, 0); ++ } else { ++ super.adds(size, dst, src1, src2, ShiftType.LSL, 0); ++ } ++ } ++ ++ /** ++ * dst = src1 - src2 and sets condition flags. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null. ++ * @param src1 general purpose register. May not be null. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ */ ++ public void subs(int size, Register dst, Register src1, Register src2) { ++ if (dst.equals(sp) || src1.equals(sp)) { ++ super.subs(size, dst, src1, src2, ExtendType.UXTX, 0); ++ } else { ++ super.subs(size, dst, src1, src2, ShiftType.LSL, 0); ++ } ++ } ++ ++ /** ++ * dst = src1 - src2. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null. ++ * @param src1 general purpose register. May not be null. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ */ ++ public void sub(int size, Register dst, Register src1, Register src2) { ++ if (dst.equals(sp) || src1.equals(sp)) { ++ super.sub(size, dst, src1, src2, ExtendType.UXTX, 0); ++ } else { ++ super.sub(size, dst, src1, src2, ShiftType.LSL, 0); ++ } ++ } ++ ++ /** ++ * dst = src1 + shiftType(src2, shiftAmt & (size - 1)). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. 
May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param shiftType any type but ROR. ++ * @param shiftAmt arbitrary shift amount. ++ */ ++ @Override ++ public void add(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ int shift = clampShiftAmt(size, shiftAmt); ++ super.add(size, dst, src1, src2, shiftType, shift); ++ } ++ ++ /** ++ * dst = src1 + shiftType(src2, shiftAmt & (size-1)) and sets condition flags. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ * @param shiftType any type but ROR. ++ * @param shiftAmt arbitrary shift amount. ++ */ ++ @Override ++ public void sub(int size, Register dst, Register src1, Register src2, ShiftType shiftType, int shiftAmt) { ++ int shift = clampShiftAmt(size, shiftAmt); ++ super.sub(size, dst, src1, src2, shiftType, shift); ++ } ++ ++ /** ++ * dst = -src1. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src general purpose register. May not be null or stackpointer. ++ */ ++ public void neg(int size, Register dst, Register src) { ++ sub(size, dst, zr, src); ++ } ++ ++ /** ++ * dst = src + immediate. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or zero-register. ++ * @param src general purpose register. May not be null or zero-register. ++ * @param immediate 32-bit signed int ++ */ ++ @Override ++ public void add(int size, Register dst, Register src, int immediate) { ++ assert (!dst.equals(zr) && !src.equals(zr)); ++ if (immediate < 0) { ++ sub(size, dst, src, -immediate); ++ } else if (isAimm(immediate)) { ++ if (!(dst.equals(src) && immediate == 0)) { ++ super.add(size, dst, src, immediate); ++ } ++ } else if (immediate >= -(1 << 24) && immediate < (1 << 24)) { ++ super.add(size, dst, src, immediate & -(1 << 12)); ++ super.add(size, dst, dst, immediate & ((1 << 12) - 1)); ++ } else { ++ assert !dst.equals(src); ++ mov(dst, immediate); ++ add(size, src, dst, dst); ++ } ++ } ++ ++ /** ++ * dst = src + immediate. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or zero-register. ++ * @param src general purpose register. May not be null or zero-register. ++ * @param immediate 64-bit signed int ++ */ ++ public void add(int size, Register dst, Register src, long immediate) { ++ if (NumUtil.isInt(immediate)) { ++ add(size, dst, src, (int) immediate); ++ } else { ++ assert (!dst.equals(zr) && !src.equals(zr)); ++ assert !dst.equals(src); ++ assert size == 64; ++ mov(dst, immediate); ++ add(size, src, dst, dst); ++ } ++ } ++ ++ /** ++ * dst = src + aimm and sets condition flags. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src general purpose register. May not be null or zero-register. ++ * @param immediate arithmetic immediate. 
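++     * <p>
++     * Unlike {@link #add(int, Register, Register, int)}, which splits a large value into two
++     * 12-bit additions (e.g. {@code 0x12345} is added as {@code 0x12000} followed by
++     * {@code 0x345}), this flag-setting variant performs no such expansion, so the immediate
++     * must already be encodable as an arithmetic immediate (see
++     * {@link #isArithmeticImmediate(long)}).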
++ */ ++ @Override ++ public void adds(int size, Register dst, Register src, int immediate) { ++ assert (!dst.equals(sp) && !src.equals(zr)); ++ if (immediate < 0) { ++ subs(size, dst, src, -immediate); ++ } else if (!(dst.equals(src) && immediate == 0)) { ++ super.adds(size, dst, src, immediate); ++ } ++ } ++ ++ /** ++ * dst = src - immediate. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or zero-register. ++ * @param src general purpose register. May not be null or zero-register. ++ * @param immediate 32-bit signed int ++ */ ++ @Override ++ public void sub(int size, Register dst, Register src, int immediate) { ++ assert (!dst.equals(zr) && !src.equals(zr)); ++ if (immediate < 0) { ++ add(size, dst, src, -immediate); ++ } else if (isAimm(immediate)) { ++ if (!(dst.equals(src) && immediate == 0)) { ++ super.sub(size, dst, src, immediate); ++ } ++ } else if (immediate >= -(1 << 24) && immediate < (1 << 24)) { ++ super.sub(size, dst, src, immediate & -(1 << 12)); ++ super.sub(size, dst, dst, immediate & ((1 << 12) - 1)); ++ } else { ++ assert !dst.equals(src); ++ mov(dst, immediate); ++ sub(size, src, dst, dst); ++ } ++ } ++ ++ /** ++ * dst = src - aimm and sets condition flags. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src general purpose register. May not be null or zero-register. ++ * @param immediate arithmetic immediate. ++ */ ++ @Override ++ public void subs(int size, Register dst, Register src, int immediate) { ++ assert (!dst.equals(sp) && !src.equals(zr)); ++ if (immediate < 0) { ++ adds(size, dst, src, -immediate); ++ } else if (!dst.equals(src) || immediate != 0) { ++ super.subs(size, dst, src, immediate); ++ } ++ } ++ ++ /** ++ * dst = src1 * src2. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param src1 general purpose register. May not be null or the stackpointer. ++ * @param src2 general purpose register. May not be null or the stackpointer. ++ */ ++ public void mul(int size, Register dst, Register src1, Register src2) { ++ super.madd(size, dst, src1, src2, zr); ++ } ++ ++ /** ++ * unsigned multiply high. dst = (src1 * src2) >> size ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param src1 general purpose register. May not be null or the stackpointer. ++ * @param src2 general purpose register. May not be null or the stackpointer. ++ */ ++ public void umulh(int size, Register dst, Register src1, Register src2) { ++ assert (!dst.equals(sp) && !src1.equals(sp) && !src2.equals(sp)); ++ assert size == 32 || size == 64; ++ if (size == 64) { ++ super.umulh(dst, src1, src2); ++ } else { ++ // xDst = wSrc1 * wSrc2 ++ super.umaddl(dst, src1, src2, zr); ++ // xDst = xDst >> 32 ++ lshr(64, dst, dst, 32); ++ } ++ } ++ ++ /** ++ * signed multiply high. dst = (src1 * src2) >> size ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param src1 general purpose register. May not be null or the stackpointer. ++ * @param src2 general purpose register. May not be null or the stackpointer. 
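++     * <p>
++     * For {@code size == 32} the result is obtained with a widening multiply-add
++     * ({@code smaddl}) followed by a logical right shift of 32, mirroring the unsigned variant
++     * above. For example, {@code smulh(32, dst, x, y)} with {@code x == y == 0x4000_0000}
++     * yields {@code 0x1000_0000}.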
++ */ ++ public void smulh(int size, Register dst, Register src1, Register src2) { ++ assert (!dst.equals(sp) && !src1.equals(sp) && !src2.equals(sp)); ++ assert size == 32 || size == 64; ++ if (size == 64) { ++ super.smulh(dst, src1, src2); ++ } else { ++ // xDst = wSrc1 * wSrc2 ++ super.smaddl(dst, src1, src2, zr); ++ // xDst = xDst >> 32 ++ lshr(64, dst, dst, 32); ++ } ++ } ++ ++ /** ++ * dst = src1 % src2. Signed. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param n numerator. General purpose register. May not be null or the stackpointer. ++ * @param d denominator. General purpose register. Divisor May not be null or the stackpointer. ++ */ ++ public void rem(int size, Register dst, Register n, Register d) { ++ assert (!dst.equals(sp) && !n.equals(sp) && !d.equals(sp)); ++ // There is no irem or similar instruction. Instead we use the relation: ++ // n % d = n - Floor(n / d) * d if nd >= 0 ++ // n % d = n - Ceil(n / d) * d else ++ // Which is equivalent to n - TruncatingDivision(n, d) * d ++ super.sdiv(size, dst, n, d); ++ super.msub(size, dst, dst, d, n); ++ } ++ ++ /** ++ * dst = src1 % src2. Unsigned. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param n numerator. General purpose register. May not be null or the stackpointer. ++ * @param d denominator. General purpose register. Divisor May not be null or the stackpointer. ++ */ ++ public void urem(int size, Register dst, Register n, Register d) { ++ // There is no irem or similar instruction. Instead we use the relation: ++ // n % d = n - Floor(n / d) * d ++ // Which is equivalent to n - TruncatingDivision(n, d) * d ++ super.udiv(size, dst, n, d); ++ super.msub(size, dst, dst, d, n); ++ } ++ ++ /** ++ * Add/subtract instruction encoding supports 12-bit immediate values. ++ * ++ * @param imm immediate value to be tested. ++ * @return true if immediate can be used directly for arithmetic instructions (add/sub), false ++ * otherwise. ++ */ ++ public static boolean isArithmeticImmediate(long imm) { ++ // If we have a negative immediate we just use the opposite operator. I.e.: x - (-5) == x + ++ // 5. ++ return NumUtil.isInt(Math.abs(imm)) && isAimm((int) Math.abs(imm)); ++ } ++ ++ /** ++ * Compare instructions are add/subtract instructions and so support 12-bit immediate values. ++ * ++ * @param imm immediate value to be tested. ++ * @return true if immediate can be used directly with comparison instructions, false otherwise. ++ */ ++ public static boolean isComparisonImmediate(long imm) { ++ return isArithmeticImmediate(imm); ++ } ++ ++ /** ++ * Move wide immediate instruction encoding supports 16-bit immediate values which can be ++ * optionally-shifted by multiples of 16 (i.e. 0, 16, 32, 48). ++ * ++ * @return true if immediate can be moved directly into a register, false otherwise. ++ */ ++ public static boolean isMovableImmediate(long imm) { ++ // // Positions of first, respectively last set bit. ++ // int start = Long.numberOfTrailingZeros(imm); ++ // int end = 64 - Long.numberOfLeadingZeros(imm); ++ // int length = end - start; ++ // if (length > 16) { ++ // return false; ++ // } ++ // // We can shift the necessary part of the immediate (i.e. 
everything between the first ++ // and ++ // // last set bit) by as much as 16 - length around to arrive at a valid shift amount ++ // int tolerance = 16 - length; ++ // int prevMultiple = NumUtil.roundDown(start, 16); ++ // int nextMultiple = NumUtil.roundUp(start, 16); ++ // return start - prevMultiple <= tolerance || nextMultiple - start <= tolerance; ++ /* ++ * This is a bit optimistic because the constant could also be for an arithmetic instruction ++ * which only supports 12-bits. That case needs to be handled in the backend. ++ */ ++ return NumUtil.isInt(Math.abs(imm)) && NumUtil.isUnsignedNbit(16, (int) Math.abs(imm)); ++ } ++ ++ /** ++ * dst = src << (shiftAmt & (size - 1)). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null, stackpointer or zero-register. ++ * @param src general purpose register. May not be null, stackpointer or zero-register. ++ * @param shiftAmt amount by which src is shifted. ++ */ ++ public void shl(int size, Register dst, Register src, long shiftAmt) { ++ int shift = clampShiftAmt(size, shiftAmt); ++ super.ubfm(size, dst, src, (size - shift) & (size - 1), size - 1 - shift); ++ } ++ ++ /** ++ * dst = src1 << (src2 & (size - 1)). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src general purpose register. May not be null or stackpointer. ++ * @param shift general purpose register. May not be null or stackpointer. ++ */ ++ public void shl(int size, Register dst, Register src, Register shift) { ++ super.lsl(size, dst, src, shift); ++ } ++ ++ /** ++ * dst = src >>> (shiftAmt & (size - 1)). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null, stackpointer or zero-register. ++ * @param src general purpose register. May not be null, stackpointer or zero-register. ++ * @param shiftAmt amount by which src is shifted. ++ */ ++ public void lshr(int size, Register dst, Register src, long shiftAmt) { ++ int shift = clampShiftAmt(size, shiftAmt); ++ super.ubfm(size, dst, src, shift, size - 1); ++ } ++ ++ /** ++ * dst = src1 >>> (src2 & (size - 1)). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src general purpose register. May not be null or stackpointer. ++ * @param shift general purpose register. May not be null or stackpointer. ++ */ ++ public void lshr(int size, Register dst, Register src, Register shift) { ++ super.lsr(size, dst, src, shift); ++ } ++ ++ /** ++ * dst = src >> (shiftAmt & log2(size)). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null, stackpointer or zero-register. ++ * @param src general purpose register. May not be null, stackpointer or zero-register. ++ * @param shiftAmt amount by which src is shifted. ++ */ ++ public void ashr(int size, Register dst, Register src, long shiftAmt) { ++ int shift = clampShiftAmt(size, shiftAmt); ++ super.sbfm(size, dst, src, shift, size - 1); ++ } ++ ++ /** ++ * dst = src1 >> (src2 & log2(size)). ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src general purpose register. May not be null or stackpointer. ++ * @param shift general purpose register. May not be null or stackpointer. 
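++     * <p>
++     * Arithmetic shifts replicate the sign bit, unlike
++     * {@link #lshr(int, Register, Register, Register)}: shifting the 32-bit value
++     * {@code 0xFFFF_FF00} ({@code -256}) right by 4 yields {@code 0xFFFF_FFF0} ({@code -16})
++     * here, whereas a logical shift would yield {@code 0x0FFF_FFF0}.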
++ */ ++ public void ashr(int size, Register dst, Register src, Register shift) { ++ super.asr(size, dst, src, shift); ++ } ++ ++ /** ++ * Clamps shiftAmt into range 0 <= shiftamt < size according to JLS. ++ * ++ * @param size size of operation. ++ * @param shiftAmt arbitrary shift amount. ++ * @return value between 0 and size - 1 inclusive that is equivalent to shiftAmt according to ++ * JLS. ++ */ ++ private static int clampShiftAmt(int size, long shiftAmt) { ++ return (int) (shiftAmt & (size - 1)); ++ } ++ ++ /** ++ * dst = src1 & src2. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ */ ++ public void and(int size, Register dst, Register src1, Register src2) { ++ super.and(size, dst, src1, src2, ShiftType.LSL, 0); ++ } ++ ++ /** ++ * dst = src1 ^ src2. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ */ ++ public void eor(int size, Register dst, Register src1, Register src2) { ++ super.eor(size, dst, src1, src2, ShiftType.LSL, 0); ++ } ++ ++ /** ++ * dst = src1 | src2. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src1 general purpose register. May not be null or stackpointer. ++ * @param src2 general purpose register. May not be null or stackpointer. ++ */ ++ public void or(int size, Register dst, Register src1, Register src2) { ++ super.orr(size, dst, src1, src2, ShiftType.LSL, 0); ++ } ++ ++ /** ++ * dst = src | bimm. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or zero-register. ++ * @param src general purpose register. May not be null or stack-pointer. ++ * @param bimm logical immediate. See {@link SW64Assembler.LogicalImmediateTable} for exact ++ * definition. ++ */ ++ public void or(int size, Register dst, Register src, long bimm) { ++ super.orr(size, dst, src, bimm); ++ } ++ ++ /** ++ * dst = ~src. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stackpointer. ++ * @param src general purpose register. May not be null or stackpointer. ++ */ ++ public void not(int size, Register dst, Register src) { ++ super.orn(size, dst, zr, src, ShiftType.LSL, 0); ++ } ++ ++ /** ++ * Sign-extend value from src into dst. ++ * ++ * @param destSize destination register size. Must be 32 or 64. ++ * @param srcSize source register size. Must be smaller than destSize. ++ * @param dst general purpose register. May not be null, stackpointer or zero-register. ++ * @param src general purpose register. May not be null, stackpointer or zero-register. ++ */ ++ public void sxt(int destSize, int srcSize, Register dst, Register src) { ++ assert (srcSize < destSize && srcSize > 0); ++ super.sbfm(destSize, dst, src, 0, srcSize - 1); ++ } ++ ++ /** ++ * dst = src if condition else -src. ++ * ++ * @param size register size. Must be 32 or 64. ++ * @param dst general purpose register. May not be null or the stackpointer. ++ * @param src general purpose register. May not be null or the stackpointer. 
++ * @param condition any condition except AV or NV. May not be null. ++ */ ++ public void csneg(int size, Register dst, Register src, ConditionFlag condition) { ++ super.csneg(size, dst, src, src, condition.negate()); ++ } ++ ++ /** ++ * @return True if the immediate can be used directly for logical 64-bit instructions. ++ */ ++ public static boolean isLogicalImmediate(long imm) { ++ return LogicalImmediateTable.isRepresentable(true, imm) != LogicalImmediateTable.Representable.NO; ++ } ++ ++ /** ++ * @return True if the immediate can be used directly for logical 32-bit instructions. ++ */ ++ public static boolean isLogicalImmediate(int imm) { ++ return LogicalImmediateTable.isRepresentable(imm) == LogicalImmediateTable.Representable.YES; ++ } ++ ++ /* Float instructions */ ++ ++ /** ++ * Moves integer to float, float to integer, or float to float. Does not support integer to ++ * integer moves. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst Either floating-point or general-purpose register. If general-purpose register may ++ * not be stackpointer or zero register. Cannot be null in any case. ++ * @param src Either floating-point or general-purpose register. If general-purpose register may ++ * not be stackpointer. Cannot be null in any case. ++ */ ++ @Override ++ public void fmov(int size, Register dst, Register src) { ++ assert !(dst.getRegisterCategory().equals(CPU) && src.getRegisterCategory().equals(CPU)) : "src and dst cannot both be integer registers."; ++ if (dst.getRegisterCategory().equals(CPU)) { ++ super.fmovFpu2Cpu(size, dst, src); ++ } else if (src.getRegisterCategory().equals(CPU)) { ++ super.fmovCpu2Fpu(size, dst, src); ++ } else { ++ super.fmov(size, dst, src); ++ } ++ } ++ ++ /** ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst floating point register. May not be null. ++ * @param imm immediate that is loaded into dst. If size is 32 only float immediates can be ++ * loaded, i.e. (float) imm == imm must be true. In all cases ++ * {@code isFloatImmediate}, respectively {@code #isDoubleImmediate} must be true ++ * depending on size. ++ */ ++ @Override ++ public void fmov(int size, Register dst, double imm) { ++ if (imm == 0.0) { ++ assert Double.doubleToRawLongBits(imm) == 0L : "-0.0 is no valid immediate."; ++ super.fmovCpu2Fpu(size, dst, zr); ++ } else { ++ super.fmov(size, dst, imm); ++ } ++ } ++ ++ /** ++ * ++ * @return true if immediate can be loaded directly into floating-point register, false ++ * otherwise. ++ */ ++ public static boolean isDoubleImmediate(double imm) { ++ return Double.doubleToRawLongBits(imm) == 0L || SW64Assembler.isDoubleImmediate(imm); ++ } ++ ++ /** ++ * ++ * @return true if immediate can be loaded directly into floating-point register, false ++ * otherwise. ++ */ ++ public static boolean isFloatImmediate(float imm) { ++ return Float.floatToRawIntBits(imm) == 0 || SW64Assembler.isFloatImmediate(imm); ++ } ++ ++ /** ++ * Conditional move. dst = src1 if condition else src2. ++ * ++ * @param size register size. ++ * @param result floating point register. May not be null. ++ * @param trueValue floating point register. May not be null. ++ * @param falseValue floating point register. May not be null. ++ * @param condition every condition allowed. May not be null. ++ */ ++ public void fcmov(int size, Register result, Register trueValue, Register falseValue, ConditionFlag condition) { ++ super.fcsel(size, result, trueValue, falseValue, condition); ++ } ++ ++ /** ++ * dst = src1 % src2. 
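++     * <p>
++     * Computed as {@code n - trunc(n / d) * d} via {@code fdiv}, {@code frintz} and
++     * {@code fmsub}; for example {@code 7.5 % 2.0} gives {@code 7.5 - 3.0 * 2.0 = 1.5}.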
++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst floating-point register. May not be null. ++ * @param n numerator. Floating-point register. May not be null. ++ * @param d denominator. Floating-point register. May not be null. ++ */ ++ public void frem(int size, Register dst, Register n, Register d) { ++ // There is no frem instruction, instead we compute the remainder using the relation: ++ // rem = n - Truncating(n / d) * d ++ super.fdiv(size, dst, n, d); ++ super.frintz(size, dst, dst); ++ super.fmsub(size, dst, dst, d, n); ++ } ++ ++ /* Branches */ ++ ++ /** ++ * Compares x and y and sets condition flags. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param x general purpose register. May not be null or stackpointer. ++ * @param y general purpose register. May not be null or stackpointer. ++ */ ++ public void cmp(int size, Register x, Register y) { ++ assert size == 32 || size == 64; ++ super.subs(size, zr, x, y, ShiftType.LSL, 0); ++ } ++ ++ /** ++ * Compares x to y and sets condition flags. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param x general purpose register. May not be null or stackpointer. ++ * @param y comparison immediate, {@link #isComparisonImmediate(long)} has to be true for it. ++ */ ++ public void cmp(int size, Register x, int y) { ++ assert size == 32 || size == 64; ++ if (y < 0) { ++ super.adds(size, zr, x, -y); ++ } else { ++ super.subs(size, zr, x, y); ++ } ++ } ++ ++ /** ++ * Sets condition flags according to result of x & y. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stack-pointer. ++ * @param x general purpose register. May not be null or stackpointer. ++ * @param y general purpose register. May not be null or stackpointer. ++ */ ++ public void ands(int size, Register dst, Register x, Register y) { ++ super.ands(size, dst, x, y, ShiftType.LSL, 0); ++ } ++ ++ /** ++ * Sets overflow flag according to result of x * y. ++ * ++ * @param size register size. Has to be 32 or 64. ++ * @param dst general purpose register. May not be null or stack-pointer. ++ * @param x general purpose register. May not be null or stackpointer. ++ * @param y general purpose register. May not be null or stackpointer. ++ */ ++ public void mulvs(int size, Register dst, Register x, Register y) { ++ try (ScratchRegister sc1 = getScratchRegister(); ++ ScratchRegister sc2 = getScratchRegister()) { ++ switch (size) { ++ case 64: { ++ // Be careful with registers: it's possible that x, y, and dst are the same ++ // register. 
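++                    // Overview of the sequence below: the full 128-bit product is formed
++                    // (mul for the low half, smulh for the high half); if the high half differs
++                    // from the sign-extension of the low half (low >> 63), the 64-bit multiply
++                    // overflowed. The trailing "0x80000000 - 1" comparison exists only to force
++                    // the V flag into the expected state (set on overflow, clear otherwise).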
++ Register rscratch1 = sc1.getRegister(); ++ Register rscratch2 = sc2.getRegister(); ++ mul(64, rscratch1, x, y); // Result bits 0..63 ++ smulh(64, rscratch2, x, y); // Result bits 64..127 ++ // Top is pure sign ext ++ subs(64, zr, rscratch2, rscratch1, ShiftType.ASR, 63); ++ // Copy all 64 bits of the result into dst ++ mov(64, dst, rscratch1); ++ mov(rscratch1, 0x80000000); ++ // Develop 0 (EQ), or 0x80000000 (NE) ++ cmov(32, rscratch1, rscratch1, zr, ConditionFlag.NE); ++ cmp(32, rscratch1, 1); ++ // 0x80000000 - 1 => VS ++ break; ++ } ++ case 32: { ++ Register rscratch1 = sc1.getRegister(); ++ smaddl(rscratch1, x, y, zr); ++ // Copy the low 32 bits of the result into dst ++ mov(32, dst, rscratch1); ++ subs(64, zr, rscratch1, rscratch1, ExtendType.SXTW, 0); ++ // NE => overflow ++ mov(rscratch1, 0x80000000); ++ // Develop 0 (EQ), or 0x80000000 (NE) ++ cmov(32, rscratch1, rscratch1, zr, ConditionFlag.NE); ++ cmp(32, rscratch1, 1); ++ // 0x80000000 - 1 => VS ++ break; ++ } ++ } ++ } ++ } ++ ++ /** ++ * When patching up Labels we have to know what kind of code to generate. ++ */ ++ public enum PatchLabelKind { ++ BRANCH_CONDITIONALLY(0x0), ++ BRANCH_UNCONDITIONALLY(0x1), ++ BRANCH_NONZERO(0x2), ++ BRANCH_ZERO(0x3), ++ BRANCH_BIT_NONZERO(0x4), ++ BRANCH_BIT_ZERO(0x5), ++ JUMP_ADDRESS(0x6), ++ ADR(0x7); ++ ++ /** ++ * Offset by which additional information for branch conditionally, branch zero and branch ++ * non zero has to be shifted. ++ */ ++ public static final int INFORMATION_OFFSET = 5; ++ ++ public final int encoding; ++ ++ PatchLabelKind(int encoding) { ++ this.encoding = encoding; ++ } ++ ++ /** ++ * @return PatchLabelKind with given encoding. ++ */ ++ private static PatchLabelKind fromEncoding(int encoding) { ++ return values()[encoding & NumUtil.getNbitNumberInt(INFORMATION_OFFSET)]; ++ } ++ ++ } ++ ++ public void adr(Register dst, Label label) { ++ // TODO Handle case where offset is too large for a single jump instruction ++ if (label.isBound()) { ++ int offset = label.position() - position(); ++ super.adr(dst, offset); ++ } else { ++ label.addPatchAt(position()); ++ // Encode condition flag so that we know how to patch the instruction later ++ emitInt(PatchLabelKind.ADR.encoding | dst.encoding << PatchLabelKind.INFORMATION_OFFSET); ++ } ++ } ++ ++ /** ++ * Compare register and branch if non-zero. ++ * ++ * @param size Instruction size in bits. Should be either 32 or 64. ++ * @param cmp general purpose register. May not be null, zero-register or stackpointer. ++ * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null. ++ */ ++ public void cbnz(int size, Register cmp, Label label) { ++ // TODO Handle case where offset is too large for a single jump instruction ++ if (label.isBound()) { ++ int offset = label.position() - position(); ++ super.cbnz(size, cmp, offset); ++ } else { ++ label.addPatchAt(position()); ++ int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 1); ++ int sizeEncoding = (size == 64 ? 1 : 0) << PatchLabelKind.INFORMATION_OFFSET; ++ // Encode condition flag so that we know how to patch the instruction later ++ emitInt(PatchLabelKind.BRANCH_NONZERO.encoding | regEncoding | sizeEncoding); ++ } ++ } ++ ++ /** ++ * Compare register and branch if zero. ++ * ++ * @param size Instruction size in bits. Should be either 32 or 64. ++ * @param cmp general purpose register. May not be null, zero-register or stackpointer. ++ * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null. 
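++     * <p>
++     * If the label is still unbound, a placeholder word is emitted instead of the real
++     * instruction: it packs {@code PatchLabelKind.BRANCH_ZERO} into the low bits and the
++     * register and operand size above {@code INFORMATION_OFFSET}, and is rewritten once the
++     * label is bound (see {@code patchJumpTarget}).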
++ */ ++ public void cbz(int size, Register cmp, Label label) { ++ // TODO Handle case where offset is too large for a single jump instruction ++ if (label.isBound()) { ++ int offset = label.position() - position(); ++ super.cbz(size, cmp, offset); ++ } else { ++ label.addPatchAt(position()); ++ int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 1); ++ int sizeEncoding = (size == 64 ? 1 : 0) << PatchLabelKind.INFORMATION_OFFSET; ++ // Encode condition flag so that we know how to patch the instruction later ++ emitInt(PatchLabelKind.BRANCH_ZERO.encoding | regEncoding | sizeEncoding); ++ } ++ } ++ ++ /** ++ * Test a single bit and branch if the bit is nonzero. ++ * ++ * @param cmp general purpose register. May not be null, zero-register or stackpointer. ++ * @param uimm6 Unsigned 6-bit bit index. ++ * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null. ++ */ ++ public void tbnz(Register cmp, int uimm6, Label label) { ++ assert NumUtil.isUnsignedNbit(6, uimm6); ++ if (label.isBound()) { ++ int offset = label.position() - position(); ++ super.tbnz(cmp, uimm6, offset); ++ } else { ++ label.addPatchAt(position()); ++ int indexEncoding = uimm6 << PatchLabelKind.INFORMATION_OFFSET; ++ int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 6); ++ emitInt(PatchLabelKind.BRANCH_BIT_NONZERO.encoding | indexEncoding | regEncoding); ++ } ++ } ++ ++ /** ++ * Test a single bit and branch if the bit is zero. ++ * ++ * @param cmp general purpose register. May not be null, zero-register or stackpointer. ++ * @param uimm6 Unsigned 6-bit bit index. ++ * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null. ++ */ ++ public void tbz(Register cmp, int uimm6, Label label) { ++ assert NumUtil.isUnsignedNbit(6, uimm6); ++ if (label.isBound()) { ++ int offset = label.position() - position(); ++ super.tbz(cmp, uimm6, offset); ++ } else { ++ label.addPatchAt(position()); ++ int indexEncoding = uimm6 << PatchLabelKind.INFORMATION_OFFSET; ++ int regEncoding = cmp.encoding << (PatchLabelKind.INFORMATION_OFFSET + 6); ++ emitInt(PatchLabelKind.BRANCH_BIT_ZERO.encoding | indexEncoding | regEncoding); ++ } ++ } ++ ++ /** ++ * Branches to label if condition is true. ++ * ++ * @param condition any condition value allowed. Non null. ++ * @param label Can only handle 21-bit word-aligned offsets for now. May be unbound. Non null. ++ */ ++ public void branchConditionally(ConditionFlag condition, Label label) { ++ // TODO Handle case where offset is too large for a single jump instruction ++ if (label.isBound()) { ++ int offset = label.position() - position(); ++ super.b(condition, offset); ++ } else { ++ label.addPatchAt(position()); ++ // Encode condition flag so that we know how to patch the instruction later ++ emitInt(PatchLabelKind.BRANCH_CONDITIONALLY.encoding | condition.encoding << PatchLabelKind.INFORMATION_OFFSET); ++ } ++ } ++ ++ /** ++ * Branches if condition is true. Address of jump is patched up by HotSpot c++ code. ++ * ++ * @param condition any condition value allowed. Non null. ++ */ ++ public void branchConditionally(ConditionFlag condition) { ++ // Correct offset is fixed up by HotSpot later. ++ super.b(condition, 0); ++ } ++ ++ /** ++ * Jumps to label. ++ * ++ * param label Can only handle signed 28-bit offsets. May be unbound. Non null. 
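++     * <p>
++     * The signed 28-bit byte offset corresponds to a branch range of roughly +/-128 MiB from
++     * the current position (cf. {@link #isBranchImmediateOffset(long)}).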
++ */ ++ @Override ++ public void jmp(Label label) { ++ // TODO Handle case where offset is too large for a single jump instruction ++ if (label.isBound()) { ++ int offset = label.position() - position(); ++ super.b(offset); ++ } else { ++ label.addPatchAt(position()); ++ emitInt(PatchLabelKind.BRANCH_UNCONDITIONALLY.encoding); ++ } ++ } ++ ++ /** ++ * Jump to address in dest. ++ * ++ * @param dest General purpose register. May not be null, zero-register or stackpointer. ++ */ ++ public void jmp(Register dest) { ++ super.br(dest); ++ } ++ ++ /** ++ * Immediate jump instruction fixed up by HotSpot c++ code. ++ */ ++ public void jmp() { ++ // Offset has to be fixed up by c++ code. ++ super.b(0); ++ } ++ ++ /** ++ * ++ * @return true if immediate offset can be used in a single branch instruction. ++ */ ++ public static boolean isBranchImmediateOffset(long imm) { ++ return NumUtil.isSignedNbit(28, imm); ++ } ++ ++ /* system instructions */ ++ ++ /** ++ * Exception codes used when calling hlt instruction. ++ */ ++ public enum SW64ExceptionCode { ++ NO_SWITCH_TARGET(0x0), ++ BREAKPOINT(0x1); ++ ++ public final int encoding; ++ ++ SW64ExceptionCode(int encoding) { ++ this.encoding = encoding; ++ } ++ } ++ ++ /** ++ * Halting mode software breakpoint: Enters halting mode debug state if enabled, else treated as ++ * UNALLOCATED instruction. ++ * ++ * @param exceptionCode exception code specifying why halt was called. Non null. ++ */ ++ public void hlt(SW64ExceptionCode exceptionCode) { ++ super.hlt(exceptionCode.encoding); ++ } ++ ++ /** ++ * Monitor mode software breakpoint: exception routed to a debug monitor executing in a higher ++ * exception level. ++ * ++ * @param exceptionCode exception code specifying why break was called. Non null. ++ */ ++ public void brk(SW64ExceptionCode exceptionCode) { ++ super.brk(exceptionCode.encoding); ++ } ++ ++ public void pause() { ++ throw GraalError.unimplemented(); ++ } ++ ++ /** ++ * Executes no-op instruction. No registers or flags are updated, except for PC. ++ */ ++ public void nop() { ++ super.hint(SystemHint.NOP); ++ } ++ ++ /** ++ * Same as {@link #nop()}. ++ */ ++ @Override ++ public void ensureUniquePC() { ++ nop(); ++ } ++ ++ /** ++ * Aligns PC. ++ * ++ * @param modulus Has to be positive multiple of 4. ++ */ ++ @Override ++ public void align(int modulus) { ++ assert modulus > 0 && (modulus & 0x3) == 0 : "Modulus has to be a positive multiple of 4."; ++ if (position() % modulus == 0) { ++ return; ++ } ++ int offset = modulus - position() % modulus; ++ for (int i = 0; i < offset; i += 4) { ++ nop(); ++ } ++ } ++ ++ /** ++ * Patches jump targets when label gets bound. 
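++     * <p>
++     * The placeholder word found at the patch position encodes a {@link PatchLabelKind} in its
++     * low {@code INFORMATION_OFFSET} bits and kind-specific data (condition flag, register
++     * encoding, operand size or bit index) above them; this method decodes that word and emits
++     * the real instruction with the now-known offset to {@code jumpTarget}.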
++ */ ++ @Override ++ protected void patchJumpTarget(int branch, int jumpTarget) { ++ int instruction = getInt(branch); ++ int branchOffset = jumpTarget - branch; ++ PatchLabelKind type = PatchLabelKind.fromEncoding(instruction); ++ switch (type) { ++ case BRANCH_CONDITIONALLY: ++ ConditionFlag cf = ConditionFlag.fromEncoding(instruction >>> PatchLabelKind.INFORMATION_OFFSET); ++ super.b(cf, branchOffset, branch); ++ break; ++ case BRANCH_UNCONDITIONALLY: ++ super.b(branchOffset, branch); ++ break; ++ case JUMP_ADDRESS: ++ int offset = instruction >>> PatchLabelKind.INFORMATION_OFFSET; ++ emitInt(jumpTarget - offset, branch); ++ break; ++ case BRANCH_NONZERO: ++ case BRANCH_ZERO: { ++ int information = instruction >>> PatchLabelKind.INFORMATION_OFFSET; ++ int sizeEncoding = information & 1; ++ int regEncoding = information >>> 1; ++ Register reg = SW64.cpuRegisters.get(regEncoding); ++ // 1 => 64; 0 => 32 ++ int size = sizeEncoding * 32 + 32; ++ switch (type) { ++ case BRANCH_NONZERO: ++ super.cbnz(size, reg, branchOffset, branch); ++ break; ++ case BRANCH_ZERO: ++ super.cbz(size, reg, branchOffset, branch); ++ break; ++ } ++ break; ++ } ++ case BRANCH_BIT_NONZERO: ++ case BRANCH_BIT_ZERO: { ++ int information = instruction >>> PatchLabelKind.INFORMATION_OFFSET; ++ int sizeEncoding = information & NumUtil.getNbitNumberInt(6); ++ int regEncoding = information >>> 6; ++ Register reg = SW64.cpuRegisters.get(regEncoding); ++ switch (type) { ++ case BRANCH_BIT_NONZERO: ++ super.tbnz(reg, sizeEncoding, branchOffset, branch); ++ break; ++ case BRANCH_BIT_ZERO: ++ super.tbz(reg, sizeEncoding, branchOffset, branch); ++ break; ++ } ++ break; ++ } ++ case ADR: { ++ int information = instruction >>> PatchLabelKind.INFORMATION_OFFSET; ++ int regEncoding = information; ++ Register reg = SW64.cpuRegisters.get(regEncoding); ++ super.adr(reg, branchOffset, branch); ++ break; ++ } ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ } ++ ++ /** ++ * Generates an address of the form {@code base + displacement}. ++ * ++ * Does not change base register to fulfill this requirement. Will fail if displacement cannot ++ * be represented directly as address. ++ * ++ * @param base general purpose register. May not be null or the zero register. ++ * @param displacement arbitrary displacement added to base. ++ * @return SW64Address referencing memory at {@code base + displacement}. ++ */ ++ @Override ++ public SW64Address makeAddress(Register base, int displacement) { ++ return makeAddress(base, displacement, zr, /* signExtend */false, /* transferSize */0, zr, /* allowOverwrite */false); ++ } ++ ++ @Override ++ public SW64Address getPlaceholder(int instructionStartPosition) { ++ return SW64Address.PLACEHOLDER; ++ } ++ ++ public void addressOf(Register dst) { ++ // This will be fixed up later. ++ super.adrp(dst); ++ super.add(64, dst, dst, 0); ++ } ++ ++ /** ++ * Loads an address into Register d. ++ * ++ * @param d general purpose register. May not be null. ++ * @param a SW64Address the address of an operand. 
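++     * <p>
++     * Only the effective address of {@code a} is computed into {@code d}; no memory access is
++     * performed.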
++ */ ++ public void lea(Register d, SW64Address a) { ++ a.lea(this, d); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64AddressLoweringByUse.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64AddressLoweringByUse.java +new file mode 100644 +index 0000000000..4669ecc0ac +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64AddressLoweringByUse.java +@@ -0,0 +1,227 @@ ++/* ++ * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++ ++package org.graalvm.compiler.core.sw64; ++ ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.core.common.LIRKind; ++import org.graalvm.compiler.core.common.NumUtil; ++import org.graalvm.compiler.core.common.type.Stamp; ++import org.graalvm.compiler.nodes.ValueNode; ++import org.graalvm.compiler.nodes.calc.AddNode; ++import org.graalvm.compiler.nodes.memory.address.AddressNode; ++import org.graalvm.compiler.nodes.memory.address.OffsetAddressNode; ++import org.graalvm.compiler.phases.common.AddressLoweringByUsePhase; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++import jdk.vm.ci.meta.JavaConstant; ++ ++public class SW64AddressLoweringByUse extends AddressLoweringByUsePhase.AddressLoweringByUse { ++ private SW64LIRKindTool kindtool; ++ ++ public SW64AddressLoweringByUse(SW64LIRKindTool kindtool) { ++ this.kindtool = kindtool; ++ } ++ ++ @Override ++ public AddressNode lower(ValueNode use, Stamp stamp, AddressNode address) { ++ if (address instanceof OffsetAddressNode) { ++ OffsetAddressNode offsetAddress = (OffsetAddressNode) address; ++ return doLower(stamp, offsetAddress.getBase(), offsetAddress.getOffset()); ++ } else { ++ // must be an already transformed SW64AddressNode ++ return address; ++ } ++ } ++ ++ @Override ++ public AddressNode lower(AddressNode address) { ++ return lower(null, null, address); ++ } ++ ++ private AddressNode doLower(Stamp stamp, ValueNode base, ValueNode index) { ++ SW64AddressNode ret = new SW64AddressNode(base, index); ++ SW64Kind sw64Kind = (stamp == null ? 
null : getSW64Kind(stamp)); ++ ++ // improve the address as much as possible ++ boolean changed; ++ do { ++ changed = improve(sw64Kind, ret); ++ } while (changed); ++ ++ // avoid duplicates ++ return base.graph().unique(ret); ++ } ++ ++ protected boolean improve(SW64Kind kind, SW64AddressNode ret) { ++ SW64Address.AddressingMode mode = ret.getAddressingMode(); ++ // if we have already set a displacement or set to base only mode then we are done ++ if (isDisplacementMode(mode) || isBaseOnlyMode(mode)) { ++ return false; ++ } ++ ValueNode base = ret.getBase(); ++ ValueNode index = ret.getIndex(); ++ ++ // avoid a constant or null base if possible ++ if (base == null) { ++ ret.setBase(index); ++ ret.setIndex(base); ++ return true; ++ } ++ // make sure any integral JavaConstant ++ // is the index rather than the base ++ // strictly we don't need the conditions on index ++ // as we ought not to see two JavaConstant values ++ if (base.isJavaConstant() && base.asJavaConstant().getJavaKind().isNumericInteger() && ++ index != null && !index.isJavaConstant()) { ++ ret.setBase(index); ++ ret.setIndex(base); ++ return true; ++ } ++ ++ // if the base is an add then move it up ++ if (index == null && base instanceof AddNode) { ++ AddNode add = (AddNode) base; ++ ret.setBase(add.getX()); ++ ret.setIndex(add.getY()); ++ return true; ++ } ++ ++ // we can try to fold a JavaConstant index into a displacement ++ if (index != null && index.isJavaConstant()) { ++ JavaConstant javaConstant = index.asJavaConstant(); ++ if (javaConstant.getJavaKind().isNumericInteger()) { ++ long disp = javaConstant.asLong(); ++ mode = immediateMode(kind, disp); ++ if (isDisplacementMode(mode)) { ++ index = null; ++ // we can fold this in as a displacement ++ // but first see if we can pull up any additional ++ // constants added into the base ++ boolean tryNextBase = (base instanceof AddNode); ++ while (tryNextBase) { ++ AddNode add = (AddNode) base; ++ tryNextBase = false; ++ ValueNode child = add.getX(); ++ if (child.isJavaConstant() && child.asJavaConstant().getJavaKind().isNumericInteger()) { ++ long newDisp = disp + child.asJavaConstant().asLong(); ++ SW64Address.AddressingMode newMode = immediateMode(kind, newDisp); ++ if (newMode != SW64Address.AddressingMode.REGISTER_OFFSET) { ++ disp = newDisp; ++ mode = newMode; ++ base = add.getY(); ++ ret.setBase(base); ++ tryNextBase = (base instanceof AddNode); ++ } ++ } else { ++ child = add.getY(); ++ if (child.isJavaConstant() && child.asJavaConstant().getJavaKind().isNumericInteger()) { ++ long newDisp = disp + child.asJavaConstant().asLong(); ++ SW64Address.AddressingMode newMode = immediateMode(kind, newDisp); ++ if (newMode != SW64Address.AddressingMode.REGISTER_OFFSET) { ++ disp = newDisp; ++ mode = newMode; ++ base = add.getX(); ++ ret.setBase(base); ++ tryNextBase = (base instanceof AddNode); ++ } ++ } ++ } ++ } ++ if (disp != 0) { ++ // ok now set the displacement in place of an index ++ ret.setIndex(null); ++ int scaleFactor = computeScaleFactor(kind, mode); ++ ret.setDisplacement(disp, scaleFactor, mode); ++ } else { ++ // reset to base register only ++ ret.setIndex(null); ++ ret.setDisplacement(0, 1, SW64Address.AddressingMode.BASE_REGISTER_ONLY); ++ } ++ return true; ++ } ++ } ++ } ++ // nope cannot improve this any more ++ return false; ++ } ++ ++ private SW64Kind getSW64Kind(Stamp stamp) { ++ LIRKind lirKind = stamp.getLIRKind(kindtool); ++ if (!lirKind.isValue()) { ++ if (!lirKind.isReference(0) || lirKind.getReferenceCount() != 1) { ++ return null; ++ } ++ } ++ ++ 
return (SW64Kind) lirKind.getPlatformKind(); ++ } ++ ++ private static SW64Address.AddressingMode immediateMode(SW64Kind kind, long value) { ++ if (kind != null) { ++ int size = kind.getSizeInBytes(); ++ // this next test should never really fail ++ if ((value & (size - 1)) == 0) { ++ long encodedValue = value / size; ++ // assert value % size == 0 ++ // we can try for a 12 bit scaled offset ++ if (NumUtil.isUnsignedNbit(12, encodedValue)) { ++ return SW64Address.AddressingMode.IMMEDIATE_SCALED; ++ } ++ } ++ } ++ ++ // we can try for a 9 bit unscaled offset ++ if (NumUtil.isSignedNbit(9, value)) { ++ return SW64Address.AddressingMode.IMMEDIATE_UNSCALED; ++ } ++ ++ // nope this index needs to be passed via offset register ++ return SW64Address.AddressingMode.REGISTER_OFFSET; ++ } ++ ++ private static int computeScaleFactor(SW64Kind kind, SW64Address.AddressingMode mode) { ++ if (mode == SW64Address.AddressingMode.IMMEDIATE_SCALED) { ++ return kind.getSizeInBytes(); ++ } ++ return 1; ++ } ++ ++ boolean isBaseOnlyMode(SW64Address.AddressingMode addressingMode) { ++ return addressingMode == SW64Address.AddressingMode.BASE_REGISTER_ONLY; ++ } ++ ++ private static boolean isDisplacementMode(SW64Address.AddressingMode addressingMode) { ++ switch (addressingMode) { ++ case IMMEDIATE_POST_INDEXED: ++ case IMMEDIATE_PRE_INDEXED: ++ case IMMEDIATE_SCALED: ++ case IMMEDIATE_UNSCALED: ++ return true; ++ } ++ return false; ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64AddressNode.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64AddressNode.java +new file mode 100644 +index 0000000000..f40c4d0652 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64AddressNode.java +@@ -0,0 +1,142 @@ ++/* ++ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++ ++package org.graalvm.compiler.core.sw64; ++ ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.asm.sw64.SW64Address.AddressingMode; ++import org.graalvm.compiler.core.common.LIRKind; ++import org.graalvm.compiler.graph.NodeClass; ++import org.graalvm.compiler.lir.sw64.SW64AddressValue; ++import org.graalvm.compiler.lir.gen.LIRGeneratorTool; ++import org.graalvm.compiler.nodeinfo.NodeInfo; ++import org.graalvm.compiler.nodes.NodeView; ++import org.graalvm.compiler.nodes.ValueNode; ++import org.graalvm.compiler.nodes.memory.address.AddressNode; ++import org.graalvm.compiler.nodes.spi.LIRLowerable; ++import org.graalvm.compiler.nodes.spi.NodeLIRBuilderTool; ++ ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.Value; ++ ++/** ++ * Represents an SW64 address in the graph. ++ */ ++@NodeInfo ++public class SW64AddressNode extends AddressNode implements LIRLowerable { ++ ++ public static final NodeClass TYPE = NodeClass.create(SW64AddressNode.class); ++ ++ @OptionalInput private ValueNode base; ++ ++ @OptionalInput private ValueNode index; ++ private SW64Address.AddressingMode addressingMode; ++ ++ private long displacement; ++ private int scaleFactor; ++ ++ public SW64AddressNode(ValueNode base) { ++ this(base, null); ++ } ++ ++ public SW64AddressNode(ValueNode base, ValueNode index) { ++ super(TYPE); ++ this.base = base; ++ this.index = index; ++ this.addressingMode = AddressingMode.REGISTER_OFFSET; ++ this.displacement = 0; ++ this.scaleFactor = 1; ++ } ++ ++ @Override ++ public void generate(NodeLIRBuilderTool gen) { ++ LIRGeneratorTool tool = gen.getLIRGeneratorTool(); ++ ++ AllocatableValue baseValue = base == null ? Value.ILLEGAL : tool.asAllocatable(gen.operand(base)); ++ AllocatableValue indexValue = index == null ? 
Value.ILLEGAL : tool.asAllocatable(gen.operand(index)); ++ ++ AllocatableValue baseReference = LIRKind.derivedBaseFromValue(baseValue); ++ AllocatableValue indexReference; ++ if (index == null) { ++ indexReference = null; ++ } else if (addressingMode.equals(AddressingMode.IMMEDIATE_UNSCALED)) { ++ indexReference = LIRKind.derivedBaseFromValue(indexValue); ++ } else { ++ if (LIRKind.isValue(indexValue.getValueKind())) { ++ indexReference = null; ++ } else { ++ indexReference = Value.ILLEGAL; ++ } ++ } ++ ++ LIRKind kind = LIRKind.combineDerived(tool.getLIRKind(stamp(NodeView.DEFAULT)), baseReference, indexReference); ++ gen.setResult(this, new SW64AddressValue(kind, baseValue, indexValue, (int) displacement, scaleFactor, addressingMode)); ++ } ++ ++ @Override ++ public ValueNode getBase() { ++ return base; ++ } ++ ++ public void setBase(ValueNode base) { ++ // allow modification before inserting into the graph ++ if (isAlive()) { ++ updateUsages(this.base, base); ++ } ++ this.base = base; ++ } ++ ++ @Override ++ public ValueNode getIndex() { ++ return index; ++ } ++ ++ public void setIndex(ValueNode index) { ++ // allow modification before inserting into the graph ++ if (isAlive()) { ++ updateUsages(this.index, index); ++ } ++ this.index = index; ++ } ++ ++ public long getDisplacement() { ++ return displacement; ++ } ++ ++ public void setDisplacement(long displacement, int scaleFactor, SW64Address.AddressingMode addressingMode) { ++ this.displacement = displacement; ++ this.scaleFactor = scaleFactor; ++ this.addressingMode = addressingMode; ++ } ++ ++ @Override ++ public long getMaxConstantDisplacement() { ++ return displacement; ++ } ++ ++ public AddressingMode getAddressingMode() { ++ return addressingMode; ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64ArithmeticLIRGenerator.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64ArithmeticLIRGenerator.java +new file mode 100644 +index 0000000000..f059754575 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64ArithmeticLIRGenerator.java +@@ -0,0 +1,515 @@ ++/* ++ * Copyright (c) 2015, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++ ++package org.graalvm.compiler.core.sw64; ++ ++import static jdk.vm.ci.sw64.SW64.sp; ++import static jdk.vm.ci.sw64.SW64Kind.DWORD; ++import static jdk.vm.ci.sw64.SW64Kind.QWORD; ++import static org.graalvm.compiler.lir.LIRValueUtil.asJavaConstant; ++import static org.graalvm.compiler.lir.LIRValueUtil.isJavaConstant; ++import static org.graalvm.compiler.lir.sw64.SW64BitManipulationOp.BitManipulationOpCode.BSR; ++import static org.graalvm.compiler.lir.sw64.SW64BitManipulationOp.BitManipulationOpCode.CLZ; ++import static org.graalvm.compiler.lir.sw64.SW64BitManipulationOp.BitManipulationOpCode.CTZ; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.core.common.LIRKind; ++import org.graalvm.compiler.core.common.NumUtil; ++import org.graalvm.compiler.core.common.calc.FloatConvert; ++import org.graalvm.compiler.debug.GraalError; ++import org.graalvm.compiler.lir.ConstantValue; ++import org.graalvm.compiler.lir.LIRFrameState; ++import org.graalvm.compiler.lir.Variable; ++import org.graalvm.compiler.lir.sw64.SW64AddressValue; ++import org.graalvm.compiler.lir.sw64.SW64ArithmeticLIRGeneratorTool; ++import org.graalvm.compiler.lir.sw64.SW64ArithmeticOp; ++import org.graalvm.compiler.lir.sw64.SW64BitManipulationOp; ++import org.graalvm.compiler.lir.sw64.SW64Move.LoadOp; ++import org.graalvm.compiler.lir.sw64.SW64Move.StoreConstantOp; ++import org.graalvm.compiler.lir.sw64.SW64Move.StoreOp; ++import org.graalvm.compiler.lir.sw64.SW64ReinterpretOp; ++import org.graalvm.compiler.lir.sw64.SW64SignExtendOp; ++import org.graalvm.compiler.lir.sw64.SW64Unary; ++import org.graalvm.compiler.lir.gen.ArithmeticLIRGenerator; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++import jdk.vm.ci.code.RegisterValue; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaConstant; ++import jdk.vm.ci.meta.PlatformKind; ++import jdk.vm.ci.meta.Value; ++import jdk.vm.ci.meta.ValueKind; ++ ++public class SW64ArithmeticLIRGenerator extends ArithmeticLIRGenerator implements SW64ArithmeticLIRGeneratorTool { ++ ++ @Override ++ public SW64LIRGenerator getLIRGen() { ++ return (SW64LIRGenerator) super.getLIRGen(); ++ } ++ ++ @Override ++ protected boolean isNumericInteger(PlatformKind kind) { ++ return ((SW64Kind) kind).isInteger(); ++ } ++ ++ @Override ++ protected Variable emitAdd(LIRKind resultKind, Value a, Value b, boolean setFlags) { ++ if (isNumericInteger(a.getPlatformKind())) { ++ SW64ArithmeticOp op = setFlags ? SW64ArithmeticOp.ADDS : SW64ArithmeticOp.ADD; ++ return emitBinary(resultKind, op, true, a, b); ++ } else { ++ assert !setFlags : "Cannot set flags on floating point arithmetic"; ++ return emitBinary(resultKind, SW64ArithmeticOp.FADD, true, a, b); ++ } ++ } ++ ++ @Override ++ protected Variable emitSub(LIRKind resultKind, Value a, Value b, boolean setFlags) { ++ if (isNumericInteger(a.getPlatformKind())) { ++ SW64ArithmeticOp op = setFlags ? SW64ArithmeticOp.SUBS : SW64ArithmeticOp.SUB; ++ return emitBinary(resultKind, op, false, a, b); ++ } else { ++ assert !setFlags : "Cannot set flags on floating point arithmetic"; ++ return emitBinary(resultKind, SW64ArithmeticOp.FSUB, false, a, b); ++ } ++ } ++ ++ public Value emitExtendMemory(boolean isSigned, SW64Kind memoryKind, int resultBits, SW64AddressValue address, LIRFrameState state) { ++ // Issue a zero extending load of the proper bit size and set the result to ++ // the proper kind. ++ Variable result = getLIRGen().newVariable(LIRKind.value(resultBits == 32 ? 
SW64Kind.DWORD : SW64Kind.QWORD)); ++ ++ int targetSize = resultBits <= 32 ? 32 : 64; ++ switch (memoryKind) { ++ case BYTE: ++ case WORD: ++ case DWORD: ++ case QWORD: ++ getLIRGen().append(new SW64Unary.MemoryOp(isSigned, targetSize, ++ memoryKind.getSizeInBytes() * 8, result, address, state)); ++ break; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ return result; ++ } ++ ++ @Override ++ public Value emitMul(Value a, Value b, boolean setFlags) { ++ SW64ArithmeticOp intOp = setFlags ? SW64ArithmeticOp.MULVS : SW64ArithmeticOp.MUL; ++ return emitBinary(LIRKind.combine(a, b), getOpCode(a, intOp, SW64ArithmeticOp.FMUL), true, a, b); ++ } ++ ++ @Override ++ public Value emitMulHigh(Value a, Value b) { ++ assert isNumericInteger(a.getPlatformKind()); ++ return emitBinary(LIRKind.combine(a, b), SW64ArithmeticOp.SMULH, true, a, b); ++ } ++ ++ @Override ++ public Value emitUMulHigh(Value a, Value b) { ++ assert isNumericInteger(a.getPlatformKind()); ++ return emitBinary(LIRKind.combine(a, b), SW64ArithmeticOp.UMULH, true, a, b); ++ } ++ ++ @Override ++ public Value emitDiv(Value a, Value b, LIRFrameState state) { ++ return emitBinary(LIRKind.combine(a, b), getOpCode(a, SW64ArithmeticOp.DIV, SW64ArithmeticOp.FDIV), false, asAllocatable(a), asAllocatable(b)); ++ } ++ ++ @Override ++ public Value emitRem(Value a, Value b, LIRFrameState state) { ++ return emitBinary(LIRKind.combine(a, b), getOpCode(a, SW64ArithmeticOp.REM, SW64ArithmeticOp.FREM), false, asAllocatable(a), asAllocatable(b)); ++ } ++ ++ @Override ++ public Value emitUDiv(Value a, Value b, LIRFrameState state) { ++ assert isNumericInteger(a.getPlatformKind()); ++ return emitBinary(LIRKind.combine(a, b), SW64ArithmeticOp.UDIV, false, asAllocatable(a), asAllocatable(b)); ++ } ++ ++ @Override ++ public Value emitURem(Value a, Value b, LIRFrameState state) { ++ assert isNumericInteger(a.getPlatformKind()); ++ return emitBinary(LIRKind.combine(a, b), SW64ArithmeticOp.UREM, false, asAllocatable(a), asAllocatable(b)); ++ } ++ ++ @Override ++ public Value emitAnd(Value a, Value b) { ++ assert isNumericInteger(a.getPlatformKind()); ++ return emitBinary(LIRKind.combine(a, b), SW64ArithmeticOp.AND, true, a, b); ++ } ++ ++ @Override ++ public Value emitOr(Value a, Value b) { ++ assert isNumericInteger(a.getPlatformKind()); ++ return emitBinary(LIRKind.combine(a, b), SW64ArithmeticOp.OR, true, a, b); ++ } ++ ++ @Override ++ public Value emitXor(Value a, Value b) { ++ assert isNumericInteger(a.getPlatformKind()); ++ return emitBinary(LIRKind.combine(a, b), SW64ArithmeticOp.XOR, true, a, b); ++ } ++ ++ @Override ++ public Value emitShl(Value a, Value b) { ++ assert isNumericInteger(a.getPlatformKind()); ++ return emitBinary(LIRKind.combine(a, b), SW64ArithmeticOp.SHL, false, a, b); ++ } ++ ++ @Override ++ public Value emitShr(Value a, Value b) { ++ assert isNumericInteger(a.getPlatformKind()); ++ return emitBinary(LIRKind.combine(a, b), SW64ArithmeticOp.ASHR, false, a, b); ++ } ++ ++ @Override ++ public Value emitUShr(Value a, Value b) { ++ assert isNumericInteger(a.getPlatformKind()); ++ return emitBinary(LIRKind.combine(a, b), SW64ArithmeticOp.LSHR, false, a, b); ++ } ++ ++ @Override ++ public Value emitFloatConvert(FloatConvert op, Value inputVal) { ++ PlatformKind resultPlatformKind = getFloatConvertResultKind(op); ++ LIRKind resultLirKind = LIRKind.combine(inputVal).changeType(resultPlatformKind); ++ Variable result = getLIRGen().newVariable(resultLirKind); ++ getLIRGen().append(new SW64FloatConvertOp(op, result, 
asAllocatable(inputVal))); ++ return result; ++ } ++ ++ private static PlatformKind getFloatConvertResultKind(FloatConvert op) { ++ switch (op) { ++ case F2I: ++ case D2I: ++ return SW64Kind.DWORD; ++ case F2L: ++ case D2L: ++ return SW64Kind.QWORD; ++ case I2F: ++ case L2F: ++ case D2F: ++ return SW64Kind.SINGLE; ++ case I2D: ++ case L2D: ++ case F2D: ++ return SW64Kind.DOUBLE; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ } ++ ++ @Override ++ public Value emitReinterpret(LIRKind to, Value inputVal) { ++ ValueKind from = inputVal.getValueKind(); ++ if (to.equals(from)) { ++ return inputVal; ++ } ++ Variable result = getLIRGen().newVariable(to); ++ getLIRGen().append(new SW64ReinterpretOp(result, asAllocatable(inputVal))); ++ return result; ++ } ++ ++ @Override ++ public Value emitNarrow(Value inputVal, int bits) { ++ if (inputVal.getPlatformKind() == SW64Kind.QWORD && bits <= 32) { ++ LIRKind resultKind = getResultLirKind(bits, inputVal); ++ long mask = NumUtil.getNbitNumberLong(bits); ++ Value maskValue = new ConstantValue(resultKind, JavaConstant.forLong(mask)); ++ return emitBinary(resultKind, SW64ArithmeticOp.AND, true, inputVal, maskValue); ++ } else { ++ return inputVal; ++ } ++ } ++ ++ @Override ++ public Value emitZeroExtend(Value inputVal, int fromBits, int toBits) { ++ assert fromBits <= toBits && toBits <= 64; ++ if (fromBits == toBits) { ++ return inputVal; ++ } ++ LIRKind resultKind = getResultLirKind(toBits, inputVal); ++ long mask = NumUtil.getNbitNumberLong(fromBits); ++ Value maskValue = new ConstantValue(resultKind, JavaConstant.forLong(mask)); ++ return emitBinary(resultKind, SW64ArithmeticOp.AND, true, inputVal, maskValue); ++ } ++ ++ @Override ++ public Value emitSignExtend(Value inputVal, int fromBits, int toBits) { ++ LIRKind resultKind = getResultLirKind(toBits, inputVal); ++ assert fromBits <= toBits && toBits <= 64; ++ if (fromBits == toBits) { ++ return inputVal; ++ } else if (isJavaConstant(inputVal)) { ++ JavaConstant javaConstant = asJavaConstant(inputVal); ++ long constant; ++ if (javaConstant.isNull()) { ++ constant = 0; ++ } else { ++ constant = javaConstant.asLong(); ++ } ++ int shiftCount = QWORD.getSizeInBytes() * 8 - fromBits; ++ return new ConstantValue(resultKind, JavaConstant.forLong((constant << shiftCount) >> shiftCount)); ++ } ++ Variable result = getLIRGen().newVariable(resultKind); ++ getLIRGen().append(new SW64SignExtendOp(result, asAllocatable(inputVal), fromBits, toBits)); ++ return result; ++ } ++ ++ private static LIRKind getResultLirKind(int resultBitSize, Value... 
inputValues) { ++ if (resultBitSize == 64) { ++ return LIRKind.combine(inputValues).changeType(QWORD); ++ } else { ++ // FIXME: I have no idea what this assert was ever for ++ // assert resultBitSize == 32; ++ return LIRKind.combine(inputValues).changeType(DWORD); ++ } ++ } ++ ++ protected Variable emitBinary(ValueKind resultKind, SW64ArithmeticOp op, boolean commutative, Value a, Value b) { ++ Variable result = getLIRGen().newVariable(resultKind); ++ if (isValidBinaryConstant(op, a, b)) { ++ emitBinaryConst(result, op, asAllocatable(a), asJavaConstant(b)); ++ } else if (commutative && isValidBinaryConstant(op, b, a)) { ++ emitBinaryConst(result, op, asAllocatable(b), asJavaConstant(a)); ++ } else { ++ emitBinaryVar(result, op, asAllocatable(a), asAllocatable(b)); ++ } ++ return result; ++ } ++ ++ private void emitBinaryVar(Variable result, SW64ArithmeticOp op, AllocatableValue a, AllocatableValue b) { ++ AllocatableValue x = moveSp(a); ++ AllocatableValue y = moveSp(b); ++ switch (op) { ++ case FREM: ++ case REM: ++ case UREM: ++ getLIRGen().append(new SW64ArithmeticOp.BinaryCompositeOp(op, result, x, y)); ++ break; ++ default: ++ getLIRGen().append(new SW64ArithmeticOp.BinaryOp(op, result, x, y)); ++ break; ++ } ++ } ++ ++ private void emitBinaryConst(Variable result, SW64ArithmeticOp op, AllocatableValue a, JavaConstant b) { ++ AllocatableValue x = moveSp(a); ++ getLIRGen().append(new SW64ArithmeticOp.BinaryConstOp(op, result, x, b)); ++ } ++ ++ private static boolean isValidBinaryConstant(SW64ArithmeticOp op, Value a, Value b) { ++ if (!isJavaConstant(b)) { ++ return false; ++ } ++ JavaConstant constValue = asJavaConstant(b); ++ switch (op.category) { ++ case LOGICAL: ++ return isLogicalConstant(constValue); ++ case ARITHMETIC: ++ return isArithmeticConstant(constValue); ++ case SHIFT: ++ assert constValue.asLong() >= 0 && constValue.asLong() < a.getPlatformKind().getSizeInBytes() * Byte.SIZE; ++ return true; ++ case NONE: ++ return false; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ } ++ ++ private static boolean isLogicalConstant(JavaConstant constValue) { ++ switch (constValue.getJavaKind()) { ++ case Int: ++ return SW64MacroAssembler.isLogicalImmediate(constValue.asInt()); ++ case Long: ++ return SW64MacroAssembler.isLogicalImmediate(constValue.asLong()); ++ default: ++ return false; ++ } ++ } ++ ++ protected static boolean isArithmeticConstant(JavaConstant constValue) { ++ switch (constValue.getJavaKind()) { ++ case Int: ++ case Long: ++ return SW64MacroAssembler.isArithmeticImmediate(constValue.asLong()); ++ case Object: ++ return constValue.isNull(); ++ default: ++ return false; ++ } ++ } ++ ++ @Override ++ public Value emitNegate(Value inputVal) { ++ return emitUnary(getOpCode(inputVal, SW64ArithmeticOp.NEG, SW64ArithmeticOp.FNEG), inputVal); ++ } ++ ++ @Override ++ public Value emitNot(Value input) { ++ assert isNumericInteger(input.getPlatformKind()); ++ return emitUnary(SW64ArithmeticOp.NOT, input); ++ } ++ ++ @Override ++ public Value emitMathAbs(Value input) { ++ return emitUnary(getOpCode(input, SW64ArithmeticOp.ABS, SW64ArithmeticOp.FABS), input); ++ } ++ ++ @Override ++ public Value emitMathSqrt(Value input) { ++ assert input.getPlatformKind() == SW64Kind.DOUBLE; ++ return emitUnary(SW64ArithmeticOp.SQRT, input); ++ } ++ ++ @Override ++ public Variable emitBitScanForward(Value value) { ++ throw GraalError.unimplemented(); ++ } ++ ++ @Override ++ public Value emitBitCount(Value operand) { ++ throw GraalError.unimplemented("SW64 ISA does not offer way to 
implement this more efficiently than a simple Java algorithm."); ++ } ++ ++ @Override ++ public Value emitBitScanReverse(Value value) { ++ Variable result = getLIRGen().newVariable(LIRKind.combine(value).changeType(SW64Kind.DWORD)); ++ getLIRGen().append(new SW64BitManipulationOp(BSR, result, asAllocatable(value))); ++ return result; ++ } ++ ++ @Override ++ public Value emitCountLeadingZeros(Value value) { ++ Variable result = getLIRGen().newVariable(LIRKind.combine(value).changeType(SW64Kind.DWORD)); ++ getLIRGen().append(new SW64BitManipulationOp(CLZ, result, asAllocatable(value))); ++ return result; ++ } ++ ++ @Override ++ public Value emitCountTrailingZeros(Value value) { ++ Variable result = getLIRGen().newVariable(LIRKind.combine(value).changeType(SW64Kind.DWORD)); ++ getLIRGen().append(new SW64BitManipulationOp(CTZ, result, asAllocatable(value))); ++ return result; ++ } ++ ++ private Variable emitUnary(SW64ArithmeticOp op, Value inputVal) { ++ AllocatableValue input = asAllocatable(inputVal); ++ Variable result = getLIRGen().newVariable(LIRKind.combine(input)); ++ getLIRGen().append(new SW64ArithmeticOp.UnaryOp(op, result, input)); ++ return result; ++ } ++ ++ /** ++ * If val denotes the stackpointer, move it to another location. This is necessary since most ++ * ops cannot handle the stackpointer as input or output. ++ */ ++ private AllocatableValue moveSp(AllocatableValue val) { ++ if (val instanceof RegisterValue && ((RegisterValue) val).getRegister().equals(sp)) { ++ assert val.getPlatformKind() == SW64Kind.QWORD : "Stackpointer must be long"; ++ return getLIRGen().emitMove(val); ++ } ++ return val; ++ } ++ ++ /** ++ * Returns the opcode depending on the platform kind of val. ++ */ ++ private SW64ArithmeticOp getOpCode(Value val, SW64ArithmeticOp intOp, SW64ArithmeticOp floatOp) { ++ return isNumericInteger(val.getPlatformKind()) ? 
intOp : floatOp; ++ } ++ ++ @Override ++ public Variable emitLoad(LIRKind kind, Value address, LIRFrameState state) { ++ SW64AddressValue loadAddress = getLIRGen().asAddressValue(address); ++ Variable result = getLIRGen().newVariable(getLIRGen().toRegisterKind(kind)); ++ getLIRGen().append(new LoadOp((SW64Kind) kind.getPlatformKind(), result, loadAddress, state)); ++ return result; ++ } ++ ++ @Override ++ public void emitStore(ValueKind lirKind, Value address, Value inputVal, LIRFrameState state) { ++ SW64AddressValue storeAddress = getLIRGen().asAddressValue(address); ++ SW64Kind kind = (SW64Kind) lirKind.getPlatformKind(); ++ ++ if (isJavaConstant(inputVal) && kind.isInteger()) { ++ JavaConstant c = asJavaConstant(inputVal); ++ if (c.isDefaultForKind()) { ++ // We can load 0 directly into integer registers ++ getLIRGen().append(new StoreConstantOp(kind, storeAddress, c, state)); ++ return; ++ } ++ } ++ AllocatableValue input = asAllocatable(inputVal); ++ getLIRGen().append(new StoreOp(kind, storeAddress, input, state)); ++ } ++ ++ @Override ++ public Value emitMathLog(Value input, boolean base10) { ++ throw GraalError.unimplemented(); ++ } ++ ++ @Override ++ public Value emitMathCos(Value input) { ++ throw GraalError.unimplemented(); ++ } ++ ++ @Override ++ public Value emitMathSin(Value input) { ++ throw GraalError.unimplemented(); ++ } ++ ++ @Override ++ public Value emitMathTan(Value input) { ++ throw GraalError.unimplemented(); ++ } ++ ++ @Override ++ public void emitCompareOp(SW64Kind cmpKind, Variable left, Value right) { ++ throw GraalError.unimplemented(); ++ } ++ ++ @Override ++ public Value emitRound(Value value, RoundingMode mode) { ++ SW64ArithmeticOp op; ++ switch (mode) { ++ case NEAREST: ++ op = SW64ArithmeticOp.FRINTN; ++ break; ++ case UP: ++ op = SW64ArithmeticOp.FRINTP; ++ break; ++ case DOWN: ++ op = SW64ArithmeticOp.FRINTM; ++ break; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ ++ return emitUnary(op, value); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64FloatConvertOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64FloatConvertOp.java +new file mode 100644 +index 0000000000..82bd4d4d07 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64FloatConvertOp.java +@@ -0,0 +1,83 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++ ++package org.graalvm.compiler.core.sw64; ++ ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.core.common.calc.FloatConvert; ++import org.graalvm.compiler.debug.GraalError; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.sw64.SW64LIRInstruction; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.AllocatableValue; ++ ++public final class SW64FloatConvertOp extends SW64LIRInstruction { ++ private static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64FloatConvertOp.class); ++ ++ private final FloatConvert op; ++ @Def protected AllocatableValue resultValue; ++ @Use protected AllocatableValue inputValue; ++ ++ protected SW64FloatConvertOp(FloatConvert op, AllocatableValue resultValue, AllocatableValue inputValue) { ++ super(TYPE); ++ this.op = op; ++ this.resultValue = resultValue; ++ this.inputValue = inputValue; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ int fromSize = inputValue.getPlatformKind().getSizeInBytes() * Byte.SIZE; ++ int toSize = resultValue.getPlatformKind().getSizeInBytes() * Byte.SIZE; ++ ++ Register result = asRegister(resultValue); ++ Register input = asRegister(inputValue); ++ switch (op) { ++ case F2I: ++ case D2I: ++ case F2L: ++ case D2L: ++ masm.fcvtzs(toSize, fromSize, result, input); ++ break; ++ case I2F: ++ case I2D: ++ case L2F: ++ case L2D: ++ masm.scvtf(toSize, fromSize, result, input); ++ break; ++ case D2F: ++ case F2D: ++ masm.fcvt(fromSize, result, input); ++ break; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64LIRGenerator.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64LIRGenerator.java +new file mode 100644 +index 0000000000..3bcfc57816 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64LIRGenerator.java +@@ -0,0 +1,516 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.core.sw64; ++ ++import static org.graalvm.compiler.lir.LIRValueUtil.asJavaConstant; ++import static org.graalvm.compiler.lir.LIRValueUtil.isJavaConstant; ++ ++import java.util.function.Function; ++ ++import org.graalvm.compiler.asm.sw64.SW64Address.AddressingMode; ++import org.graalvm.compiler.asm.sw64.SW64Assembler.ConditionFlag; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.core.common.LIRKind; ++import org.graalvm.compiler.core.common.calc.Condition; ++import org.graalvm.compiler.core.common.spi.LIRKindTool; ++import org.graalvm.compiler.debug.GraalError; ++import org.graalvm.compiler.lir.LIRFrameState; ++import org.graalvm.compiler.lir.LIRValueUtil; ++import org.graalvm.compiler.lir.LabelRef; ++import org.graalvm.compiler.lir.StandardOp; ++import org.graalvm.compiler.lir.SwitchStrategy; ++import org.graalvm.compiler.lir.Variable; ++import org.graalvm.compiler.lir.sw64.SW64AddressValue; ++import org.graalvm.compiler.lir.sw64.SW64ArithmeticOp; ++import org.graalvm.compiler.lir.sw64.SW64ArrayCompareToOp; ++import org.graalvm.compiler.lir.sw64.SW64ArrayEqualsOp; ++import org.graalvm.compiler.lir.sw64.SW64ByteSwapOp; ++import org.graalvm.compiler.lir.sw64.SW64Compare; ++import org.graalvm.compiler.lir.sw64.SW64ControlFlow; ++import org.graalvm.compiler.lir.sw64.SW64ControlFlow.BranchOp; ++import org.graalvm.compiler.lir.sw64.SW64ControlFlow.CondMoveOp; ++import org.graalvm.compiler.lir.sw64.SW64ControlFlow.StrategySwitchOp; ++import org.graalvm.compiler.lir.sw64.SW64ControlFlow.TableSwitchOp; ++import org.graalvm.compiler.lir.sw64.SW64LIRFlagsVersioned; ++import org.graalvm.compiler.lir.sw64.SW64Move; ++import org.graalvm.compiler.lir.sw64.SW64AtomicMove.AtomicReadAndAddOp; ++import org.graalvm.compiler.lir.sw64.SW64AtomicMove.AtomicReadAndAddLSEOp; ++import org.graalvm.compiler.lir.sw64.SW64AtomicMove.CompareAndSwapOp; ++import org.graalvm.compiler.lir.sw64.SW64AtomicMove.AtomicReadAndWriteOp; ++import org.graalvm.compiler.lir.sw64.SW64Move.MembarOp; ++import org.graalvm.compiler.lir.sw64.SW64PauseOp; ++import org.graalvm.compiler.lir.gen.LIRGenerationResult; ++import org.graalvm.compiler.lir.gen.LIRGenerator; ++import org.graalvm.compiler.phases.util.Providers; ++ ++import jdk.vm.ci.sw64.SW64; ++import jdk.vm.ci.sw64.SW64Kind; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.RegisterValue; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaConstant; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.PlatformKind; ++import jdk.vm.ci.meta.PrimitiveConstant; ++import jdk.vm.ci.meta.Value; ++import jdk.vm.ci.meta.ValueKind; ++ ++public abstract class SW64LIRGenerator extends LIRGenerator { ++ ++ public SW64LIRGenerator(LIRKindTool lirKindTool, SW64ArithmeticLIRGenerator arithmeticLIRGen, MoveFactory moveFactory, Providers providers, LIRGenerationResult lirGenRes) { ++ super(lirKindTool, arithmeticLIRGen, moveFactory, providers, lirGenRes); ++ } ++ ++ /** ++ * Checks whether the supplied constant can be used without loading it into a register for store ++ * operations, i.e., on the right hand side of a memory access. ++ * ++ * @param c The constant to check. ++ * @return True if the constant can be used directly, false if the constant needs to be in a ++ * register. 
++     */
++    protected static final boolean canStoreConstant(JavaConstant c) {
++        // Our own code never calls this since we can't make a definite statement about whether or
++        // not we can inline a constant without knowing what kind of operation we execute. Let's be
++        // optimistic here and fix up mistakes later.
++        return true;
++    }
++
++    /**
++     * SW64 cannot use anything smaller than a word in any instruction other than load and store.
++     */
++    @Override
++    public <K extends ValueKind<K>> K toRegisterKind(K kind) {
++        switch ((SW64Kind) kind.getPlatformKind()) {
++            case BYTE:
++            case WORD:
++                return kind.changeType(SW64Kind.DWORD);
++            default:
++                return kind;
++        }
++    }
++
++    @Override
++    public void emitNullCheck(Value address, LIRFrameState state) {
++        append(new SW64Move.NullCheckOp(asAddressValue(address), state));
++    }
++
++    @Override
++    public Variable emitAddress(AllocatableValue stackslot) {
++        Variable result = newVariable(LIRKind.value(target().arch.getWordKind()));
++        append(new SW64Move.StackLoadAddressOp(result, stackslot));
++        return result;
++    }
++
++    public SW64AddressValue asAddressValue(Value address) {
++        if (address instanceof SW64AddressValue) {
++            return (SW64AddressValue) address;
++        } else {
++            return new SW64AddressValue(address.getValueKind(), asAllocatable(address), Value.ILLEGAL, 0, 1, AddressingMode.BASE_REGISTER_ONLY);
++        }
++    }
++
++    @Override
++    public Variable emitLogicCompareAndSwap(LIRKind accessKind, Value address, Value expectedValue, Value newValue, Value trueValue, Value falseValue) {
++        Variable prevValue = newVariable(expectedValue.getValueKind());
++        Variable scratch = newVariable(LIRKind.value(SW64Kind.DWORD));
++        append(new CompareAndSwapOp(prevValue, loadReg(expectedValue), loadReg(newValue), asAllocatable(address), scratch));
++        assert trueValue.getValueKind().equals(falseValue.getValueKind());
++        Variable result = newVariable(trueValue.getValueKind());
++        append(new CondMoveOp(result, ConditionFlag.EQ, asAllocatable(trueValue), asAllocatable(falseValue)));
++        return result;
++    }
++
++    @Override
++    public Variable emitValueCompareAndSwap(LIRKind accessKind, Value address, Value expectedValue, Value newValue) {
++        Variable result = newVariable(newValue.getValueKind());
++        Variable scratch = newVariable(LIRKind.value(SW64Kind.WORD));
++        append(new CompareAndSwapOp(result, loadNonCompareConst(expectedValue), loadReg(newValue), asAllocatable(address), scratch));
++        return result;
++    }
++
++    @Override
++    public Value emitAtomicReadAndWrite(Value address, ValueKind<?> kind, Value newValue) {
++        Variable result = newVariable(kind);
++        Variable scratch = newVariable(kind);
++        append(new AtomicReadAndWriteOp((SW64Kind) kind.getPlatformKind(), asAllocatable(result), asAllocatable(address), asAllocatable(newValue), asAllocatable(scratch)));
++        return result;
++    }
++
++    @Override
++    public Value emitAtomicReadAndAdd(Value address, ValueKind<?> kind, Value delta) {
++        Variable result = newVariable(kind);
++        if (SW64LIRFlagsVersioned.useLSE(target().arch)) {
++            append(new AtomicReadAndAddLSEOp((SW64Kind) kind.getPlatformKind(), asAllocatable(result), asAllocatable(address), asAllocatable(delta)));
++        } else {
++            append(new AtomicReadAndAddOp((SW64Kind) kind.getPlatformKind(), asAllocatable(result), asAllocatable(address), delta));
++        }
++        return result;
++    }
++
++    @Override
++    public void emitMembar(int barriers) {
++        int necessaryBarriers = target().arch.requiredBarriers(barriers);
++        if (target().isMP && necessaryBarriers != 0) {
++            append(new MembarOp(necessaryBarriers));
++        }
++    }
++
++    @Override
++    public void emitJump(LabelRef label) {
++        assert label != null;
++        append(new StandardOp.JumpOp(label));
++    }
++
++    @Override
++    public void emitOverflowCheckBranch(LabelRef overflow, LabelRef noOverflow, LIRKind cmpKind, double overflowProbability) {
++        append(new SW64ControlFlow.BranchOp(ConditionFlag.VS, overflow, noOverflow, overflowProbability));
++    }
++
++    /**
++     * Branches to label if (left & right) == 0. If negated is true branches on non-zero instead.
++     *
++     * @param left Integer kind. Non null.
++     * @param right Integer kind. Non null.
++     * @param trueDestination destination if left & right == 0. Non null.
++     * @param falseDestination destination if left & right != 0. Non null
++     * @param trueSuccessorProbability historic probability that comparison is true
++     */
++    @Override
++    public void emitIntegerTestBranch(Value left, Value right, LabelRef trueDestination, LabelRef falseDestination, double trueSuccessorProbability) {
++        assert ((SW64Kind) left.getPlatformKind()).isInteger() && left.getPlatformKind() == right.getPlatformKind();
++        ((SW64ArithmeticLIRGenerator) getArithmetic()).emitBinary(LIRKind.combine(left, right), SW64ArithmeticOp.ANDS, true, left, right);
++        append(new SW64ControlFlow.BranchOp(ConditionFlag.EQ, trueDestination, falseDestination, trueSuccessorProbability));
++    }
++
++    /**
++     * Conditionally move trueValue into new variable if cond + unorderedIsTrue is true, else
++     * falseValue.
++     *
++     * @param left Arbitrary value. Has to have same type as right. Non null.
++     * @param right Arbitrary value. Has to have same type as left. Non null.
++     * @param cond condition that decides whether to move trueValue or falseValue into result. Non
++     *            null.
++     * @param unorderedIsTrue defines whether floating-point comparisons consider unordered true or
++     *            not. Ignored for integer comparisons.
++     * @param trueValue arbitrary value same type as falseValue. Non null.
++     * @param falseValue arbitrary value same type as trueValue. Non null.
++     * @return value containing trueValue if cond + unorderedIsTrue is true, else falseValue. Non
++     *         null.
++     */
++    @Override
++    public Variable emitConditionalMove(PlatformKind cmpKind, Value left, Value right, Condition cond, boolean unorderedIsTrue, Value trueValue, Value falseValue) {
++        boolean mirrored = emitCompare(cmpKind, left, right, cond, unorderedIsTrue);
++        Condition finalCondition = mirrored ? cond.mirror() : cond;
++        boolean finalUnorderedIsTrue = mirrored ? !unorderedIsTrue : unorderedIsTrue;
++        ConditionFlag cmpCondition = toConditionFlag(((SW64Kind) cmpKind).isInteger(), finalCondition, finalUnorderedIsTrue);
++        Variable result = newVariable(trueValue.getValueKind());
++        append(new CondMoveOp(result, cmpCondition, loadReg(trueValue), loadReg(falseValue)));
++        return result;
++    }
++
++    @Override
++    public void emitCompareBranch(PlatformKind cmpKind, Value left, Value right, Condition cond, boolean unorderedIsTrue, LabelRef trueDestination, LabelRef falseDestination,
++                    double trueDestinationProbability) {
++        boolean mirrored = emitCompare(cmpKind, left, right, cond, unorderedIsTrue);
++        Condition finalCondition = mirrored ? cond.mirror() : cond;
++        boolean finalUnorderedIsTrue = mirrored ?
!unorderedIsTrue : unorderedIsTrue; ++ ConditionFlag cmpCondition = toConditionFlag(((SW64Kind) cmpKind).isInteger(), finalCondition, finalUnorderedIsTrue); ++ append(new BranchOp(cmpCondition, trueDestination, falseDestination, trueDestinationProbability)); ++ } ++ ++ private static ConditionFlag toConditionFlag(boolean isInt, Condition cond, boolean unorderedIsTrue) { ++ return isInt ? toIntConditionFlag(cond) : toFloatConditionFlag(cond, unorderedIsTrue); ++ } ++ ++ /** ++ * Takes a Condition and unorderedIsTrue flag and returns the correct Aarch64 specific ++ * ConditionFlag. Note: This is only correct if the emitCompare code for floats has correctly ++ * handled the case of 'EQ && unorderedIsTrue', respectively 'NE && !unorderedIsTrue'! ++ */ ++ private static ConditionFlag toFloatConditionFlag(Condition cond, boolean unorderedIsTrue) { ++ switch (cond) { ++ case LT: ++ return unorderedIsTrue ? ConditionFlag.LT : ConditionFlag.LO; ++ case LE: ++ return unorderedIsTrue ? ConditionFlag.LE : ConditionFlag.LS; ++ case GE: ++ return unorderedIsTrue ? ConditionFlag.PL : ConditionFlag.GE; ++ case GT: ++ return unorderedIsTrue ? ConditionFlag.HI : ConditionFlag.GT; ++ case EQ: ++ return ConditionFlag.EQ; ++ case NE: ++ return ConditionFlag.NE; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ } ++ ++ /** ++ * Takes a Condition and returns the correct Aarch64 specific ConditionFlag. ++ */ ++ private static ConditionFlag toIntConditionFlag(Condition cond) { ++ switch (cond) { ++ case EQ: ++ return ConditionFlag.EQ; ++ case NE: ++ return ConditionFlag.NE; ++ case LT: ++ return ConditionFlag.LT; ++ case LE: ++ return ConditionFlag.LE; ++ case GT: ++ return ConditionFlag.GT; ++ case GE: ++ return ConditionFlag.GE; ++ case AE: ++ return ConditionFlag.HS; ++ case BE: ++ return ConditionFlag.LS; ++ case AT: ++ return ConditionFlag.HI; ++ case BT: ++ return ConditionFlag.LO; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ } ++ ++ /** ++ * This method emits the compare instruction, and may reorder the operands. It returns true if ++ * it did so. ++ * ++ * @param a the left operand of the comparison. Has to have same type as b. Non null. ++ * @param b the right operand of the comparison. Has to have same type as a. Non null. ++ * @return true if mirrored (i.e. "b cmp a" instead of "a cmp b" was done). ++ */ ++ protected boolean emitCompare(PlatformKind cmpKind, Value a, Value b, Condition condition, boolean unorderedIsTrue) { ++ Value left; ++ Value right; ++ boolean mirrored; ++ SW64Kind kind = (SW64Kind) cmpKind; ++ if (kind.isInteger()) { ++ Value aExt = a; ++ Value bExt = b; ++ ++ int compareBytes = cmpKind.getSizeInBytes(); ++ // SW64 compares 32 or 64 bits: sign extend a and b as required. 
++ if (compareBytes < a.getPlatformKind().getSizeInBytes()) { ++ aExt = arithmeticLIRGen.emitSignExtend(a, compareBytes * 8, 64); ++ } ++ if (compareBytes < b.getPlatformKind().getSizeInBytes()) { ++ bExt = arithmeticLIRGen.emitSignExtend(b, compareBytes * 8, 64); ++ } ++ ++ if (LIRValueUtil.isVariable(bExt)) { ++ left = load(bExt); ++ right = loadNonConst(aExt); ++ mirrored = true; ++ } else { ++ left = load(aExt); ++ right = loadNonConst(bExt); ++ mirrored = false; ++ } ++ append(new SW64Compare.CompareOp(left, loadNonCompareConst(right))); ++ } else if (kind.isSIMD()) { ++ if (SW64Compare.FloatCompareOp.isFloatCmpConstant(a, condition, unorderedIsTrue)) { ++ left = load(b); ++ right = a; ++ mirrored = true; ++ } else if (SW64Compare.FloatCompareOp.isFloatCmpConstant(b, condition, unorderedIsTrue)) { ++ left = load(a); ++ right = b; ++ mirrored = false; ++ } else { ++ left = load(a); ++ right = loadReg(b); ++ mirrored = false; ++ } ++ append(new SW64Compare.FloatCompareOp(left, asAllocatable(right), condition, unorderedIsTrue)); ++ } else { ++ throw GraalError.shouldNotReachHere(); ++ } ++ return mirrored; ++ } ++ ++ /** ++ * If value is a constant that cannot be used directly with a gpCompare instruction load it into ++ * a register and return the register, otherwise return constant value unchanged. ++ */ ++ protected Value loadNonCompareConst(Value value) { ++ if (!isCompareConstant(value)) { ++ return loadReg(value); ++ } ++ return value; ++ } ++ ++ /** ++ * Checks whether value can be used directly with a gpCompare instruction. This is not ++ * the same as {@link SW64ArithmeticLIRGenerator#isArithmeticConstant(JavaConstant)}, because ++ * 0.0 is a valid compare constant for floats, while there are no arithmetic constants for ++ * floats. ++ * ++ * @param value any type. Non null. ++ * @return true if value can be used directly in comparison instruction, false otherwise. ++ */ ++ public boolean isCompareConstant(Value value) { ++ if (isJavaConstant(value)) { ++ JavaConstant constant = asJavaConstant(value); ++ if (constant instanceof PrimitiveConstant) { ++ final long longValue = constant.asLong(); ++ long maskedValue; ++ switch (constant.getJavaKind()) { ++ case Boolean: ++ case Byte: ++ maskedValue = longValue & 0xFF; ++ break; ++ case Char: ++ case Short: ++ maskedValue = longValue & 0xFFFF; ++ break; ++ case Int: ++ maskedValue = longValue & 0xFFFF_FFFF; ++ break; ++ case Long: ++ maskedValue = longValue; ++ break; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ return SW64MacroAssembler.isArithmeticImmediate(maskedValue); ++ } else { ++ return constant.isDefaultForKind(); ++ } ++ } ++ return false; ++ } ++ ++ /** ++ * Moves trueValue into result if (left & right) == 0, else falseValue. ++ * ++ * @param left Integer kind. Non null. ++ * @param right Integer kind. Non null. ++ * @param trueValue Integer kind. Non null. ++ * @param falseValue Integer kind. Non null. ++ * @return virtual register containing trueValue if (left & right) == 0, else falseValue. 
++     */
++    @Override
++    public Variable emitIntegerTestMove(Value left, Value right, Value trueValue, Value falseValue) {
++        assert ((SW64Kind) left.getPlatformKind()).isInteger() && ((SW64Kind) right.getPlatformKind()).isInteger();
++        assert ((SW64Kind) trueValue.getPlatformKind()).isInteger() && ((SW64Kind) falseValue.getPlatformKind()).isInteger();
++        ((SW64ArithmeticLIRGenerator) getArithmetic()).emitBinary(left.getValueKind(), SW64ArithmeticOp.ANDS, true, left, right);
++        Variable result = newVariable(trueValue.getValueKind());
++        append(new CondMoveOp(result, ConditionFlag.EQ, load(trueValue), load(falseValue)));
++        return result;
++    }
++
++    @Override
++    public void emitStrategySwitch(SwitchStrategy strategy, Variable key, LabelRef[] keyTargets, LabelRef defaultTarget) {
++        append(createStrategySwitchOp(strategy, keyTargets, defaultTarget, key, newVariable(key.getValueKind()), SW64LIRGenerator::toIntConditionFlag));
++    }
++
++    protected StrategySwitchOp createStrategySwitchOp(SwitchStrategy strategy, LabelRef[] keyTargets, LabelRef defaultTarget, Variable key, AllocatableValue scratchValue,
++                    Function<Condition, ConditionFlag> converter) {
++        return new StrategySwitchOp(strategy, keyTargets, defaultTarget, key, scratchValue, converter);
++    }
++
++    @Override
++    protected void emitTableSwitch(int lowKey, LabelRef defaultTarget, LabelRef[] targets, Value key) {
++        append(new TableSwitchOp(lowKey, defaultTarget, targets, key, newVariable(LIRKind.value(target().arch.getWordKind())), newVariable(key.getValueKind())));
++    }
++
++    @Override
++    public Variable emitByteSwap(Value input) {
++        Variable result = newVariable(LIRKind.combine(input));
++        append(new SW64ByteSwapOp(result, input));
++        return result;
++    }
++
++    @Override
++    public Variable emitArrayCompareTo(JavaKind kind1, JavaKind kind2, Value array1, Value array2, Value length1, Value length2) {
++        LIRKind resultKind = LIRKind.value(SW64Kind.DWORD);
++        // DMS TODO: check calling convention and registers used
++        RegisterValue res = SW64.r0.asValue(resultKind);
++        RegisterValue cnt1 = SW64.r1.asValue(length1.getValueKind());
++        RegisterValue cnt2 = SW64.r2.asValue(length2.getValueKind());
++        emitMove(cnt1, length1);
++        emitMove(cnt2, length2);
++        append(new SW64ArrayCompareToOp(this, kind1, kind2, res, array1, array2, cnt1, cnt2));
++        Variable result = newVariable(resultKind);
++        emitMove(result, res);
++        return result;
++    }
++
++    @Override
++    public Variable emitArrayEquals(JavaKind kind, Value array1, Value array2, Value length) {
++        Variable result = newVariable(LIRKind.value(SW64Kind.DWORD));
++        append(new SW64ArrayEqualsOp(this, kind, result, array1, array2, asAllocatable(length)));
++        return result;
++    }
++
++    @Override
++    protected JavaConstant zapValueForKind(PlatformKind kind) {
++        long dead = 0xDEADDEADDEADDEADL;
++        switch ((SW64Kind) kind) {
++            case BYTE:
++                return JavaConstant.forByte((byte) dead);
++            case WORD:
++                return JavaConstant.forShort((short) dead);
++            case DWORD:
++                return JavaConstant.forInt((int) dead);
++            case QWORD:
++                return JavaConstant.forLong(dead);
++            case SINGLE:
++                return JavaConstant.forFloat(Float.intBitsToFloat((int) dead));
++            case DOUBLE:
++                return JavaConstant.forDouble(Double.longBitsToDouble(dead));
++            default:
++                throw GraalError.shouldNotReachHere();
++        }
++    }
++
++    /**
++     * Loads value into virtual register. Contrary to {@link #load(Value)} this handles
++     * RegisterValues (i.e. values corresponding to fixed physical registers) correctly, by not
++     * creating an unnecessary move into a virtual register.
++ * ++ * This avoids generating the following code: mov x0, x19 # x19 is fixed thread register ldr x0, ++ * [x0] instead of: ldr x0, [x19]. ++ */ ++ protected AllocatableValue loadReg(Value val) { ++ if (!(val instanceof Variable || val instanceof RegisterValue)) { ++ return emitMove(val); ++ } ++ return (AllocatableValue) val; ++ } ++ ++ @Override ++ public void emitPause() { ++ append(new SW64PauseOp()); ++ } ++ ++ public abstract void emitCCall(long address, CallingConvention nativeCallingConvention, Value[] args); ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64LIRKindTool.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64LIRKindTool.java +new file mode 100644 +index 0000000000..e7ecfb4969 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64LIRKindTool.java +@@ -0,0 +1,80 @@ ++/* ++ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.core.sw64; ++ ++import org.graalvm.compiler.core.common.LIRKind; ++import org.graalvm.compiler.core.common.spi.LIRKindTool; ++import org.graalvm.compiler.debug.GraalError; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++ ++public class SW64LIRKindTool implements LIRKindTool { ++ ++ @Override ++ public LIRKind getIntegerKind(int bits) { ++ if (bits <= 8) { ++ return LIRKind.value(SW64Kind.BYTE); ++ } else if (bits <= 16) { ++ return LIRKind.value(SW64Kind.WORD); ++ } else if (bits <= 32) { ++ return LIRKind.value(SW64Kind.DWORD); ++ } else { ++ assert bits <= 64; ++ return LIRKind.value(SW64Kind.QWORD); ++ } ++ } ++ ++ @Override ++ public LIRKind getFloatingKind(int bits) { ++ switch (bits) { ++ case 32: ++ return LIRKind.value(SW64Kind.SINGLE); ++ case 64: ++ return LIRKind.value(SW64Kind.DOUBLE); ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ } ++ ++ @Override ++ public LIRKind getObjectKind() { ++ return LIRKind.reference(SW64Kind.QWORD); ++ } ++ ++ @Override ++ public LIRKind getWordKind() { ++ return LIRKind.value(SW64Kind.QWORD); ++ } ++ ++ @Override ++ public LIRKind getNarrowOopKind() { ++ return LIRKind.compressedReference(SW64Kind.DWORD); ++ } ++ ++ @Override ++ public LIRKind getNarrowPointerKind() { ++ return LIRKind.value(SW64Kind.DWORD); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64MoveFactory.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64MoveFactory.java +new file mode 100644 +index 0000000000..07404001de +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64MoveFactory.java +@@ -0,0 +1,122 @@ ++/* ++ * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++ ++package org.graalvm.compiler.core.sw64; ++ ++import static org.graalvm.compiler.lir.LIRValueUtil.asConstant; ++import static org.graalvm.compiler.lir.LIRValueUtil.isConstantValue; ++import static org.graalvm.compiler.lir.LIRValueUtil.isStackSlotValue; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.core.common.type.DataPointerConstant; ++import org.graalvm.compiler.debug.GraalError; ++import org.graalvm.compiler.lir.LIRInstruction; ++import org.graalvm.compiler.lir.sw64.SW64AddressValue; ++import org.graalvm.compiler.lir.sw64.SW64Move; ++import org.graalvm.compiler.lir.sw64.SW64Move.LoadAddressOp; ++import org.graalvm.compiler.lir.gen.LIRGeneratorTool.MoveFactory; ++ ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.Constant; ++import jdk.vm.ci.meta.JavaConstant; ++import jdk.vm.ci.meta.Value; ++ ++public class SW64MoveFactory implements MoveFactory { ++ ++ @Override ++ public LIRInstruction createMove(AllocatableValue dst, Value src) { ++ boolean srcIsSlot = isStackSlotValue(src); ++ boolean dstIsSlot = isStackSlotValue(dst); ++ if (isConstantValue(src)) { ++ return createLoad(dst, asConstant(src)); ++ } else if (src instanceof SW64AddressValue) { ++ return new LoadAddressOp(dst, (SW64AddressValue) src); ++ } else { ++ assert src instanceof AllocatableValue; ++ if (srcIsSlot && dstIsSlot) { ++ throw GraalError.shouldNotReachHere(src.getClass() + " " + dst.getClass()); ++ } else { ++ return new SW64Move.Move(dst, (AllocatableValue) src); ++ } ++ } ++ } ++ ++ @Override ++ public LIRInstruction createStackMove(AllocatableValue result, AllocatableValue input) { ++ return new SW64Move.Move(result, input); ++ } ++ ++ @Override ++ public LIRInstruction createLoad(AllocatableValue dst, Constant src) { ++ if (src instanceof JavaConstant) { ++ JavaConstant javaConstant = (JavaConstant) src; ++ if (canInlineConstant(javaConstant)) { ++ return new SW64Move.LoadInlineConstant(javaConstant, dst); ++ } else { ++ // return new SW64Move.LoadConstantFromTable(javaConstant, ++ // constantTableBaseProvider.getConstantTableBase(), dst); ++ return new SW64Move.LoadInlineConstant(javaConstant, dst); ++ } ++ } else if (src instanceof DataPointerConstant) { ++ return new SW64Move.LoadDataOp(dst, (DataPointerConstant) src); ++ } else { ++ // throw GraalError.shouldNotReachHere(src.getClass().toString()); ++ throw GraalError.unimplemented(); ++ } ++ } ++ ++ @Override ++ public LIRInstruction createStackLoad(AllocatableValue result, Constant input) { ++ return createLoad(result, input); ++ } ++ ++ @Override ++ public boolean canInlineConstant(Constant con) { ++ if (con instanceof JavaConstant) { ++ JavaConstant c = (JavaConstant) con; ++ switch (c.getJavaKind()) { ++ case Boolean: ++ case Byte: ++ case Char: ++ case Short: ++ case Int: ++ return SW64MacroAssembler.isMovableImmediate(c.asInt()); ++ case Long: ++ return SW64MacroAssembler.isMovableImmediate(c.asLong()); ++ case Object: ++ return c.isNull(); ++ default: ++ return false; ++ } ++ } ++ return false; ++ } ++ ++ @Override ++ public boolean allowConstantToStackMove(Constant value) { ++ return false; ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64NodeLIRBuilder.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64NodeLIRBuilder.java +new file mode 100644 +index 0000000000..5f3ebb2549 +--- /dev/null ++++ 
b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64NodeLIRBuilder.java +@@ -0,0 +1,59 @@ ++/* ++ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++ ++package org.graalvm.compiler.core.sw64; ++ ++import org.graalvm.compiler.core.gen.NodeLIRBuilder; ++import org.graalvm.compiler.lir.gen.LIRGeneratorTool; ++import org.graalvm.compiler.nodes.StructuredGraph; ++import org.graalvm.compiler.nodes.ValueNode; ++ ++/** ++ * This class implements the SW64 specific portion of the LIR generator. ++ */ ++public abstract class SW64NodeLIRBuilder extends NodeLIRBuilder { ++ ++ public SW64NodeLIRBuilder(StructuredGraph graph, LIRGeneratorTool lirGen, SW64NodeMatchRules nodeMatchRules) { ++ super(graph, lirGen, nodeMatchRules); ++ } ++ ++ @Override ++ protected boolean peephole(ValueNode valueNode) { ++ // No peephole optimizations for now ++ return false; ++ } ++ ++ @Override ++ public SW64LIRGenerator getLIRGeneratorTool() { ++ return (SW64LIRGenerator) super.getLIRGeneratorTool(); ++ } ++ ++ @Override ++ protected void emitPrologue(StructuredGraph graph) { ++ // XXX Maybe we need something like this. ++ // getLIRGeneratorTool().emitLoadConstantTableBase(); ++ super.emitPrologue(graph); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64NodeMatchRules.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64NodeMatchRules.java +new file mode 100644 +index 0000000000..fa949712be +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64NodeMatchRules.java +@@ -0,0 +1,62 @@ ++/* ++ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++ ++package org.graalvm.compiler.core.sw64; ++ ++import org.graalvm.compiler.core.gen.NodeMatchRules; ++import org.graalvm.compiler.lir.LIRFrameState; ++import org.graalvm.compiler.lir.gen.LIRGeneratorTool; ++import org.graalvm.compiler.nodes.DeoptimizingNode; ++import org.graalvm.compiler.nodes.NodeView; ++import org.graalvm.compiler.nodes.memory.Access; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++ ++public class SW64NodeMatchRules extends NodeMatchRules { ++ ++ public SW64NodeMatchRules(LIRGeneratorTool gen) { ++ super(gen); ++ } ++ ++ protected LIRFrameState getState(Access access) { ++ if (access instanceof DeoptimizingNode) { ++ return state((DeoptimizingNode) access); ++ } ++ return null; ++ } ++ ++ protected SW64Kind getMemoryKind(Access access) { ++ return (SW64Kind) gen.getLIRKind(access.asNode().stamp(NodeView.DEFAULT)).getPlatformKind(); ++ } ++ ++ @Override ++ public SW64LIRGenerator getLIRGeneratorTool() { ++ return (SW64LIRGenerator) gen; ++ } ++ ++ protected SW64ArithmeticLIRGenerator getArithmeticLIRGenerator() { ++ return (SW64ArithmeticLIRGenerator) getLIRGeneratorTool().getArithmetic(); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64ReadNode.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64ReadNode.java +new file mode 100644 +index 0000000000..2eb8a0645e +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64ReadNode.java +@@ -0,0 +1,104 @@ ++/* ++ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++ ++package org.graalvm.compiler.core.sw64; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++ ++import org.graalvm.compiler.core.common.type.IntegerStamp; ++import org.graalvm.compiler.core.common.type.Stamp; ++import org.graalvm.compiler.graph.NodeClass; ++import org.graalvm.compiler.lir.sw64.SW64AddressValue; ++import org.graalvm.compiler.nodeinfo.NodeInfo; ++import org.graalvm.compiler.nodes.FrameState; ++import org.graalvm.compiler.nodes.NodeView; ++import org.graalvm.compiler.nodes.StructuredGraph; ++import org.graalvm.compiler.nodes.ValueNode; ++import org.graalvm.compiler.nodes.calc.SignExtendNode; ++import org.graalvm.compiler.nodes.calc.ZeroExtendNode; ++import org.graalvm.compiler.nodes.extended.GuardingNode; ++import org.graalvm.compiler.nodes.memory.ReadNode; ++import org.graalvm.compiler.nodes.memory.address.AddressNode; ++import org.graalvm.compiler.nodes.spi.NodeLIRBuilderTool; ++import jdk.internal.vm.compiler.word.LocationIdentity; ++ ++/** ++ * SW64-specific subclass of ReadNode that knows how to merge ZeroExtend and SignExtend into the ++ * read. ++ */ ++ ++@NodeInfo ++public class SW64ReadNode extends ReadNode { ++ public static final NodeClass TYPE = NodeClass.create(SW64ReadNode.class); ++ private final IntegerStamp accessStamp; ++ private final boolean isSigned; ++ ++ public SW64ReadNode(AddressNode address, LocationIdentity location, Stamp stamp, GuardingNode guard, BarrierType barrierType, boolean nullCheck, ++ FrameState stateBefore, IntegerStamp accessStamp, boolean isSigned) { ++ super(TYPE, address, location, stamp, guard, barrierType, nullCheck, stateBefore); ++ this.accessStamp = accessStamp; ++ this.isSigned = isSigned; ++ } ++ ++ @Override ++ public void generate(NodeLIRBuilderTool gen) { ++ SW64LIRGenerator lirgen = (SW64LIRGenerator) gen.getLIRGeneratorTool(); ++ SW64ArithmeticLIRGenerator arithgen = (SW64ArithmeticLIRGenerator) lirgen.getArithmetic(); ++ SW64Kind readKind = (SW64Kind) lirgen.getLIRKind(accessStamp).getPlatformKind(); ++ int resultBits = ((IntegerStamp) stamp(NodeView.DEFAULT)).getBits(); ++ gen.setResult(this, arithgen.emitExtendMemory(isSigned, readKind, resultBits, (SW64AddressValue) gen.operand(getAddress()), gen.state(this))); ++ } ++ ++ /** ++ * replace a ReadNode with an SW64-specific variant which knows how to merge a downstream ++ * zero or sign extend into the read operation. 
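++     * The caller must ensure that the read has exactly one usage and that this usage is a
++     * ZeroExtendNode or a SignExtendNode; both preconditions are asserted at the top of this method.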
++ * ++ * @param readNode ++ */ ++ public static void replace(ReadNode readNode) { ++ assert readNode.getUsageCount() == 1; ++ assert readNode.getUsageAt(0) instanceof ZeroExtendNode || readNode.getUsageAt(0) instanceof SignExtendNode; ++ ++ ValueNode usage = (ValueNode) readNode.getUsageAt(0); ++ boolean isSigned = usage instanceof SignExtendNode; ++ IntegerStamp accessStamp = ((IntegerStamp) readNode.getAccessStamp()); ++ ++ AddressNode address = readNode.getAddress(); ++ LocationIdentity location = readNode.getLocationIdentity(); ++ Stamp stamp = usage.stamp(NodeView.DEFAULT); ++ GuardingNode guard = readNode.getGuard(); ++ BarrierType barrierType = readNode.getBarrierType(); ++ boolean nullCheck = readNode.getNullCheck(); ++ FrameState stateBefore = readNode.stateBefore(); ++ SW64ReadNode clone = new SW64ReadNode(address, location, stamp, guard, barrierType, nullCheck, stateBefore, accessStamp, isSigned); ++ StructuredGraph graph = readNode.graph(); ++ graph.add(clone); ++ // splice out the extend node ++ usage.replaceAtUsagesAndDelete(readNode); ++ // swap the clone for the read ++ graph.replaceFixedWithFixed(readNode, clone); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64ReadReplacementPhase.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64ReadReplacementPhase.java +new file mode 100644 +index 0000000000..c9721f1de2 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64ReadReplacementPhase.java +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 2017, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2017, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++ ++package org.graalvm.compiler.core.sw64; ++ ++import org.graalvm.compiler.graph.Node; ++import org.graalvm.compiler.nodes.StructuredGraph; ++import org.graalvm.compiler.nodes.calc.SignExtendNode; ++import org.graalvm.compiler.nodes.calc.ZeroExtendNode; ++import org.graalvm.compiler.nodes.memory.ReadNode; ++import org.graalvm.compiler.phases.Phase; ++ ++/** ++ * SW64-specific phase which substitutes certain read nodes with arch-specific variants in order ++ * to allow merging of zero and sign extension into the read operation. 
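++ * Only reads with exactly one usage are rewritten, and only when that usage is a ZeroExtendNode or
++ * SignExtendNode; nodes that are already SW64ReadNode instances are skipped.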
++ */ ++ ++public class SW64ReadReplacementPhase extends Phase { ++ @Override ++ protected void run(StructuredGraph graph) { ++ for (Node node : graph.getNodes()) { ++ // don't process nodes we just added ++ if (node instanceof SW64ReadNode) { ++ continue; ++ } ++ if (node instanceof ReadNode) { ++ ReadNode readNode = (ReadNode) node; ++ if (readNode.hasExactlyOneUsage()) { ++ Node usage = readNode.getUsageAt(0); ++ if (usage instanceof ZeroExtendNode || usage instanceof SignExtendNode) { ++ SW64ReadNode.replace(readNode); ++ } ++ } ++ } ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64SuitesCreator.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64SuitesCreator.java +new file mode 100644 +index 0000000000..b3c6f00db3 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.core.sw64/src/org/graalvm/compiler/core/sw64/SW64SuitesCreator.java +@@ -0,0 +1,60 @@ ++/* ++ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */
++
++
++package org.graalvm.compiler.core.sw64;
++
++import java.util.ListIterator;
++
++import org.graalvm.compiler.java.DefaultSuitesCreator;
++import org.graalvm.compiler.nodes.graphbuilderconf.GraphBuilderConfiguration.Plugins;
++import org.graalvm.compiler.options.OptionValues;
++import org.graalvm.compiler.phases.BasePhase;
++import org.graalvm.compiler.phases.Phase;
++import org.graalvm.compiler.phases.PhaseSuite;
++import org.graalvm.compiler.phases.tiers.CompilerConfiguration;
++import org.graalvm.compiler.phases.tiers.LowTierContext;
++import org.graalvm.compiler.phases.tiers.Suites;
++
++public class SW64SuitesCreator extends DefaultSuitesCreator {
++    private final Class<? extends Phase> insertReadReplacementBefore;
++
++    public SW64SuitesCreator(CompilerConfiguration compilerConfiguration, Plugins plugins, Class<? extends Phase> insertReadReplacementBefore) {
++        super(compilerConfiguration, plugins);
++        this.insertReadReplacementBefore = insertReadReplacementBefore;
++    }
++
++    @Override
++    public Suites createSuites(OptionValues options) {
++        Suites suites = super.createSuites(options);
++
++        ListIterator<BasePhase<? super LowTierContext>> findPhase = suites.getLowTier().findPhase(insertReadReplacementBefore);
++        // Put SW64ReadReplacementPhase right before the SchedulePhase
++        while (PhaseSuite.findNextPhase(findPhase, insertReadReplacementBefore)) {
++            // Search for last occurrence of SchedulePhase
++        }
++        findPhase.previous();
++        findPhase.add(new SW64ReadReplacementPhase());
++        return suites;
++    }
++}
+diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotBackend.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotBackend.java
+new file mode 100644
+index 0000000000..dc87ef4ffb
+--- /dev/null
++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotBackend.java
+@@ -0,0 +1,367 @@
++/*
++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static java.lang.reflect.Modifier.isStatic; ++import static jdk.vm.ci.sw64.SW64.lr; ++import static jdk.vm.ci.sw64.SW64.r10; ++import static jdk.vm.ci.sw64.SW64.sp; ++import static jdk.vm.ci.sw64.SW64.zr; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++import static jdk.vm.ci.hotspot.sw64.SW64HotSpotRegisterConfig.fp; ++import static org.graalvm.compiler.core.common.GraalOptions.GeneratePIC; ++import static org.graalvm.compiler.core.common.GraalOptions.ZapStackOnMethodEntry; ++ ++import jdk.internal.vm.compiler.collections.EconomicSet; ++import org.graalvm.compiler.asm.Assembler; ++import org.graalvm.compiler.asm.Label; ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.asm.sw64.SW64Assembler; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler.ScratchRegister; ++import org.graalvm.compiler.code.CompilationResult; ++import org.graalvm.compiler.core.sw64.SW64NodeMatchRules; ++import org.graalvm.compiler.core.common.CompilationIdentifier; ++import org.graalvm.compiler.core.common.LIRKind; ++import org.graalvm.compiler.core.common.alloc.RegisterAllocationConfig; ++import org.graalvm.compiler.core.common.spi.ForeignCallLinkage; ++import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig; ++import org.graalvm.compiler.hotspot.HotSpotDataBuilder; ++import org.graalvm.compiler.hotspot.HotSpotGraalRuntimeProvider; ++import org.graalvm.compiler.hotspot.HotSpotHostBackend; ++import org.graalvm.compiler.hotspot.HotSpotLIRGenerationResult; ++import org.graalvm.compiler.hotspot.meta.HotSpotConstantLoadAction; ++import org.graalvm.compiler.hotspot.meta.HotSpotForeignCallsProvider; ++import org.graalvm.compiler.hotspot.meta.HotSpotProviders; ++import org.graalvm.compiler.hotspot.stubs.Stub; ++import org.graalvm.compiler.lir.LIR; ++import org.graalvm.compiler.lir.sw64.SW64Call; ++import org.graalvm.compiler.lir.sw64.SW64FrameMap; ++import org.graalvm.compiler.lir.sw64.SW64FrameMapBuilder; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilderFactory; ++import org.graalvm.compiler.lir.asm.DataBuilder; ++import org.graalvm.compiler.lir.asm.FrameContext; ++import org.graalvm.compiler.lir.framemap.FrameMap; ++import org.graalvm.compiler.lir.framemap.FrameMapBuilder; ++import org.graalvm.compiler.lir.gen.LIRGenerationResult; ++import org.graalvm.compiler.lir.gen.LIRGeneratorTool; ++import org.graalvm.compiler.nodes.StructuredGraph; ++import org.graalvm.compiler.nodes.spi.NodeLIRBuilderTool; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.hotspot.HotSpotSentinelConstant; ++import jdk.vm.ci.hotspot.sw64.SW64HotSpotRegisterConfig; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.JavaType; ++import jdk.vm.ci.meta.ResolvedJavaMethod; ++ ++/** ++ * HotSpot SW64 specific backend. 
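++ * It creates the platform frame map, LIR generator and node LIR builder, and emits the method
++ * prologue, code body and epilogue, including the stack bang and the inline-cache check in the code prefix.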
++ */ ++public class SW64HotSpotBackend extends HotSpotHostBackend { ++ ++ public SW64HotSpotBackend(GraalHotSpotVMConfig config, HotSpotGraalRuntimeProvider runtime, HotSpotProviders providers) { ++ super(config, runtime, providers); ++ } ++ ++ @Override ++ public FrameMapBuilder newFrameMapBuilder(RegisterConfig registerConfig) { ++ RegisterConfig registerConfigNonNull = registerConfig == null ? getCodeCache().getRegisterConfig() : registerConfig; ++ return new SW64FrameMapBuilder(newFrameMap(registerConfigNonNull), getCodeCache(), registerConfigNonNull); ++ } ++ ++ @Override ++ public FrameMap newFrameMap(RegisterConfig registerConfig) { ++ return new SW64FrameMap(getCodeCache(), registerConfig, this); ++ } ++ ++ @Override ++ public LIRGeneratorTool newLIRGenerator(LIRGenerationResult lirGenRes) { ++ return new SW64HotSpotLIRGenerator(getProviders(), config, lirGenRes); ++ } ++ ++ @Override ++ public LIRGenerationResult newLIRGenerationResult(CompilationIdentifier compilationId, LIR lir, FrameMapBuilder frameMapBuilder, StructuredGraph graph, Object stub) { ++ return new HotSpotLIRGenerationResult(compilationId, lir, frameMapBuilder, makeCallingConvention(graph, (Stub) stub), stub, config.requiresReservedStackCheck(graph.getMethods())); ++ } ++ ++ @Override ++ public NodeLIRBuilderTool newNodeLIRBuilder(StructuredGraph graph, LIRGeneratorTool lirGen) { ++ return new SW64HotSpotNodeLIRBuilder(graph, lirGen, new SW64NodeMatchRules(lirGen)); ++ } ++ ++ @Override ++ protected void bangStackWithOffset(CompilationResultBuilder crb, int bangOffset) { ++ SW64MacroAssembler masm = (SW64MacroAssembler) crb.asm; ++ try (ScratchRegister sc = masm.getScratchRegister()) { ++ Register scratch = sc.getRegister(); ++ SW64Address address = masm.makeAddress(sp, -bangOffset, scratch, 8, /* allowOverwrite */false); ++ masm.str(64, zr, address); ++ } ++ } ++ ++ private class HotSpotFrameContext implements FrameContext { ++ final boolean isStub; ++ ++ HotSpotFrameContext(boolean isStub) { ++ this.isStub = isStub; ++ } ++ ++ @Override ++ public void enter(CompilationResultBuilder crb) { ++ FrameMap frameMap = crb.frameMap; ++ final int frameSize = frameMap.frameSize(); ++ final int totalFrameSize = frameMap.totalFrameSize(); ++ assert frameSize + 2 * crb.target.arch.getWordSize() == totalFrameSize : "total framesize should be framesize + 2 words"; ++ SW64MacroAssembler masm = (SW64MacroAssembler) crb.asm; ++ if (!isStub) { ++ emitStackOverflowCheck(crb); ++ } ++ crb.blockComment("[method prologue]"); ++ ++ try (ScratchRegister sc = masm.getScratchRegister()) { ++ int wordSize = crb.target.arch.getWordSize(); ++ Register rscratch1 = sc.getRegister(); ++ assert totalFrameSize > 0; ++ if (frameSize < 1 << 9) { ++ masm.sub(64, sp, sp, totalFrameSize); ++ masm.stp(64, fp, lr, SW64Address.createScaledImmediateAddress(sp, frameSize / wordSize)); ++ } else { ++ masm.stp(64, fp, lr, SW64Address.createPreIndexedImmediateAddress(sp, -2)); ++ if (frameSize < 1 << 12) { ++ masm.sub(64, sp, sp, totalFrameSize - 2 * wordSize); ++ } else { ++ masm.mov(rscratch1, totalFrameSize - 2 * wordSize); ++ masm.sub(64, sp, sp, rscratch1); ++ } ++ } ++ } ++ if (ZapStackOnMethodEntry.getValue(crb.getOptions())) { ++ try (ScratchRegister sc = masm.getScratchRegister()) { ++ Register scratch = sc.getRegister(); ++ int longSize = 8; ++ masm.mov(64, scratch, sp); ++ SW64Address address = SW64Address.createPostIndexedImmediateAddress(scratch, longSize); ++ try (ScratchRegister sc2 = masm.getScratchRegister()) { ++ Register value = 
sc2.getRegister(); ++ masm.mov(value, 0xBADDECAFFC0FFEEL); ++ for (int i = 0; i < frameSize; i += longSize) { ++ masm.str(64, value, address); ++ } ++ } ++ ++ } ++ } ++ crb.blockComment("[code body]"); ++ } ++ ++ @Override ++ public void leave(CompilationResultBuilder crb) { ++ SW64MacroAssembler masm = (SW64MacroAssembler) crb.asm; ++ FrameMap frameMap = crb.frameMap; ++ final int totalFrameSize = frameMap.totalFrameSize(); ++ ++ crb.blockComment("[method epilogue]"); ++ try (ScratchRegister sc = masm.getScratchRegister()) { ++ int wordSize = crb.target.arch.getWordSize(); ++ Register rscratch1 = sc.getRegister(); ++ final int frameSize = frameMap.frameSize(); ++ assert totalFrameSize > 0; ++ if (frameSize < 1 << 9) { ++ masm.ldp(64, fp, lr, SW64Address.createScaledImmediateAddress(sp, frameSize / wordSize)); ++ masm.add(64, sp, sp, totalFrameSize); ++ } else { ++ if (frameSize < 1 << 12) { ++ masm.add(64, sp, sp, totalFrameSize - 2 * wordSize); ++ } else { ++ masm.mov(rscratch1, totalFrameSize - 2 * wordSize); ++ masm.add(64, sp, sp, rscratch1); ++ } ++ masm.ldp(64, fp, lr, SW64Address.createPostIndexedImmediateAddress(sp, 2)); ++ } ++ } ++ ++ } ++ ++ @Override ++ public boolean hasFrame() { ++ return true; ++ } ++ ++ } ++ ++ @Override ++ protected Assembler createAssembler(FrameMap frameMap) { ++ return new SW64MacroAssembler(getTarget()); ++ } ++ ++ @Override ++ public CompilationResultBuilder newCompilationResultBuilder(LIRGenerationResult lirGenRen, FrameMap frameMap, CompilationResult compilationResult, CompilationResultBuilderFactory factory) { ++ HotSpotLIRGenerationResult gen = (HotSpotLIRGenerationResult) lirGenRen; ++ LIR lir = gen.getLIR(); ++ assert gen.getDeoptimizationRescueSlot() == null || frameMap.frameNeedsAllocating() : "method that can deoptimize must have a frame"; ++ ++ Stub stub = gen.getStub(); ++ Assembler masm = createAssembler(frameMap); ++ HotSpotFrameContext frameContext = new HotSpotFrameContext(stub != null); ++ ++ DataBuilder dataBuilder = new HotSpotDataBuilder(getCodeCache().getTarget()); ++ CompilationResultBuilder crb = factory.createBuilder(getCodeCache(), getForeignCalls(), frameMap, masm, dataBuilder, frameContext, lir.getOptions(), lir.getDebug(), compilationResult, ++ Register.None); ++ crb.setTotalFrameSize(frameMap.totalFrameSize()); ++ crb.setMaxInterpreterFrameSize(gen.getMaxInterpreterFrameSize()); ++ StackSlot deoptimizationRescueSlot = gen.getDeoptimizationRescueSlot(); ++ if (deoptimizationRescueSlot != null && stub == null) { ++ crb.compilationResult.setCustomStackAreaOffset(deoptimizationRescueSlot); ++ } ++ ++ if (stub != null) { ++ EconomicSet destroyedCallerRegisters = gatherDestroyedCallerRegisters(lir); ++ updateStub(stub, destroyedCallerRegisters, gen.getCalleeSaveInfo(), frameMap); ++ } ++ return crb; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, LIR lir, ResolvedJavaMethod installedCodeOwner) { ++ SW64MacroAssembler masm = (SW64MacroAssembler) crb.asm; ++ FrameMap frameMap = crb.frameMap; ++ RegisterConfig regConfig = frameMap.getRegisterConfig(); ++ Label verifiedStub = new Label(); ++ ++ emitCodePrefix(crb, installedCodeOwner, masm, regConfig, verifiedStub); ++ emitCodeBody(crb, lir, masm); ++ emitCodeSuffix(crb, masm, frameMap); ++ } ++ ++ private void emitCodePrefix(CompilationResultBuilder crb, ResolvedJavaMethod installedCodeOwner, SW64MacroAssembler masm, RegisterConfig regConfig, Label verifiedStub) { ++ HotSpotProviders providers = getProviders(); ++ if (installedCodeOwner != null && 
!isStatic(installedCodeOwner.getModifiers())) { ++ crb.recordMark(config.MARKID_UNVERIFIED_ENTRY); ++ CallingConvention cc = regConfig.getCallingConvention(HotSpotCallingConventionType.JavaCallee, null, new JavaType[]{providers.getMetaAccess().lookupJavaType(Object.class)}, this); ++ // See definition of IC_Klass in c1_LIRAssembler_sw64.cpp ++ // equal to scratch(1) careful! ++ Register inlineCacheKlass = SW64HotSpotRegisterConfig.inlineCacheRegister; ++ Register receiver = asRegister(cc.getArgument(0)); ++ int transferSize = config.useCompressedClassPointers ? 4 : 8; ++ SW64Address klassAddress = masm.makeAddress(receiver, config.hubOffset, transferSize); ++ ++ // Are r10 and r11 available scratch registers here? One would hope so. ++ Register klass = r10; ++ if (config.useCompressedClassPointers) { ++ masm.ldr(32, klass, klassAddress); ++ SW64HotSpotMove.decodeKlassPointer(crb, masm, klass, klass, config.getKlassEncoding(), config); ++ } else { ++ masm.ldr(64, klass, klassAddress); ++ } ++ masm.cmp(64, inlineCacheKlass, klass); ++ /* ++ * Conditional jumps have a much lower range than unconditional ones, which can be a ++ * problem because the miss handler could be out of range. ++ */ ++ masm.branchConditionally(SW64Assembler.ConditionFlag.EQ, verifiedStub); ++ SW64Call.directJmp(crb, masm, getForeignCalls().lookupForeignCall(IC_MISS_HANDLER)); ++ } ++ masm.align(config.codeEntryAlignment); ++ crb.recordMark(config.MARKID_OSR_ENTRY); ++ masm.bind(verifiedStub); ++ crb.recordMark(config.MARKID_VERIFIED_ENTRY); ++ ++ if (GeneratePIC.getValue(crb.getOptions())) { ++ // Check for method state ++ HotSpotFrameContext frameContext = (HotSpotFrameContext) crb.frameContext; ++ if (!frameContext.isStub) { ++ crb.recordInlineDataInCodeWithNote(new HotSpotSentinelConstant(LIRKind.value(SW64Kind.QWORD), JavaKind.Long), HotSpotConstantLoadAction.MAKE_NOT_ENTRANT); ++ try (ScratchRegister sc = masm.getScratchRegister()) { ++ Register scratch = sc.getRegister(); ++ masm.addressOf(scratch); ++ masm.ldr(64, scratch, SW64Address.createBaseRegisterOnlyAddress(scratch)); ++ Label noCall = new Label(); ++ masm.cbz(64, scratch, noCall); ++ SW64Call.directJmp(crb, masm, getForeignCalls().lookupForeignCall(WRONG_METHOD_HANDLER)); ++ masm.bind(noCall); ++ } ++ } ++ } ++ } ++ ++ private static void emitCodeBody(CompilationResultBuilder crb, LIR lir, SW64MacroAssembler masm) { ++ emitInvalidatePlaceholder(crb, masm); ++ crb.emit(lir); ++ } ++ ++ /** ++ * Insert a nop at the start of the prolog so we can patch in a branch if we need to invalidate ++ * the method later. 
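++     * The nop is only emitted when GeneratePIC is disabled; with PIC enabled, emitCodePrefix embeds an
++     * explicit method-state check instead.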
++ * ++ * @see "http://mail.openjdk.java.net/pipermail/sw64-port-dev/2013-September/000273.html" ++ */ ++ public static void emitInvalidatePlaceholder(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ if (!GeneratePIC.getValue(crb.getOptions())) { ++ crb.blockComment("[nop for method invalidation]"); ++ masm.nop(); ++ } ++ } ++ ++ private void emitCodeSuffix(CompilationResultBuilder crb, SW64MacroAssembler masm, FrameMap frameMap) { ++ HotSpotProviders providers = getProviders(); ++ HotSpotFrameContext frameContext = (HotSpotFrameContext) crb.frameContext; ++ if (!frameContext.isStub) { ++ HotSpotForeignCallsProvider foreignCalls = providers.getForeignCalls(); ++ try (ScratchRegister sc = masm.getScratchRegister()) { ++ Register scratch = sc.getRegister(); ++ crb.recordMark(config.MARKID_EXCEPTION_HANDLER_ENTRY); ++ ForeignCallLinkage linkage = foreignCalls.lookupForeignCall(EXCEPTION_HANDLER); ++ Register helper = SW64Call.isNearCall(linkage) ? null : scratch; ++ SW64Call.directCall(crb, masm, linkage, helper, null); ++ } ++ crb.recordMark(config.MARKID_DEOPT_HANDLER_ENTRY); ++ ForeignCallLinkage linkage = foreignCalls.lookupForeignCall(DEOPTIMIZATION_HANDLER); ++ masm.adr(lr, 0); // Warning: the argument is an offset from the instruction! ++ SW64Call.directJmp(crb, masm, linkage); ++ } else { ++ // No need to emit the stubs for entries back into the method since ++ // it has no calls that can cause such "return" entries ++ assert !frameMap.accessesCallerFrame(); ++ } ++ } ++ ++ @Override ++ public RegisterAllocationConfig newRegisterAllocationConfig(RegisterConfig registerConfig, String[] allocationRestrictedTo) { ++ RegisterConfig registerConfigNonNull = registerConfig == null ? getCodeCache().getRegisterConfig() : registerConfig; ++ return new SW64HotSpotRegisterAllocationConfig(registerConfigNonNull, allocationRestrictedTo); ++ } ++ ++ @Override ++ public EconomicSet translateToCallerRegisters(EconomicSet calleeRegisters) { ++ return calleeRegisters; ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotBackendFactory.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotBackendFactory.java +new file mode 100644 +index 0000000000..d6dad90fd8 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotBackendFactory.java +@@ -0,0 +1,231 @@ ++/* ++ * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static jdk.vm.ci.sw64.SW64.sp; ++import static jdk.vm.ci.common.InitTimer.timer; ++ ++import java.util.ArrayList; ++import java.util.List; ++ ++import org.graalvm.compiler.api.replacements.SnippetReflectionProvider; ++import org.graalvm.compiler.bytecode.BytecodeProvider; ++import org.graalvm.compiler.core.sw64.SW64AddressLoweringByUse; ++import org.graalvm.compiler.core.sw64.SW64LIRKindTool; ++import org.graalvm.compiler.core.sw64.SW64SuitesCreator; ++import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig; ++import org.graalvm.compiler.hotspot.HotSpotBackend; ++import org.graalvm.compiler.hotspot.HotSpotBackendFactory; ++import org.graalvm.compiler.hotspot.HotSpotGraalRuntimeProvider; ++import org.graalvm.compiler.hotspot.HotSpotReplacementsImpl; ++import org.graalvm.compiler.hotspot.meta.AddressLoweringHotSpotSuitesProvider; ++import org.graalvm.compiler.hotspot.meta.HotSpotConstantFieldProvider; ++import org.graalvm.compiler.hotspot.meta.HotSpotForeignCallsProvider; ++import org.graalvm.compiler.hotspot.meta.HotSpotGraalConstantFieldProvider; ++import org.graalvm.compiler.hotspot.meta.HotSpotGraphBuilderPlugins; ++import org.graalvm.compiler.hotspot.meta.HotSpotHostForeignCallsProvider; ++import org.graalvm.compiler.hotspot.meta.HotSpotLoweringProvider; ++import org.graalvm.compiler.hotspot.meta.HotSpotProviders; ++import org.graalvm.compiler.hotspot.meta.HotSpotRegisters; ++import org.graalvm.compiler.hotspot.meta.HotSpotRegistersProvider; ++import org.graalvm.compiler.hotspot.meta.HotSpotSnippetReflectionProvider; ++import org.graalvm.compiler.hotspot.meta.HotSpotStampProvider; ++import org.graalvm.compiler.hotspot.meta.HotSpotSuitesProvider; ++import org.graalvm.compiler.hotspot.word.HotSpotWordTypes; ++import org.graalvm.compiler.nodes.graphbuilderconf.GraphBuilderConfiguration.Plugins; ++import org.graalvm.compiler.nodes.spi.LoweringProvider; ++import org.graalvm.compiler.nodes.spi.Replacements; ++import org.graalvm.compiler.options.OptionValues; ++import org.graalvm.compiler.phases.Phase; ++import org.graalvm.compiler.phases.common.AddressLoweringByUsePhase; ++import org.graalvm.compiler.phases.schedule.SchedulePhase; ++import org.graalvm.compiler.phases.tiers.CompilerConfiguration; ++import org.graalvm.compiler.phases.util.Providers; ++import org.graalvm.compiler.replacements.sw64.SW64GraphBuilderPlugins; ++import org.graalvm.compiler.replacements.classfile.ClassfileBytecodeProvider; ++import org.graalvm.compiler.serviceprovider.ServiceProvider; ++import org.graalvm.compiler.word.WordTypes; ++ ++import jdk.vm.ci.sw64.SW64; ++import jdk.vm.ci.code.Architecture; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.common.InitTimer; ++import jdk.vm.ci.hotspot.HotSpotCodeCacheProvider; ++import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider; ++import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; ++import jdk.vm.ci.hotspot.HotSpotMetaAccessProvider; ++import jdk.vm.ci.hotspot.sw64.SW64HotSpotRegisterConfig; ++import jdk.vm.ci.meta.Value; ++import jdk.vm.ci.runtime.JVMCIBackend; ++ ++@ServiceProvider(HotSpotBackendFactory.class) ++public class SW64HotSpotBackendFactory implements HotSpotBackendFactory { ++ ++ @Override ++ public String getName() { ++ return 
"community"; ++ } ++ ++ @Override ++ public Class getArchitecture() { ++ return SW64.class; ++ } ++ ++ @Override ++ @SuppressWarnings("try") ++ public HotSpotBackend createBackend(HotSpotGraalRuntimeProvider graalRuntime, CompilerConfiguration compilerConfiguration, HotSpotJVMCIRuntime jvmciRuntime, HotSpotBackend host) { ++ assert host == null; ++ ++ JVMCIBackend jvmci = jvmciRuntime.getHostJVMCIBackend(); ++ GraalHotSpotVMConfig config = graalRuntime.getVMConfig(); ++ HotSpotProviders providers; ++ HotSpotRegistersProvider registers; ++ HotSpotCodeCacheProvider codeCache = (HotSpotCodeCacheProvider) jvmci.getCodeCache(); ++ TargetDescription target = codeCache.getTarget(); ++ HotSpotHostForeignCallsProvider foreignCalls; ++ Value[] nativeABICallerSaveRegisters; ++ HotSpotMetaAccessProvider metaAccess = (HotSpotMetaAccessProvider) jvmci.getMetaAccess(); ++ HotSpotConstantReflectionProvider constantReflection = (HotSpotConstantReflectionProvider) jvmci.getConstantReflection(); ++ HotSpotConstantFieldProvider constantFieldProvider = new HotSpotGraalConstantFieldProvider(config, metaAccess); ++ HotSpotLoweringProvider lowerer; ++ HotSpotSnippetReflectionProvider snippetReflection; ++ HotSpotReplacementsImpl replacements; ++ HotSpotSuitesProvider suites; ++ HotSpotWordTypes wordTypes; ++ Plugins plugins; ++ BytecodeProvider bytecodeProvider; ++ try (InitTimer t = timer("create providers")) { ++ try (InitTimer rt = timer("create HotSpotRegisters provider")) { ++ registers = createRegisters(); ++ } ++ try (InitTimer rt = timer("create NativeABICallerSaveRegisters")) { ++ nativeABICallerSaveRegisters = createNativeABICallerSaveRegisters(config, codeCache.getRegisterConfig()); ++ } ++ try (InitTimer rt = timer("create WordTypes")) { ++ wordTypes = new HotSpotWordTypes(metaAccess, target.wordJavaKind); ++ } ++ try (InitTimer rt = timer("create ForeignCalls provider")) { ++ foreignCalls = createForeignCalls(jvmciRuntime, graalRuntime, metaAccess, codeCache, wordTypes, nativeABICallerSaveRegisters); ++ } ++ try (InitTimer rt = timer("create Lowerer provider")) { ++ lowerer = createLowerer(graalRuntime, metaAccess, foreignCalls, registers, constantReflection, target); ++ } ++ HotSpotStampProvider stampProvider = new HotSpotStampProvider(); ++ Providers p = new Providers(metaAccess, codeCache, constantReflection, constantFieldProvider, foreignCalls, lowerer, null, stampProvider); ++ ++ try (InitTimer rt = timer("create SnippetReflection provider")) { ++ snippetReflection = createSnippetReflection(graalRuntime, constantReflection, wordTypes); ++ } ++ try (InitTimer rt = timer("create Bytecode provider")) { ++ bytecodeProvider = new ClassfileBytecodeProvider(metaAccess, snippetReflection); ++ } ++ try (InitTimer rt = timer("create Replacements provider")) { ++ replacements = createReplacements(graalRuntime.getOptions(), p, snippetReflection, bytecodeProvider); ++ } ++ try (InitTimer rt = timer("create GraphBuilderPhase plugins")) { ++ plugins = createGraphBuilderPlugins(compilerConfiguration, config, constantReflection, foreignCalls, lowerer, metaAccess, snippetReflection, replacements, wordTypes, stampProvider); ++ replacements.setGraphBuilderPlugins(plugins); ++ } ++ try (InitTimer rt = timer("create Suites provider")) { ++ suites = createSuites(config, graalRuntime, compilerConfiguration, plugins, replacements); ++ } ++ providers = new HotSpotProviders(metaAccess, codeCache, constantReflection, constantFieldProvider, foreignCalls, lowerer, replacements, suites, registers, ++ snippetReflection, 
wordTypes, ++ plugins); ++ } ++ try (InitTimer rt = timer("instantiate backend")) { ++ return createBackend(config, graalRuntime, providers); ++ } ++ } ++ ++ protected Plugins createGraphBuilderPlugins(CompilerConfiguration compilerConfiguration, GraalHotSpotVMConfig config, HotSpotConstantReflectionProvider constantReflection, ++ HotSpotHostForeignCallsProvider foreignCalls, LoweringProvider lowerer, HotSpotMetaAccessProvider metaAccess, HotSpotSnippetReflectionProvider snippetReflection, ++ HotSpotReplacementsImpl replacements, HotSpotWordTypes wordTypes, HotSpotStampProvider stampProvider) { ++ Plugins plugins = HotSpotGraphBuilderPlugins.create(compilerConfiguration, config, wordTypes, metaAccess, constantReflection, snippetReflection, foreignCalls, lowerer, stampProvider, ++ replacements); ++ SW64GraphBuilderPlugins.register(plugins, replacements.getDefaultReplacementBytecodeProvider(), false); ++ return plugins; ++ } ++ ++ protected SW64HotSpotBackend createBackend(GraalHotSpotVMConfig config, HotSpotGraalRuntimeProvider runtime, HotSpotProviders providers) { ++ return new SW64HotSpotBackend(config, runtime, providers); ++ } ++ ++ protected HotSpotRegistersProvider createRegisters() { ++ return new HotSpotRegisters(SW64HotSpotRegisterConfig.threadRegister, SW64HotSpotRegisterConfig.heapBaseRegister, sp); ++ } ++ ++ protected HotSpotReplacementsImpl createReplacements(OptionValues options, Providers p, SnippetReflectionProvider snippetReflection, BytecodeProvider bytecodeProvider) { ++ return new HotSpotReplacementsImpl(options, p, snippetReflection, bytecodeProvider, p.getCodeCache().getTarget()); ++ } ++ ++ protected HotSpotHostForeignCallsProvider createForeignCalls(HotSpotJVMCIRuntime jvmciRuntime, HotSpotGraalRuntimeProvider runtime, HotSpotMetaAccessProvider metaAccess, ++ HotSpotCodeCacheProvider codeCache, WordTypes wordTypes, Value[] nativeABICallerSaveRegisters) { ++ return new SW64HotSpotForeignCallsProvider(jvmciRuntime, runtime, metaAccess, codeCache, wordTypes, nativeABICallerSaveRegisters); ++ } ++ ++ protected HotSpotSuitesProvider createSuites(GraalHotSpotVMConfig config, HotSpotGraalRuntimeProvider runtime, CompilerConfiguration compilerConfiguration, Plugins plugins, ++ @SuppressWarnings("unused") Replacements replacements) { ++ SW64SuitesCreator suitesCreator = new SW64SuitesCreator(compilerConfiguration, plugins, SchedulePhase.class); ++ Phase addressLoweringPhase = new AddressLoweringByUsePhase(new SW64AddressLoweringByUse(new SW64LIRKindTool())); ++ return new AddressLoweringHotSpotSuitesProvider(suitesCreator, config, runtime, addressLoweringPhase); ++ } ++ ++ protected HotSpotSnippetReflectionProvider createSnippetReflection(HotSpotGraalRuntimeProvider runtime, HotSpotConstantReflectionProvider constantReflection, WordTypes wordTypes) { ++ return new HotSpotSnippetReflectionProvider(runtime, constantReflection, wordTypes); ++ } ++ ++ protected HotSpotLoweringProvider createLowerer(HotSpotGraalRuntimeProvider runtime, HotSpotMetaAccessProvider metaAccess, HotSpotForeignCallsProvider foreignCalls, ++ HotSpotRegistersProvider registers, HotSpotConstantReflectionProvider constantReflection, TargetDescription target) { ++ return new SW64HotSpotLoweringProvider(runtime, metaAccess, foreignCalls, registers, constantReflection, target); ++ } ++ ++ protected static Value[] createNativeABICallerSaveRegisters(@SuppressWarnings("unused") GraalHotSpotVMConfig config, RegisterConfig regConfig) { ++ List callerSave = new 
ArrayList<>(regConfig.getAllocatableRegisters().asList()); ++ callerSave.remove(SW64.r19); ++ callerSave.remove(SW64.r20); ++ callerSave.remove(SW64.r21); ++ callerSave.remove(SW64.r22); ++ callerSave.remove(SW64.r23); ++ callerSave.remove(SW64.r24); ++ callerSave.remove(SW64.r25); ++ callerSave.remove(SW64.r26); ++ callerSave.remove(SW64.r27); ++ callerSave.remove(SW64.r28); ++ Value[] nativeABICallerSaveRegisters = new Value[callerSave.size()]; ++ for (int i = 0; i < callerSave.size(); i++) { ++ nativeABICallerSaveRegisters[i] = callerSave.get(i).asValue(); ++ } ++ return nativeABICallerSaveRegisters; ++ } ++ ++ @Override ++ public String toString() { ++ return "SW64"; ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotCRuntimeCallEpilogueOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotCRuntimeCallEpilogueOp.java +new file mode 100644 +index 0000000000..35c2204526 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotCRuntimeCallEpilogueOp.java +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static jdk.vm.ci.sw64.SW64.zr; ++ ++import org.graalvm.compiler.asm.Label; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.sw64.SW64LIRInstruction; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.code.Register; ++ ++@Opcode("CRUNTIME_CALL_EPILOGUE") ++public class SW64HotSpotCRuntimeCallEpilogueOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64HotSpotCRuntimeCallEpilogueOp.class); ++ ++ private final int threadLastJavaSpOffset; ++ private final int threadLastJavaPcOffset; ++ private final Register thread; ++ @SuppressWarnings("unused") private final Label label; ++ ++ public SW64HotSpotCRuntimeCallEpilogueOp(int threadLastJavaSpOffset, int threadLastJavaPcOffset, Register thread, Label label) { ++ super(TYPE); ++ this.threadLastJavaSpOffset = threadLastJavaSpOffset; ++ this.threadLastJavaPcOffset = threadLastJavaPcOffset; ++ this.thread = thread; ++ this.label = label; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ // Reset last Java frame: ++ masm.str(64, zr, masm.makeAddress(thread, threadLastJavaSpOffset, 8)); ++ masm.str(64, zr, masm.makeAddress(thread, threadLastJavaPcOffset, 8)); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotCRuntimeCallPrologueOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotCRuntimeCallPrologueOp.java +new file mode 100644 +index 0000000000..873d4b7028 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotCRuntimeCallPrologueOp.java +@@ -0,0 +1,73 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++import static jdk.vm.ci.sw64.SW64.sp; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++ ++import org.graalvm.compiler.asm.Label; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.sw64.SW64LIRInstruction; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.AllocatableValue; ++ ++@Opcode("CRUNTIME_CALL_PROLOGUE") ++public class SW64HotSpotCRuntimeCallPrologueOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64HotSpotCRuntimeCallPrologueOp.class); ++ ++ private final int threadLastJavaSpOffset; ++ private final int threadLastJavaPcOffset; ++ private final Register thread; ++ @Temp({REG}) protected AllocatableValue scratch; ++ private final Label label; ++ ++ public SW64HotSpotCRuntimeCallPrologueOp(int threadLastJavaSpOffset, int threadLastJavaPcOffset, Register thread, AllocatableValue scratch, Label label) { ++ super(TYPE); ++ this.threadLastJavaSpOffset = threadLastJavaSpOffset; ++ this.threadLastJavaPcOffset = threadLastJavaPcOffset; ++ this.thread = thread; ++ this.scratch = scratch; ++ this.label = label; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ // Save last Java frame. ++ // We cannot save the SP directly so use a temporary register. ++ Register scratchRegister = asRegister(scratch); ++ masm.movx(scratchRegister, sp); ++ masm.str(64, scratchRegister, masm.makeAddress(thread, threadLastJavaSpOffset, 8)); ++ ++ // Get the current PC. Use a label to patch the return address. ++ masm.adr(scratchRegister, label); ++ masm.str(64, scratchRegister, masm.makeAddress(thread, threadLastJavaPcOffset, 8)); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotConstantRetrievalOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotConstantRetrievalOp.java +new file mode 100644 +index 0000000000..7cb706c403 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotConstantRetrievalOp.java +@@ -0,0 +1,124 @@ ++/* ++ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++ ++import java.util.ArrayList; ++import java.util.EnumSet; ++ ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.Constant; ++import jdk.vm.ci.meta.Value; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.core.common.spi.ForeignCallLinkage; ++import org.graalvm.compiler.lir.LIRFrameState; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.LIRValueUtil; ++import org.graalvm.compiler.lir.ValueProcedure; ++import org.graalvm.compiler.lir.sw64.SW64LIRInstruction; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++public final class SW64HotSpotConstantRetrievalOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64HotSpotConstantRetrievalOp.class); ++ ++ @Def protected AllocatableValue result; ++ protected final Constant[] constants; ++ @Alive protected AllocatableValue[] constantDescriptions; ++ @Temp protected AllocatableValue[] gotSlotOffsetParameters; ++ @Temp protected AllocatableValue[] descriptionParameters; ++ @Temp protected Value[] callTemps; ++ @State protected LIRFrameState frameState; ++ private final ForeignCallLinkage callLinkage; ++ private final Object[] notes; ++ ++ private class CollectTemporaries implements ValueProcedure { ++ ArrayList values = new ArrayList<>(); ++ ++ CollectTemporaries() { ++ forEachTemp(this); ++ } ++ ++ public Value[] asArray() { ++ Value[] copy = new Value[values.size()]; ++ return values.toArray(copy); ++ } ++ ++ @Override ++ public Value doValue(Value value, OperandMode mode, EnumSet flags) { ++ values.add(value); ++ return value; ++ } ++ } ++ ++ public SW64HotSpotConstantRetrievalOp(Constant[] constants, AllocatableValue[] constantDescriptions, LIRFrameState frameState, ForeignCallLinkage callLinkage, Object[] notes) { ++ super(TYPE); ++ this.constantDescriptions = constantDescriptions; ++ this.constants = constants; ++ this.frameState = frameState; ++ this.notes = notes; ++ assert constants.length == notes.length; ++ ++ // call arguments ++ CallingConvention callingConvention = callLinkage.getOutgoingCallingConvention(); ++ this.gotSlotOffsetParameters = new AllocatableValue[constants.length]; ++ int argIndex = 0; ++ for (int i = 0; i < constants.length; i++, argIndex++) { ++ this.gotSlotOffsetParameters[i] = callingConvention.getArgument(argIndex); ++ } ++ this.descriptionParameters = new AllocatableValue[constantDescriptions.length]; ++ for (int i = 0; i < constantDescriptions.length; i++, argIndex++) { ++ this.descriptionParameters[i] = callingConvention.getArgument(argIndex); ++ } ++ this.result = callingConvention.getReturn(); ++ ++ this.callLinkage = callLinkage; ++ ++ // compute registers that are killed by the stub, but are not used as other temps. 
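++        // LIRValueUtil.subtractRegisters drops every value already collected as a temp of this op,
++        // so callTemps holds only the registers that the stub call additionally clobbers.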
++ this.callTemps = new Value[0]; ++ this.callTemps = LIRValueUtil.subtractRegisters(callLinkage.getTemporaries(), new CollectTemporaries().asArray()); ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ // metadata_adr ++ for (int i = 0; i < constants.length; i++) { ++ crb.recordInlineDataInCodeWithNote(constants[i], notes[i]); ++ masm.addressOf(asRegister(gotSlotOffsetParameters[i])); ++ } ++ ++ for (int i = 0; i < constantDescriptions.length; i++) { ++ masm.mov(64, asRegister(descriptionParameters[i]), asRegister(constantDescriptions[i])); ++ } ++ ++ final int before = masm.position(); ++ masm.bl(before); ++ final int after = masm.position(); ++ crb.recordDirectCall(before, after, callLinkage, frameState); ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotDeoptimizeCallerOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotDeoptimizeCallerOp.java +new file mode 100644 +index 0000000000..602721b9ab +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotDeoptimizeCallerOp.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static org.graalvm.compiler.hotspot.HotSpotHostBackend.UNCOMMON_TRAP_HANDLER; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.sw64.SW64Call; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++/** ++ * Removes the current frame and tail calls the uncommon trap routine. 
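++ * The frame is torn down without a safepoint poll (emitSafepoint is false) before the tail call to
++ * the UNCOMMON_TRAP_HANDLER foreign call.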
++ */ ++@Opcode("DEOPT_CALLER") ++public class SW64HotSpotDeoptimizeCallerOp extends SW64HotSpotEpilogueOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64HotSpotDeoptimizeCallerOp.class); ++ ++ public SW64HotSpotDeoptimizeCallerOp(GraalHotSpotVMConfig config) { ++ super(TYPE, config); ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ leaveFrame(crb, masm, /* emitSafepoint */false, false); ++ SW64Call.directJmp(crb, masm, crb.foreignCalls.lookupForeignCall(UNCOMMON_TRAP_HANDLER)); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotDeoptimizeOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotDeoptimizeOp.java +new file mode 100644 +index 0000000000..5e9a74b98c +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotDeoptimizeOp.java +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static org.graalvm.compiler.hotspot.HotSpotHostBackend.UNCOMMON_TRAP_HANDLER; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.lir.LIRFrameState; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.StandardOp.BlockEndOp; ++import org.graalvm.compiler.lir.sw64.SW64BlockEndOp; ++import org.graalvm.compiler.lir.sw64.SW64Call; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++@Opcode("DEOPT") ++public class SW64HotSpotDeoptimizeOp extends SW64BlockEndOp implements BlockEndOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64HotSpotDeoptimizeOp.class); ++ ++ @State private LIRFrameState info; ++ ++ public SW64HotSpotDeoptimizeOp(LIRFrameState info) { ++ super(TYPE); ++ this.info = info; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ try (SW64MacroAssembler.ScratchRegister scratch = masm.getScratchRegister()) { ++ SW64Call.directCall(crb, masm, crb.foreignCalls.lookupForeignCall(UNCOMMON_TRAP_HANDLER), scratch.getRegister(), info, null); ++ } ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotDirectStaticCallOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotDirectStaticCallOp.java +new file mode 100644 +index 0000000000..24a736a858 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotDirectStaticCallOp.java +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static jdk.vm.ci.hotspot.sw64.SW64HotSpotRegisterConfig.inlineCacheRegister; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig; ++import org.graalvm.compiler.lir.LIRFrameState; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.sw64.SW64Call.DirectCallOp; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++import org.graalvm.compiler.nodes.CallTargetNode.InvokeKind; ++ ++import jdk.vm.ci.meta.ResolvedJavaMethod; ++import jdk.vm.ci.meta.Value; ++ ++/** ++ * A direct call that complies with the conventions for such calls in HotSpot. In particular, for ++ * calls using an inline cache, a MOVE instruction is emitted just prior to the aligned direct call. ++ */ ++@Opcode("CALL_DIRECT") ++final class SW64HotSpotDirectStaticCallOp extends DirectCallOp { ++ ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64HotSpotDirectStaticCallOp.class); ++ ++ private final InvokeKind invokeKind; ++ private final GraalHotSpotVMConfig config; ++ ++ SW64HotSpotDirectStaticCallOp(ResolvedJavaMethod target, Value result, Value[] parameters, Value[] temps, LIRFrameState state, InvokeKind invokeKind, GraalHotSpotVMConfig config) { ++ super(TYPE, target, result, parameters, temps, state); ++ assert invokeKind.isDirect(); ++ this.invokeKind = invokeKind; ++ this.config = config; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ // The mark for an invocation that uses an inline cache must be placed at the ++ // instruction that loads the Klass from the inline cache. ++ // For the first invocation this is set to a bitpattern that is guaranteed to never be a ++ // valid object which causes the called function to call a handler that installs the ++ // correct inline cache value here. ++ crb.recordMark(invokeKind == InvokeKind.Static ? config.MARKID_INVOKESTATIC : config.MARKID_INVOKESPECIAL); ++ masm.movNativeAddress(inlineCacheRegister, config.nonOopBits); ++ super.emitCode(crb, masm); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotDirectVirtualCallOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotDirectVirtualCallOp.java +new file mode 100644 +index 0000000000..86102a0908 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotDirectVirtualCallOp.java +@@ -0,0 +1,71 @@ ++/* ++ * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static jdk.vm.ci.hotspot.sw64.SW64HotSpotRegisterConfig.inlineCacheRegister; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig; ++import org.graalvm.compiler.lir.LIRFrameState; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.sw64.SW64Call.DirectCallOp; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++import org.graalvm.compiler.nodes.CallTargetNode.InvokeKind; ++ ++import jdk.vm.ci.meta.ResolvedJavaMethod; ++import jdk.vm.ci.meta.Value; ++ ++/** ++ * A direct call that complies with the conventions for such calls in HotSpot. In particular, for ++ * calls using an inline cache, a MOVE instruction is emitted just prior to the aligned direct call. ++ */ ++@Opcode("CALL_DIRECT") ++final class SW64HotSpotDirectVirtualCallOp extends DirectCallOp { ++ ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64HotSpotDirectVirtualCallOp.class); ++ ++ private final InvokeKind invokeKind; ++ private final GraalHotSpotVMConfig config; ++ ++ SW64HotSpotDirectVirtualCallOp(ResolvedJavaMethod target, Value result, Value[] parameters, Value[] temps, LIRFrameState state, InvokeKind invokeKind, GraalHotSpotVMConfig config) { ++ super(TYPE, target, result, parameters, temps, state); ++ assert invokeKind.isIndirect(); ++ this.invokeKind = invokeKind; ++ this.config = config; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ // The mark for an invocation that uses an inline cache must be placed at the ++ // instruction that loads the Klass from the inline cache. ++ // For the first invocation this is set to a bitpattern that is guaranteed to never be a ++ // valid object which causes the called function to call a handler that installs the ++ // correct inline cache value here. ++ crb.recordMark(invokeKind == InvokeKind.Virtual ? config.MARKID_INVOKEVIRTUAL : config.MARKID_INVOKEINTERFACE); ++ masm.movNativeAddress(inlineCacheRegister, config.nonOopBits); ++ super.emitCode(crb, masm); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotEpilogueOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotEpilogueOp.java +new file mode 100644 +index 0000000000..eb486378d9 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotEpilogueOp.java +@@ -0,0 +1,103 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static jdk.vm.ci.sw64.SW64.lr; ++import static jdk.vm.ci.sw64.SW64.sp; ++import static jdk.vm.ci.sw64.SW64.zr; ++import static jdk.vm.ci.hotspot.sw64.SW64HotSpotRegisterConfig.fp; ++import static org.graalvm.compiler.hotspot.HotSpotHostBackend.ENABLE_STACK_RESERVED_ZONE; ++import static org.graalvm.compiler.hotspot.HotSpotHostBackend.THROW_DELAYED_STACKOVERFLOW_ERROR; ++ ++import org.graalvm.compiler.asm.Label; ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.asm.sw64.SW64Assembler; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler.ScratchRegister; ++import org.graalvm.compiler.core.common.spi.ForeignCallLinkage; ++import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig; ++import org.graalvm.compiler.hotspot.meta.HotSpotForeignCallsProvider; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.sw64.SW64BlockEndOp; ++import org.graalvm.compiler.lir.sw64.SW64Call; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterValue; ++ ++/** ++ * Superclass for operations that leave a method's frame. 
++ */ ++abstract class SW64HotSpotEpilogueOp extends SW64BlockEndOp { ++ ++ private final GraalHotSpotVMConfig config; ++ private final Register thread; ++ ++ protected SW64HotSpotEpilogueOp(LIRInstructionClass c, GraalHotSpotVMConfig config, Register thread) { ++ super(c); ++ this.config = config; ++ this.thread = thread; ++ } ++ ++ protected SW64HotSpotEpilogueOp(LIRInstructionClass c, GraalHotSpotVMConfig config) { ++ super(c); ++ this.config = config; ++ this.thread = null; // no safepoint ++ } ++ ++ protected void leaveFrame(CompilationResultBuilder crb, SW64MacroAssembler masm, boolean emitSafepoint, boolean requiresReservedStackAccessCheck) { ++ assert crb.frameContext != null : "We never elide frames in sw64"; ++ crb.frameContext.leave(crb); ++ if (requiresReservedStackAccessCheck) { ++ HotSpotForeignCallsProvider foreignCalls = (HotSpotForeignCallsProvider) crb.foreignCalls; ++ Label noReserved = new Label(); ++ try (ScratchRegister sc = masm.getScratchRegister()) { ++ Register scratch = sc.getRegister(); ++ masm.ldr(64, scratch, masm.makeAddress(thread, config.javaThreadReservedStackActivationOffset, 8)); ++ masm.subs(64, zr, sp, scratch); ++ } ++ masm.branchConditionally(SW64Assembler.ConditionFlag.LO, noReserved); ++ ForeignCallLinkage enableStackReservedZone = foreignCalls.lookupForeignCall(ENABLE_STACK_RESERVED_ZONE); ++ CallingConvention cc = enableStackReservedZone.getOutgoingCallingConvention(); ++ assert cc.getArgumentCount() == 1; ++ Register arg0 = ((RegisterValue) cc.getArgument(0)).getRegister(); ++ masm.mov(64, arg0, thread); ++ try (ScratchRegister sc = masm.getScratchRegister()) { ++ masm.stp(64, fp, lr, SW64Address.createPreIndexedImmediateAddress(sp, -2)); ++ SW64Call.directCall(crb, masm, enableStackReservedZone, sc.getRegister(), null); ++ masm.ldp(64, fp, lr, SW64Address.createPostIndexedImmediateAddress(sp, 2)); ++ } ++ SW64Call.directJmp(crb, masm, foreignCalls.lookupForeignCall(THROW_DELAYED_STACKOVERFLOW_ERROR)); ++ masm.bind(noReserved); ++ } ++ if (emitSafepoint) { ++ try (ScratchRegister sc = masm.getScratchRegister()) { ++ Register scratch = sc.getRegister(); ++ SW64HotSpotSafepointOp.emitCode(crb, masm, config, true, thread, scratch, null); ++ } ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotForeignCallsProvider.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotForeignCallsProvider.java +new file mode 100644 +index 0000000000..ac5ff01872 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotForeignCallsProvider.java +@@ -0,0 +1,97 @@ ++/* ++ * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static jdk.vm.ci.sw64.SW64.r0; ++import static jdk.vm.ci.sw64.SW64.r3; ++import static jdk.vm.ci.hotspot.HotSpotCallingConventionType.NativeCall; ++import static jdk.vm.ci.meta.Value.ILLEGAL; ++import static org.graalvm.compiler.hotspot.HotSpotForeignCallLinkage.JUMP_ADDRESS; ++import static org.graalvm.compiler.hotspot.HotSpotForeignCallLinkage.RegisterEffect.PRESERVES_REGISTERS; ++import static org.graalvm.compiler.hotspot.HotSpotForeignCallLinkage.Transition.LEAF; ++import static org.graalvm.compiler.hotspot.replacements.CRC32CSubstitutions.UPDATE_BYTES_CRC32C; ++import static org.graalvm.compiler.hotspot.replacements.CRC32Substitutions.UPDATE_BYTES_CRC32; ++import static jdk.internal.vm.compiler.word.LocationIdentity.any; ++ ++import org.graalvm.compiler.core.common.LIRKind; ++import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig; ++import org.graalvm.compiler.hotspot.HotSpotBackend; ++import org.graalvm.compiler.hotspot.HotSpotForeignCallLinkageImpl; ++import org.graalvm.compiler.hotspot.HotSpotGraalRuntimeProvider; ++import org.graalvm.compiler.hotspot.meta.HotSpotHostForeignCallsProvider; ++import org.graalvm.compiler.hotspot.meta.HotSpotProviders; ++import org.graalvm.compiler.options.OptionValues; ++import org.graalvm.compiler.word.WordTypes; ++ ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.CodeCacheProvider; ++import jdk.vm.ci.code.RegisterValue; ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.hotspot.HotSpotJVMCIRuntime; ++import jdk.vm.ci.meta.MetaAccessProvider; ++import jdk.vm.ci.meta.PlatformKind; ++import jdk.vm.ci.meta.Value; ++ ++public class SW64HotSpotForeignCallsProvider extends HotSpotHostForeignCallsProvider { ++ ++ private final Value[] nativeABICallerSaveRegisters; ++ ++ public SW64HotSpotForeignCallsProvider(HotSpotJVMCIRuntime jvmciRuntime, HotSpotGraalRuntimeProvider runtime, MetaAccessProvider metaAccess, CodeCacheProvider codeCache, ++ WordTypes wordTypes, Value[] nativeABICallerSaveRegisters) { ++ super(jvmciRuntime, runtime, metaAccess, codeCache, wordTypes); ++ this.nativeABICallerSaveRegisters = nativeABICallerSaveRegisters; ++ } ++ ++ @Override ++ public void initialize(HotSpotProviders providers, OptionValues options) { ++ GraalHotSpotVMConfig config = runtime.getVMConfig(); ++ TargetDescription target = providers.getCodeCache().getTarget(); ++ PlatformKind word = target.arch.getWordKind(); ++ ++ // The calling convention for the exception handler stub is (only?) 
defined in ++ // TemplateInterpreterGenerator::generate_throw_exception() ++ RegisterValue exception = r0.asValue(LIRKind.reference(word)); ++ RegisterValue exceptionPc = r3.asValue(LIRKind.value(word)); ++ CallingConvention exceptionCc = new CallingConvention(0, ILLEGAL, exception, exceptionPc); ++ register(new HotSpotForeignCallLinkageImpl(HotSpotBackend.EXCEPTION_HANDLER, 0L, PRESERVES_REGISTERS, LEAF, exceptionCc, null, NOT_REEXECUTABLE, any())); ++ register(new HotSpotForeignCallLinkageImpl(HotSpotBackend.EXCEPTION_HANDLER_IN_CALLER, JUMP_ADDRESS, PRESERVES_REGISTERS, LEAF, exceptionCc, null, NOT_REEXECUTABLE, any())); ++ ++ // These stubs do callee saving ++ if (config.useCRC32Intrinsics) { ++ registerForeignCall(UPDATE_BYTES_CRC32, config.updateBytesCRC32Stub, NativeCall, PRESERVES_REGISTERS, LEAF, NOT_REEXECUTABLE, any()); ++ } ++ if (config.useCRC32CIntrinsics) { ++ registerForeignCall(UPDATE_BYTES_CRC32C, config.updateBytesCRC32C, NativeCall, PRESERVES_REGISTERS, LEAF, NOT_REEXECUTABLE, any()); ++ } ++ ++ super.initialize(providers, options); ++ } ++ ++ @Override ++ public Value[] getNativeABICallerSaveRegisters() { ++ return nativeABICallerSaveRegisters; ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotJumpToExceptionHandlerInCallerOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotJumpToExceptionHandlerInCallerOp.java +new file mode 100644 +index 0000000000..085c720f13 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotJumpToExceptionHandlerInCallerOp.java +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static jdk.vm.ci.sw64.SW64.sp; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++import static jdk.vm.ci.hotspot.sw64.SW64HotSpotRegisterConfig.fp; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++ ++import org.graalvm.compiler.asm.Label; ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler.ScratchRegister; ++import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++import org.graalvm.compiler.serviceprovider.GraalServices; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.AllocatableValue; ++ ++/** ++ * Sets up the arguments for an exception handler in the callers frame, removes the current frame ++ * and jumps to the handler. ++ */ ++@Opcode("JUMP_TO_EXCEPTION_HANDLER_IN_CALLER") ++public class SW64HotSpotJumpToExceptionHandlerInCallerOp extends SW64HotSpotEpilogueOp { ++ ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64HotSpotJumpToExceptionHandlerInCallerOp.class); ++ ++ @Use(REG) private AllocatableValue handlerInCallerPc; ++ @Use(REG) private AllocatableValue exception; ++ @Use(REG) private AllocatableValue exceptionPc; ++ private final Register thread; ++ private final int isMethodHandleReturnOffset; ++ ++ public SW64HotSpotJumpToExceptionHandlerInCallerOp(AllocatableValue handlerInCallerPc, AllocatableValue exception, AllocatableValue exceptionPc, int isMethodHandleReturnOffset, ++ Register thread, GraalHotSpotVMConfig config) { ++ super(TYPE, config); ++ this.handlerInCallerPc = handlerInCallerPc; ++ this.exception = exception; ++ this.exceptionPc = exceptionPc; ++ this.isMethodHandleReturnOffset = isMethodHandleReturnOffset; ++ this.thread = thread; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ leaveFrame(crb, masm, /* emitSafepoint */false, false); ++ ++ if (GraalServices.JAVA_SPECIFICATION_VERSION < 8) { ++ // Restore sp from fp if the exception PC is a method handle call site. ++ try (ScratchRegister sc = masm.getScratchRegister()) { ++ Register scratch = sc.getRegister(); ++ final boolean allowOverwrite = false; ++ SW64Address address = masm.makeAddress(thread, isMethodHandleReturnOffset, scratch, 4, allowOverwrite); ++ masm.ldr(32, scratch, address); ++ Label noRestore = new Label(); ++ masm.cbz(32, scratch, noRestore); ++ masm.mov(64, sp, fp); ++ masm.bind(noRestore); ++ } ++ } ++ ++ masm.jmp(asRegister(handlerInCallerPc)); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotLIRGenerator.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotLIRGenerator.java +new file mode 100644 +index 0000000000..c91750d67b +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotLIRGenerator.java +@@ -0,0 +1,550 @@ ++/* ++ * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static org.graalvm.compiler.core.common.GraalOptions.GeneratePIC; ++import static org.graalvm.compiler.hotspot.HotSpotBackend.INITIALIZE_KLASS_BY_SYMBOL; ++import static org.graalvm.compiler.hotspot.HotSpotBackend.RESOLVE_DYNAMIC_INVOKE; ++import static org.graalvm.compiler.hotspot.HotSpotBackend.RESOLVE_KLASS_BY_SYMBOL; ++import static org.graalvm.compiler.hotspot.HotSpotBackend.RESOLVE_METHOD_BY_SYMBOL_AND_LOAD_COUNTERS; ++import static org.graalvm.compiler.hotspot.HotSpotBackend.RESOLVE_STRING_BY_SYMBOL; ++import static org.graalvm.compiler.hotspot.meta.HotSpotConstantLoadAction.INITIALIZE; ++import static org.graalvm.compiler.hotspot.meta.HotSpotConstantLoadAction.LOAD_COUNTERS; ++import static org.graalvm.compiler.hotspot.meta.HotSpotConstantLoadAction.RESOLVE; ++import static org.graalvm.compiler.lir.LIRValueUtil.asConstant; ++import static org.graalvm.compiler.lir.LIRValueUtil.isConstantValue; ++ ++import java.util.function.Function; ++ ++import org.graalvm.compiler.asm.Label; ++import org.graalvm.compiler.asm.sw64.SW64Address.AddressingMode; ++import org.graalvm.compiler.asm.sw64.SW64Assembler.ConditionFlag; ++import org.graalvm.compiler.asm.sw64.SW64Assembler.PrefetchMode; ++import org.graalvm.compiler.core.sw64.SW64ArithmeticLIRGenerator; ++import org.graalvm.compiler.core.sw64.SW64LIRGenerator; ++import org.graalvm.compiler.core.sw64.SW64LIRKindTool; ++import org.graalvm.compiler.core.common.CompressEncoding; ++import org.graalvm.compiler.core.common.LIRKind; ++import org.graalvm.compiler.core.common.calc.Condition; ++import org.graalvm.compiler.core.common.spi.ForeignCallDescriptor; ++import org.graalvm.compiler.core.common.spi.ForeignCallLinkage; ++import org.graalvm.compiler.core.common.spi.LIRKindTool; ++import org.graalvm.compiler.debug.GraalError; ++import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig; ++import org.graalvm.compiler.hotspot.HotSpotBackend; ++import org.graalvm.compiler.hotspot.HotSpotDebugInfoBuilder; ++import org.graalvm.compiler.hotspot.HotSpotForeignCallLinkage; ++import org.graalvm.compiler.hotspot.HotSpotLIRGenerationResult; ++import org.graalvm.compiler.hotspot.HotSpotLIRGenerator; ++import org.graalvm.compiler.hotspot.HotSpotLockStack; ++import org.graalvm.compiler.hotspot.meta.HotSpotConstantLoadAction; ++import org.graalvm.compiler.hotspot.meta.HotSpotProviders; ++import org.graalvm.compiler.hotspot.meta.HotSpotRegistersProvider; ++import org.graalvm.compiler.hotspot.stubs.Stub; ++import org.graalvm.compiler.lir.LIRFrameState; ++import 
org.graalvm.compiler.lir.LIRInstruction; ++import org.graalvm.compiler.lir.LabelRef; ++import org.graalvm.compiler.lir.StandardOp.SaveRegistersOp; ++import org.graalvm.compiler.lir.SwitchStrategy; ++import org.graalvm.compiler.lir.Variable; ++import org.graalvm.compiler.lir.VirtualStackSlot; ++import org.graalvm.compiler.lir.sw64.SW64AddressValue; ++import org.graalvm.compiler.lir.sw64.SW64CCall; ++import org.graalvm.compiler.lir.sw64.SW64Call; ++import org.graalvm.compiler.lir.sw64.SW64ControlFlow.StrategySwitchOp; ++import org.graalvm.compiler.lir.sw64.SW64FrameMapBuilder; ++import org.graalvm.compiler.lir.sw64.SW64Move; ++import org.graalvm.compiler.lir.sw64.SW64Move.StoreOp; ++import org.graalvm.compiler.lir.sw64.SW64PrefetchOp; ++import org.graalvm.compiler.lir.sw64.SW64RestoreRegistersOp; ++import org.graalvm.compiler.lir.sw64.SW64SaveRegistersOp; ++import org.graalvm.compiler.lir.gen.LIRGenerationResult; ++import org.graalvm.compiler.options.OptionValues; ++ ++import jdk.vm.ci.sw64.SW64; ++import jdk.vm.ci.sw64.SW64Kind; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterValue; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.hotspot.HotSpotCompressedNullConstant; ++import jdk.vm.ci.hotspot.HotSpotMetaspaceConstant; ++import jdk.vm.ci.hotspot.HotSpotObjectConstant; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.Constant; ++import jdk.vm.ci.meta.DeoptimizationAction; ++import jdk.vm.ci.meta.DeoptimizationReason; ++import jdk.vm.ci.meta.JavaConstant; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.PlatformKind; ++import jdk.vm.ci.meta.SpeculationLog; ++import jdk.vm.ci.meta.Value; ++ ++/** ++ * LIR generator specialized for SW64 HotSpot. ++ */ ++public class SW64HotSpotLIRGenerator extends SW64LIRGenerator implements HotSpotLIRGenerator { ++ ++ final GraalHotSpotVMConfig config; ++ private HotSpotDebugInfoBuilder debugInfoBuilder; ++ ++ protected SW64HotSpotLIRGenerator(HotSpotProviders providers, GraalHotSpotVMConfig config, LIRGenerationResult lirGenRes) { ++ this(new SW64LIRKindTool(), new SW64ArithmeticLIRGenerator(), new SW64HotSpotMoveFactory(), providers, config, lirGenRes); ++ } ++ ++ protected SW64HotSpotLIRGenerator(LIRKindTool lirKindTool, SW64ArithmeticLIRGenerator arithmeticLIRGen, MoveFactory moveFactory, HotSpotProviders providers, GraalHotSpotVMConfig config, ++ LIRGenerationResult lirGenRes) { ++ super(lirKindTool, arithmeticLIRGen, moveFactory, providers, lirGenRes); ++ this.config = config; ++ } ++ ++ @Override ++ public HotSpotProviders getProviders() { ++ return (HotSpotProviders) super.getProviders(); ++ } ++ ++ @Override ++ public boolean needOnlyOopMaps() { ++ // Stubs only need oop maps ++ return getResult().getStub() != null; ++ } ++ ++ private LIRFrameState currentRuntimeCallInfo; ++ ++ @Override ++ protected void emitForeignCallOp(ForeignCallLinkage linkage, Value result, Value[] arguments, Value[] temps, LIRFrameState info) { ++ currentRuntimeCallInfo = info; ++ if (SW64Call.isNearCall(linkage)) { ++ append(new SW64Call.DirectNearForeignCallOp(linkage, result, arguments, temps, info, label)); ++ } else { ++ append(new SW64Call.DirectFarForeignCallOp(linkage, result, arguments, temps, info, label)); ++ } ++ } ++ ++ @Override ++ public void emitTailcall(Value[] args, Value address) { ++ throw GraalError.unimplemented(); ++ } ++ ++ @Override ++ public void emitCCall(long address, CallingConvention nativeCallingConvention, Value[] args) { ++ Value[] argLocations = new 
Value[args.length]; ++ getResult().getFrameMapBuilder().callsMethod(nativeCallingConvention); ++ for (int i = 0; i < args.length; i++) { ++ Value arg = args[i]; ++ AllocatableValue loc = nativeCallingConvention.getArgument(i); ++ emitMove(loc, arg); ++ argLocations[i] = loc; ++ } ++ Value ptr = emitLoadConstant(LIRKind.value(SW64Kind.QWORD), JavaConstant.forLong(address)); ++ append(new SW64CCall(nativeCallingConvention.getReturn(), ptr, argLocations)); ++ } ++ ++ /** ++ * @param savedRegisters the registers saved by this operation which may be subject to pruning ++ * @param savedRegisterLocations the slots to which the registers are saved ++ * @param supportsRemove determines if registers can be pruned ++ */ ++ protected SW64SaveRegistersOp emitSaveRegisters(Register[] savedRegisters, AllocatableValue[] savedRegisterLocations, boolean supportsRemove) { ++ SW64SaveRegistersOp save = new SW64SaveRegistersOp(savedRegisters, savedRegisterLocations, supportsRemove); ++ append(save); ++ return save; ++ } ++ ++ /** ++ * Allocate a stack slot for saving a register. ++ */ ++ protected VirtualStackSlot allocateSaveRegisterLocation(Register register) { ++ PlatformKind kind = target().arch.getLargestStorableKind(register.getRegisterCategory()); ++ if (kind.getVectorLength() > 1) { ++ // we don't use vector registers, so there is no need to save them ++ kind = SW64Kind.QWORD; ++ } ++ return getResult().getFrameMapBuilder().allocateSpillSlot(LIRKind.value(kind)); ++ } ++ ++ /** ++ * Adds a node to the graph that saves all allocatable registers to the stack. ++ * ++ * @param supportsRemove determines if registers can be pruned ++ * @return the register save node ++ */ ++ private SW64SaveRegistersOp emitSaveAllRegisters(Register[] savedRegisters, boolean supportsRemove) { ++ AllocatableValue[] savedRegisterLocations = new AllocatableValue[savedRegisters.length]; ++ for (int i = 0; i < savedRegisters.length; i++) { ++ savedRegisterLocations[i] = allocateSaveRegisterLocation(savedRegisters[i]); ++ } ++ return emitSaveRegisters(savedRegisters, savedRegisterLocations, supportsRemove); ++ } ++ ++ protected void emitRestoreRegisters(SW64SaveRegistersOp save) { ++ append(new SW64RestoreRegistersOp(save.getSlots().clone(), save)); ++ } ++ ++ @Override ++ public VirtualStackSlot getLockSlot(int lockDepth) { ++ return getLockStack().makeLockSlot(lockDepth); ++ } ++ ++ private HotSpotLockStack getLockStack() { ++ assert debugInfoBuilder != null && debugInfoBuilder.lockStack() != null; ++ return debugInfoBuilder.lockStack(); ++ } ++ ++ @Override ++ public void emitCompareBranch(PlatformKind cmpKind, Value x, Value y, Condition cond, boolean unorderedIsTrue, LabelRef trueDestination, LabelRef falseDestination, ++ double trueDestinationProbability) { ++ Value localX = x; ++ Value localY = y; ++ if (localX instanceof HotSpotObjectConstant) { ++ localX = load(localX); ++ } ++ if (localY instanceof HotSpotObjectConstant) { ++ localY = load(localY); ++ } ++ super.emitCompareBranch(cmpKind, localX, localY, cond, unorderedIsTrue, trueDestination, falseDestination, trueDestinationProbability); ++ } ++ ++ @Override ++ protected boolean emitCompare(PlatformKind cmpKind, Value a, Value b, Condition condition, boolean unorderedIsTrue) { ++ Value localA = a; ++ Value localB = b; ++ if (isConstantValue(a)) { ++ Constant c = asConstant(a); ++ if (HotSpotCompressedNullConstant.COMPRESSED_NULL.equals(c)) { ++ localA = SW64.zr.asValue(LIRKind.value(SW64Kind.DWORD)); ++ } else if (c instanceof HotSpotObjectConstant) { ++ localA = 
load(localA); ++ } ++ } ++ if (isConstantValue(b)) { ++ Constant c = asConstant(b); ++ if (HotSpotCompressedNullConstant.COMPRESSED_NULL.equals(c)) { ++ localB = SW64.zr.asValue(LIRKind.value(SW64Kind.DWORD)); ++ } else if (c instanceof HotSpotObjectConstant) { ++ localB = load(localB); ++ } ++ } ++ return super.emitCompare(cmpKind, localA, localB, condition, unorderedIsTrue); ++ } ++ ++ @Override ++ public Value emitCompress(Value pointer, CompressEncoding encoding, boolean nonNull) { ++ LIRKind inputKind = pointer.getValueKind(LIRKind.class); ++ LIRKindTool lirKindTool = getLIRKindTool(); ++ assert inputKind.getPlatformKind() == SW64Kind.QWORD; ++ if (inputKind.isReference(0)) { ++ // oop ++ Variable result = newVariable(LIRKind.compressedReference(SW64Kind.DWORD)); ++ append(new SW64HotSpotMove.CompressPointer(result, asAllocatable(pointer), getProviders().getRegisters().getHeapBaseRegister().asValue(), encoding, nonNull)); ++ return result; ++ } else { ++ // metaspace pointer ++ Variable result = newVariable(LIRKind.value(SW64Kind.DWORD)); ++ AllocatableValue base = Value.ILLEGAL; ++ OptionValues options = getResult().getLIR().getOptions(); ++ if (encoding.hasBase() || GeneratePIC.getValue(options)) { ++ if (GeneratePIC.getValue(options)) { ++ Variable baseAddress = newVariable(lirKindTool.getWordKind()); ++ SW64HotSpotMove.BaseMove move = new SW64HotSpotMove.BaseMove(baseAddress, config); ++ append(move); ++ base = baseAddress; ++ } else { ++ base = emitLoadConstant(LIRKind.value(SW64Kind.QWORD), JavaConstant.forLong(encoding.getBase())); ++ } ++ } ++ append(new SW64HotSpotMove.CompressPointer(result, asAllocatable(pointer), base, encoding, nonNull)); ++ return result; ++ } ++ } ++ ++ @Override ++ public Value emitUncompress(Value pointer, CompressEncoding encoding, boolean nonNull) { ++ LIRKind inputKind = pointer.getValueKind(LIRKind.class); ++ assert inputKind.getPlatformKind() == SW64Kind.DWORD; ++ if (inputKind.isReference(0)) { ++ // oop ++ Variable result = newVariable(LIRKind.reference(SW64Kind.QWORD)); ++ append(new SW64HotSpotMove.UncompressPointer(result, asAllocatable(pointer), getProviders().getRegisters().getHeapBaseRegister().asValue(), encoding, nonNull)); ++ return result; ++ } else { ++ // metaspace pointer ++ Variable result = newVariable(LIRKind.value(SW64Kind.QWORD)); ++ AllocatableValue base = Value.ILLEGAL; ++ OptionValues options = getResult().getLIR().getOptions(); ++ if (encoding.hasBase() || GeneratePIC.getValue(options)) { ++ if (GeneratePIC.getValue(options)) { ++ Variable baseAddress = newVariable(LIRKind.value(SW64Kind.QWORD)); ++ SW64HotSpotMove.BaseMove move = new SW64HotSpotMove.BaseMove(baseAddress, config); ++ append(move); ++ base = baseAddress; ++ } else { ++ base = emitLoadConstant(LIRKind.value(SW64Kind.QWORD), JavaConstant.forLong(encoding.getBase())); ++ } ++ } ++ append(new SW64HotSpotMove.UncompressPointer(result, asAllocatable(pointer), base, encoding, nonNull)); ++ return result; ++ } ++ } ++ ++ @Override ++ public void emitNullCheck(Value address, LIRFrameState state) { ++ if (address.getValueKind().getPlatformKind() == SW64Kind.DWORD) { ++ CompressEncoding encoding = config.getOopEncoding(); ++ Value uncompressed = emitUncompress(address, encoding, false); ++ append(new SW64Move.NullCheckOp(asAddressValue(uncompressed), state)); ++ } else { ++ super.emitNullCheck(address, state); ++ } ++ } ++ ++ @Override ++ public void emitPrefetchAllocate(Value address) { ++ append(new SW64PrefetchOp(asAddressValue(address), PrefetchMode.PSTL1KEEP)); 
++ } ++ ++ @Override ++ public void beforeRegisterAllocation() { ++ super.beforeRegisterAllocation(); ++ boolean hasDebugInfo = getResult().getLIR().hasDebugInfo(); ++ if (hasDebugInfo) { ++ getResult().setDeoptimizationRescueSlot(((SW64FrameMapBuilder) getResult().getFrameMapBuilder()).allocateDeoptimizationRescueSlot()); ++ } ++ ++ getResult().setMaxInterpreterFrameSize(debugInfoBuilder.maxInterpreterFrameSize()); ++ } ++ ++ private Label label; ++ ++ @Override ++ public Variable emitForeignCall(ForeignCallLinkage linkage, LIRFrameState state, Value... args) { ++ HotSpotForeignCallLinkage hotspotLinkage = (HotSpotForeignCallLinkage) linkage; ++ boolean destroysRegisters = hotspotLinkage.destroysRegisters(); ++ ++ SW64SaveRegistersOp save = null; ++ Stub stub = getStub(); ++ if (destroysRegisters) { ++ if (stub != null && stub.preservesRegisters()) { ++ Register[] savedRegisters = getRegisterConfig().getAllocatableRegisters().toArray(); ++ save = emitSaveAllRegisters(savedRegisters, true); ++ } ++ } ++ ++ Variable result; ++ LIRFrameState debugInfo = null; ++ if (hotspotLinkage.needsDebugInfo()) { ++ debugInfo = state; ++ assert debugInfo != null || getStub() != null; ++ } ++ ++ if (destroysRegisters || hotspotLinkage.needsJavaFrameAnchor()) { ++ HotSpotRegistersProvider registers = getProviders().getRegisters(); ++ Register thread = registers.getThreadRegister(); ++ Variable scratch = newVariable(LIRKind.value(target().arch.getWordKind())); ++ ++ // We need a label for the return address. ++ label = new Label(); ++ ++ append(new SW64HotSpotCRuntimeCallPrologueOp(config.threadLastJavaSpOffset(), config.threadLastJavaPcOffset(), thread, scratch, label)); ++ result = super.emitForeignCall(hotspotLinkage, debugInfo, args); ++ append(new SW64HotSpotCRuntimeCallEpilogueOp(config.threadLastJavaSpOffset(), config.threadLastJavaPcOffset(), thread, label)); ++ ++ // Clear it out so it's not being reused later. 
++ label = null; ++ } else { ++ result = super.emitForeignCall(hotspotLinkage, debugInfo, args); ++ } ++ ++ if (destroysRegisters) { ++ if (stub != null) { ++ if (stub.preservesRegisters()) { ++ HotSpotLIRGenerationResult generationResult = getResult(); ++ LIRFrameState key = currentRuntimeCallInfo; ++ if (key == null) { ++ key = LIRFrameState.NO_STATE; ++ } ++ assert !generationResult.getCalleeSaveInfo().containsKey(key); ++ generationResult.getCalleeSaveInfo().put(key, save); ++ emitRestoreRegisters(save); ++ } ++ } ++ } ++ ++ return result; ++ } ++ ++ @Override ++ public void emitDeoptimizeCaller(DeoptimizationAction action, DeoptimizationReason reason) { ++ Value actionAndReason = emitJavaConstant(getMetaAccess().encodeDeoptActionAndReason(action, reason, 0)); ++ Value speculation = emitJavaConstant(getMetaAccess().encodeSpeculation(SpeculationLog.NO_SPECULATION)); ++ moveDeoptValuesToThread(actionAndReason, speculation); ++ append(new SW64HotSpotDeoptimizeCallerOp(config)); ++ } ++ ++ @Override ++ public void emitDeoptimize(Value actionAndReason, Value failedSpeculation, LIRFrameState state) { ++ moveDeoptValuesToThread(actionAndReason, failedSpeculation); ++ append(new SW64HotSpotDeoptimizeOp(state)); ++ } ++ ++ private void moveDeoptValuesToThread(Value actionAndReason, Value speculation) { ++ moveValueToThread(actionAndReason, config.pendingDeoptimizationOffset); ++ moveValueToThread(speculation, config.pendingFailedSpeculationOffset); ++ } ++ ++ private void moveValueToThread(Value value, int offset) { ++ LIRKind wordKind = LIRKind.value(target().arch.getWordKind()); ++ RegisterValue thread = getProviders().getRegisters().getThreadRegister().asValue(wordKind); ++ final int transferSize = value.getValueKind().getPlatformKind().getSizeInBytes(); ++ SW64AddressValue address = new SW64AddressValue(value.getValueKind(), thread, Value.ILLEGAL, offset, transferSize, AddressingMode.IMMEDIATE_SCALED); ++ append(new StoreOp((SW64Kind) value.getPlatformKind(), address, loadReg(value), null)); ++ } ++ ++ @Override ++ public void emitUnwind(Value exception) { ++ ForeignCallLinkage linkage = getForeignCalls().lookupForeignCall(HotSpotBackend.UNWIND_EXCEPTION_TO_CALLER); ++ CallingConvention outgoingCc = linkage.getOutgoingCallingConvention(); ++ assert outgoingCc.getArgumentCount() == 2; ++ RegisterValue exceptionParameter = (RegisterValue) outgoingCc.getArgument(0); ++ emitMove(exceptionParameter, exception); ++ append(new SW64HotSpotUnwindOp(config, exceptionParameter)); ++ } ++ ++ @Override ++ public Value emitLoadObjectAddress(Constant constant) { ++ HotSpotObjectConstant objectConstant = (HotSpotObjectConstant) constant; ++ LIRKind kind = objectConstant.isCompressed() ? getLIRKindTool().getNarrowOopKind() : getLIRKindTool().getObjectKind(); ++ Variable result = newVariable(kind); ++ append(new SW64HotSpotLoadAddressOp(result, constant, HotSpotConstantLoadAction.RESOLVE)); ++ return result; ++ } ++ ++ @Override ++ public Value emitLoadMetaspaceAddress(Constant constant, HotSpotConstantLoadAction action) { ++ HotSpotMetaspaceConstant metaspaceConstant = (HotSpotMetaspaceConstant) constant; ++ LIRKind kind = metaspaceConstant.isCompressed() ? 
getLIRKindTool().getNarrowPointerKind() : getLIRKindTool().getWordKind(); ++ Variable result = newVariable(kind); ++ append(new SW64HotSpotLoadAddressOp(result, constant, action)); ++ return result; ++ } ++ ++ private Value emitConstantRetrieval(ForeignCallDescriptor foreignCall, Object[] notes, Constant[] constants, AllocatableValue[] constantDescriptions, LIRFrameState frameState) { ++ ForeignCallLinkage linkage = getForeignCalls().lookupForeignCall(foreignCall); ++ append(new SW64HotSpotConstantRetrievalOp(constants, constantDescriptions, frameState, linkage, notes)); ++ AllocatableValue result = linkage.getOutgoingCallingConvention().getReturn(); ++ return emitMove(result); ++ } ++ ++ private Value emitConstantRetrieval(ForeignCallDescriptor foreignCall, HotSpotConstantLoadAction action, Constant constant, AllocatableValue[] constantDescriptions, LIRFrameState frameState) { ++ Constant[] constants = new Constant[]{constant}; ++ Object[] notes = new Object[]{action}; ++ return emitConstantRetrieval(foreignCall, notes, constants, constantDescriptions, frameState); ++ } ++ ++ @Override ++ public Value emitResolveDynamicInvoke(Constant appendix, LIRFrameState frameState) { ++ AllocatableValue[] constantDescriptions = new AllocatableValue[0]; ++ return emitConstantRetrieval(RESOLVE_DYNAMIC_INVOKE, INITIALIZE, appendix, constantDescriptions, frameState); ++ } ++ ++ @Override ++ public Value emitLoadConfigValue(int markId, LIRKind kind) { ++ Variable result = newVariable(kind); ++ append(new SW64HotSpotLoadConfigValueOp(markId, result)); ++ return result; ++ } ++ ++ private Value emitConstantRetrieval(ForeignCallDescriptor foreignCall, HotSpotConstantLoadAction action, Constant constant, Value constantDescription, LIRFrameState frameState) { ++ AllocatableValue[] constantDescriptions = new AllocatableValue[]{asAllocatable(constantDescription)}; ++ return emitConstantRetrieval(foreignCall, action, constant, constantDescriptions, frameState); ++ } ++ ++ @Override ++ public Value emitObjectConstantRetrieval(Constant constant, Value constantDescription, LIRFrameState frameState) { ++ return emitConstantRetrieval(RESOLVE_STRING_BY_SYMBOL, RESOLVE, constant, constantDescription, frameState); ++ } ++ ++ @Override ++ public Value emitMetaspaceConstantRetrieval(Constant constant, Value constantDescription, LIRFrameState frameState) { ++ return emitConstantRetrieval(RESOLVE_KLASS_BY_SYMBOL, RESOLVE, constant, constantDescription, frameState); ++ } ++ ++ @Override ++ public void emitReturn(JavaKind kind, Value input) { ++ AllocatableValue operand = Value.ILLEGAL; ++ if (input != null) { ++ operand = resultOperandFor(kind, input.getValueKind()); ++ emitMove(operand, input); ++ } ++ Register thread = getProviders().getRegisters().getThreadRegister(); ++ append(new SW64HotSpotReturnOp(operand, getStub() != null, config, thread, getResult().requiresReservedStackAccessCheck())); ++ } ++ ++ @Override ++ public Value emitKlassInitializationAndRetrieval(Constant constant, Value constantDescription, LIRFrameState frameState) { ++ return emitConstantRetrieval(INITIALIZE_KLASS_BY_SYMBOL, INITIALIZE, constant, constantDescription, frameState); ++ } ++ ++ @Override ++ public Value emitResolveMethodAndLoadCounters(Constant method, Value klassHint, Value methodDescription, LIRFrameState frameState) { ++ AllocatableValue[] constantDescriptions = new AllocatableValue[]{asAllocatable(klassHint), asAllocatable(methodDescription)}; ++ return emitConstantRetrieval(RESOLVE_METHOD_BY_SYMBOL_AND_LOAD_COUNTERS, LOAD_COUNTERS, 
method, constantDescriptions, frameState); ++ } ++ ++ /** ++ * Gets the {@link Stub} this generator is generating code for or {@code null} if a stub is not ++ * being generated. ++ */ ++ public Stub getStub() { ++ return getResult().getStub(); ++ } ++ ++ @Override ++ public HotSpotLIRGenerationResult getResult() { ++ return ((HotSpotLIRGenerationResult) super.getResult()); ++ } ++ ++ @Override ++ protected StrategySwitchOp createStrategySwitchOp(SwitchStrategy strategy, LabelRef[] keyTargets, LabelRef defaultTarget, Variable key, AllocatableValue scratchValue, ++ Function converter) { ++ return new SW64HotSpotStrategySwitchOp(strategy, keyTargets, defaultTarget, key, scratchValue, converter); ++ } ++ ++ public void setDebugInfoBuilder(HotSpotDebugInfoBuilder debugInfoBuilder) { ++ this.debugInfoBuilder = debugInfoBuilder; ++ } ++ ++ @Override ++ public SaveRegistersOp createZapRegisters(Register[] zappedRegisters, JavaConstant[] zapValues) { ++ throw GraalError.unimplemented(); ++ } ++ ++ @Override ++ public LIRInstruction createZapArgumentSpace(StackSlot[] zappedStack, JavaConstant[] zapValues) { ++ throw GraalError.unimplemented(); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotLoadAddressOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotLoadAddressOp.java +new file mode 100644 +index 0000000000..4540e6330c +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotLoadAddressOp.java +@@ -0,0 +1,79 @@ ++/* ++ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.Constant; ++ ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.debug.GraalError; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.sw64.SW64LIRInstruction; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++public final class SW64HotSpotLoadAddressOp extends SW64LIRInstruction { ++ ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64HotSpotLoadAddressOp.class); ++ ++ @Def({OperandFlag.REG}) protected AllocatableValue result; ++ private final Constant constant; ++ private final Object note; ++ ++ public SW64HotSpotLoadAddressOp(AllocatableValue result, Constant constant, Object note) { ++ super(TYPE); ++ this.result = result; ++ this.constant = constant; ++ this.note = note; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ crb.recordInlineDataInCodeWithNote(constant, note); ++ SW64Kind kind = (SW64Kind) result.getPlatformKind(); ++ int size = 0; ++ switch (kind) { ++ case DWORD: ++ size = 32; ++ break; ++ case QWORD: ++ size = 64; ++ break; ++ default: ++ throw GraalError.shouldNotReachHere("unexpected kind: " + kind); ++ } ++ if (crb.compilationResult.isImmutablePIC()) { ++ Register dst = asRegister(result); ++ masm.addressOf(dst); ++ masm.ldr(size, dst, SW64Address.createBaseRegisterOnlyAddress(dst)); ++ } else { ++ masm.ldr(size, asRegister(result), masm.getPlaceholder(-1)); ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotLoadConfigValueOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotLoadConfigValueOp.java +new file mode 100644 +index 0000000000..ff6e230bc0 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotLoadConfigValueOp.java +@@ -0,0 +1,85 @@ ++/* ++ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static org.graalvm.compiler.core.common.GraalOptions.GeneratePIC; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.AllocatableValue; ++ ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.debug.GraalError; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.sw64.SW64LIRInstruction; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++public final class SW64HotSpotLoadConfigValueOp extends SW64LIRInstruction { ++ ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64HotSpotLoadConfigValueOp.class); ++ ++ @Def({OperandFlag.REG}) protected AllocatableValue result; ++ private final int markId; ++ ++ public SW64HotSpotLoadConfigValueOp(int markId, AllocatableValue result) { ++ super(TYPE); ++ this.result = result; ++ this.markId = markId; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ if (GeneratePIC.getValue(crb.getOptions())) { ++ SW64Kind kind = (SW64Kind) result.getPlatformKind(); ++ Register reg = asRegister(result); ++ masm.adrp(reg); ++ masm.add(64, reg, reg, 1); ++ switch (kind) { ++ case BYTE: ++ masm.ldrs(8, 32, reg, SW64Address.createBaseRegisterOnlyAddress(reg)); ++ break; ++ case WORD: ++ masm.ldrs(16, 32, reg, SW64Address.createBaseRegisterOnlyAddress(reg)); ++ break; ++ case DWORD: ++ masm.ldr(32, reg, SW64Address.createBaseRegisterOnlyAddress(reg)); ++ break; ++ case QWORD: ++ masm.ldr(64, reg, SW64Address.createBaseRegisterOnlyAddress(reg)); ++ break; ++ default: ++ throw GraalError.unimplemented(); ++ } ++ masm.nop(); ++ } else { ++ throw GraalError.unimplemented(); ++ } ++ crb.recordMark(markId); ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotLoweringProvider.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotLoweringProvider.java +new file mode 100644 +index 0000000000..c3c0000899 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotLoweringProvider.java +@@ -0,0 +1,78 @@ ++/* ++ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import org.graalvm.compiler.core.common.spi.ForeignCallsProvider; ++import org.graalvm.compiler.debug.DebugHandlersFactory; ++import org.graalvm.compiler.graph.Node; ++import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig; ++import org.graalvm.compiler.hotspot.HotSpotGraalRuntimeProvider; ++import org.graalvm.compiler.hotspot.meta.DefaultHotSpotLoweringProvider; ++import org.graalvm.compiler.hotspot.meta.HotSpotProviders; ++import org.graalvm.compiler.hotspot.meta.HotSpotRegistersProvider; ++import org.graalvm.compiler.nodes.calc.FloatConvertNode; ++import org.graalvm.compiler.nodes.calc.IntegerDivRemNode; ++import org.graalvm.compiler.nodes.calc.RemNode; ++import org.graalvm.compiler.nodes.spi.LoweringTool; ++import org.graalvm.compiler.options.OptionValues; ++import org.graalvm.compiler.replacements.sw64.SW64FloatArithmeticSnippets; ++import org.graalvm.compiler.replacements.sw64.SW64IntegerArithmeticSnippets; ++ ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.hotspot.HotSpotConstantReflectionProvider; ++import jdk.vm.ci.meta.MetaAccessProvider; ++ ++public class SW64HotSpotLoweringProvider extends DefaultHotSpotLoweringProvider { ++ ++ private SW64IntegerArithmeticSnippets integerArithmeticSnippets; ++ private SW64FloatArithmeticSnippets floatArithmeticSnippets; ++ ++ public SW64HotSpotLoweringProvider(HotSpotGraalRuntimeProvider runtime, MetaAccessProvider metaAccess, ForeignCallsProvider foreignCalls, HotSpotRegistersProvider registers, ++ HotSpotConstantReflectionProvider constantReflection, TargetDescription target) { ++ super(runtime, metaAccess, foreignCalls, registers, constantReflection, target); ++ } ++ ++ @Override ++ public void initialize(OptionValues options, Iterable factories, HotSpotProviders providers, GraalHotSpotVMConfig config) { ++ integerArithmeticSnippets = new SW64IntegerArithmeticSnippets(options, factories, providers, providers.getSnippetReflection(), providers.getCodeCache().getTarget()); ++ floatArithmeticSnippets = new SW64FloatArithmeticSnippets(options, factories, providers, providers.getSnippetReflection(), providers.getCodeCache().getTarget()); ++ super.initialize(options, factories, providers, config); ++ } ++ ++ @Override ++ public void lower(Node n, LoweringTool tool) { ++ if (n instanceof IntegerDivRemNode) { ++ integerArithmeticSnippets.lower((IntegerDivRemNode) n, tool); ++ } else if (n instanceof RemNode) { ++ floatArithmeticSnippets.lower((RemNode) n, tool); ++ } else if (n instanceof FloatConvertNode) { ++ // AMD64 has custom lowerings for ConvertNodes, HotSpotLoweringProvider does not expect ++ // to see a ConvertNode and throws an error, just do nothing here. ++ } else { ++ super.lower(n, tool); ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotMove.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotMove.java +new file mode 100644 +index 0000000000..8970870951 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotMove.java +@@ -0,0 +1,244 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. 
All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static jdk.vm.ci.sw64.SW64.zr; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++import static jdk.vm.ci.code.ValueUtil.isRegister; ++import static org.graalvm.compiler.core.common.GraalOptions.GeneratePIC; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.HINT; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.STACK; ++ ++import org.graalvm.compiler.asm.Label; ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.asm.sw64.SW64Assembler; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.core.common.CompressEncoding; ++import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig; ++import org.graalvm.compiler.lir.LIRInstruction; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.StandardOp.LoadConstantOp; ++import org.graalvm.compiler.lir.sw64.SW64LIRInstruction; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.hotspot.HotSpotConstant; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.Constant; ++ ++public class SW64HotSpotMove { ++ ++ public static class LoadHotSpotObjectConstantInline extends SW64LIRInstruction implements LoadConstantOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(LoadHotSpotObjectConstantInline.class); ++ ++ private HotSpotConstant constant; ++ @Def({REG, STACK}) AllocatableValue result; ++ ++ public LoadHotSpotObjectConstantInline(HotSpotConstant constant, AllocatableValue result) { ++ super(TYPE); ++ this.constant = constant; ++ this.result = result; ++ } ++ ++ @Override ++ protected void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ crb.recordInlineDataInCode(constant); ++ if (constant.isCompressed()) { ++ // masm.forceMov(asRegister(result), 0); ++ masm.movNarrowAddress(asRegister(result), 0); ++ } else { ++ masm.movNativeAddress(asRegister(result), 0); ++ } ++ } ++ ++ @Override ++ public AllocatableValue getResult() { ++ return result; ++ } ++ ++ @Override ++ public Constant getConstant() { ++ return constant; ++ } ++ } ++ ++ public static final class BaseMove extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = 
LIRInstructionClass.create(BaseMove.class); ++ ++ @LIRInstruction.Def({REG, HINT}) protected AllocatableValue result; ++ private final GraalHotSpotVMConfig config; ++ ++ public BaseMove(AllocatableValue result, GraalHotSpotVMConfig config) { ++ super(TYPE); ++ this.result = result; ++ this.config = config; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ try (SW64MacroAssembler.ScratchRegister sc = masm.getScratchRegister()) { ++ Register scratch = sc.getRegister(); ++ masm.adrp(scratch); ++ masm.add(64, scratch, scratch, 1); ++ masm.ldr(64, asRegister(result), SW64Address.createBaseRegisterOnlyAddress(scratch)); ++ masm.nop(); ++ crb.recordMark(config.MARKID_NARROW_KLASS_BASE_ADDRESS); ++ } ++ } ++ ++ } ++ ++ /** ++ * Compresses a 8-byte pointer as a 4-byte int. ++ */ ++ public static class CompressPointer extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(CompressPointer.class); ++ ++ private final CompressEncoding encoding; ++ private final boolean nonNull; ++ ++ @Def({REG, HINT}) protected AllocatableValue result; ++ @Use({REG}) protected AllocatableValue input; ++ @Alive({REG, ILLEGAL}) protected AllocatableValue baseRegister; ++ ++ public CompressPointer(AllocatableValue result, AllocatableValue input, AllocatableValue baseRegister, CompressEncoding encoding, boolean nonNull) { ++ super(TYPE); ++ this.result = result; ++ this.input = input; ++ this.baseRegister = baseRegister; ++ this.encoding = encoding; ++ this.nonNull = nonNull; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ Register resultRegister = asRegister(result); ++ Register ptr = asRegister(input); ++ Register base = (isRegister(baseRegister) ? asRegister(baseRegister) : zr); ++ // result = (ptr - base) >> shift ++ if (!encoding.hasBase()) { ++ if (encoding.hasShift()) { ++ masm.lshr(64, resultRegister, ptr, encoding.getShift()); ++ } else { ++ masm.movx(resultRegister, ptr); ++ } ++ } else if (nonNull) { ++ masm.sub(64, resultRegister, ptr, base); ++ if (encoding.hasShift()) { ++ masm.lshr(64, resultRegister, resultRegister, encoding.getShift()); ++ } ++ } else { ++ // if ptr is null it still has to be null after compression ++ masm.cmp(64, ptr, 0); ++ masm.cmov(64, resultRegister, ptr, base, SW64Assembler.ConditionFlag.NE); ++ masm.sub(64, resultRegister, resultRegister, base); ++ if (encoding.hasShift()) { ++ masm.lshr(64, resultRegister, resultRegister, encoding.getShift()); ++ } ++ } ++ } ++ } ++ ++ /** ++ * Decompresses a 4-byte offset into an actual pointer. 
++ */ ++ public static class UncompressPointer extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(UncompressPointer.class); ++ ++ private final CompressEncoding encoding; ++ private final boolean nonNull; ++ ++ @Def({REG}) protected AllocatableValue result; ++ @Use({REG}) protected AllocatableValue input; ++ @Alive({REG, ILLEGAL}) protected AllocatableValue baseRegister; ++ ++ public UncompressPointer(AllocatableValue result, AllocatableValue input, AllocatableValue baseRegister, CompressEncoding encoding, boolean nonNull) { ++ super(TYPE); ++ this.result = result; ++ this.input = input; ++ this.baseRegister = baseRegister; ++ this.encoding = encoding; ++ this.nonNull = nonNull; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ Register inputRegister = asRegister(input); ++ Register resultRegister = asRegister(result); ++ Register base = encoding.hasBase() ? asRegister(baseRegister) : null; ++ emitUncompressCode(masm, inputRegister, resultRegister, base, encoding.getShift(), nonNull); ++ } ++ ++ public static void emitUncompressCode(SW64MacroAssembler masm, Register inputRegister, Register resReg, Register baseReg, int shift, boolean nonNull) { ++ // result = base + (ptr << shift) ++ if (nonNull || baseReg == null) { ++ masm.add(64, resReg, baseReg == null ? zr : baseReg, inputRegister, SW64Assembler.ShiftType.LSL, shift); ++ } else { ++ // if ptr is null it has to be null after decompression ++ Label done = new Label(); ++ if (!resReg.equals(inputRegister)) { ++ masm.mov(32, resReg, inputRegister); ++ } ++ masm.cbz(32, resReg, done); ++ masm.add(64, resReg, baseReg, resReg, SW64Assembler.ShiftType.LSL, shift); ++ masm.bind(done); ++ } ++ } ++ } ++ ++ // ++ // private static void decompressPointer(CompilationResultBuilder crb, ARMv8MacroAssembler masm, ++ // Register result, ++ // Register ptr, long base, int shift, int alignment) { ++ // assert base != 0 || shift == 0 || alignment == shift; ++ // // result = heapBase + ptr << alignment ++ // Register heapBase = ARMv8.heapBaseRegister; ++ // // if result == 0, we make sure that it will still be 0 at the end, so that it traps when ++ // // loading storing a value. 
++ // masm.cmp(32, ptr, 0); ++ // masm.add(64, result, heapBase, ptr, ARMv8Assembler.ExtendType.UXTX, alignment); ++ // masm.cmov(64, result, result, ARMv8.zr, ARMv8Assembler.ConditionFlag.NE); ++ // } ++ ++ public static void decodeKlassPointer(CompilationResultBuilder crb, SW64MacroAssembler masm, Register result, Register ptr, CompressEncoding encoding, GraalHotSpotVMConfig config) { ++ try (SW64MacroAssembler.ScratchRegister sc = masm.getScratchRegister()) { ++ Register scratch = sc.getRegister(); ++ boolean pic = GeneratePIC.getValue(crb.getOptions()); ++ if (pic || encoding.hasBase() || encoding.getShift() != 0) { ++ if (pic) { ++ masm.addressOf(scratch); ++ masm.ldr(64, scratch, SW64Address.createBaseRegisterOnlyAddress(scratch)); ++ masm.add(64, result, scratch, ptr, SW64Assembler.ExtendType.UXTX, encoding.getShift()); ++ crb.recordMark(config.MARKID_NARROW_KLASS_BASE_ADDRESS); ++ } else { ++ masm.mov(scratch, encoding.getBase()); ++ masm.add(64, result, scratch, ptr, SW64Assembler.ExtendType.UXTX, encoding.getShift()); ++ } ++ } ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotMoveFactory.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotMoveFactory.java +new file mode 100644 +index 0000000000..c4347ae48f +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotMoveFactory.java +@@ -0,0 +1,78 @@ ++/* ++ * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static jdk.vm.ci.hotspot.HotSpotCompressedNullConstant.COMPRESSED_NULL; ++import static jdk.vm.ci.meta.JavaConstant.INT_0; ++import static jdk.vm.ci.meta.JavaConstant.LONG_0; ++ ++import org.graalvm.compiler.core.sw64.SW64MoveFactory; ++import org.graalvm.compiler.lir.LIRInstruction; ++ ++import jdk.vm.ci.hotspot.HotSpotCompressedNullConstant; ++import jdk.vm.ci.hotspot.HotSpotConstant; ++import jdk.vm.ci.hotspot.HotSpotMetaspaceConstant; ++import jdk.vm.ci.hotspot.HotSpotObjectConstant; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.Constant; ++ ++public class SW64HotSpotMoveFactory extends SW64MoveFactory { ++ ++ @Override ++ public boolean canInlineConstant(Constant c) { ++ if (HotSpotCompressedNullConstant.COMPRESSED_NULL.equals(c)) { ++ return true; ++ } else if (c instanceof HotSpotObjectConstant || c instanceof HotSpotMetaspaceConstant) { ++ return false; ++ } else { ++ return super.canInlineConstant(c); ++ } ++ } ++ ++ @Override ++ public LIRInstruction createLoad(AllocatableValue dst, Constant src) { ++ Constant usedSource; ++ if (COMPRESSED_NULL.equals(src)) { ++ usedSource = INT_0; ++ } else if (src instanceof HotSpotObjectConstant && ((HotSpotObjectConstant) src).isNull()) { ++ usedSource = LONG_0; ++ } else { ++ usedSource = src; ++ } ++ if (usedSource instanceof HotSpotConstant) { ++ HotSpotConstant constant = (HotSpotConstant) usedSource; ++ if (constant.isCompressed()) { ++ return new SW64HotSpotMove.LoadHotSpotObjectConstantInline(constant, dst); ++ } else { ++ // XXX Do we need the constant table? ++ // return new SPARCHotSpotMove.LoadHotSpotObjectConstantFromTable(constant, dst, ++ // constantTableBaseProvider.getConstantTableBase()); ++ return new SW64HotSpotMove.LoadHotSpotObjectConstantInline(constant, dst); ++ } ++ } else { ++ return super.createLoad(dst, usedSource); ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotNodeLIRBuilder.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotNodeLIRBuilder.java +new file mode 100644 +index 0000000000..e6e5fb317f +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotNodeLIRBuilder.java +@@ -0,0 +1,193 @@ ++/* ++ * Copyright (c) 2015, 2017, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static jdk.vm.ci.sw64.SW64.lr; ++import static jdk.vm.ci.code.ValueUtil.isStackSlot; ++import static jdk.vm.ci.hotspot.sw64.SW64HotSpotRegisterConfig.fp; ++import static jdk.vm.ci.hotspot.sw64.SW64HotSpotRegisterConfig.inlineCacheRegister; ++import static jdk.vm.ci.hotspot.sw64.SW64HotSpotRegisterConfig.metaspaceMethodRegister; ++import static org.graalvm.compiler.hotspot.HotSpotBackend.EXCEPTION_HANDLER_IN_CALLER; ++ ++import org.graalvm.compiler.core.sw64.SW64NodeLIRBuilder; ++import org.graalvm.compiler.core.sw64.SW64NodeMatchRules; ++import org.graalvm.compiler.core.common.LIRKind; ++import org.graalvm.compiler.core.common.spi.ForeignCallLinkage; ++import org.graalvm.compiler.core.gen.DebugInfoBuilder; ++import org.graalvm.compiler.hotspot.HotSpotDebugInfoBuilder; ++import org.graalvm.compiler.hotspot.HotSpotLIRGenerator; ++import org.graalvm.compiler.hotspot.HotSpotLockStack; ++import org.graalvm.compiler.hotspot.HotSpotNodeLIRBuilder; ++import org.graalvm.compiler.hotspot.nodes.HotSpotDirectCallTargetNode; ++import org.graalvm.compiler.hotspot.nodes.HotSpotIndirectCallTargetNode; ++import org.graalvm.compiler.lir.LIRFrameState; ++import org.graalvm.compiler.lir.Variable; ++import org.graalvm.compiler.lir.sw64.SW64BreakpointOp; ++import org.graalvm.compiler.lir.gen.LIRGeneratorTool; ++import org.graalvm.compiler.nodes.BreakpointNode; ++import org.graalvm.compiler.nodes.CallTargetNode.InvokeKind; ++import org.graalvm.compiler.nodes.DirectCallTargetNode; ++import org.graalvm.compiler.nodes.FullInfopointNode; ++import org.graalvm.compiler.nodes.IndirectCallTargetNode; ++import org.graalvm.compiler.nodes.NodeView; ++import org.graalvm.compiler.nodes.ParameterNode; ++import org.graalvm.compiler.nodes.SafepointNode; ++import org.graalvm.compiler.nodes.StructuredGraph; ++import org.graalvm.compiler.nodes.ValueNode; ++import org.graalvm.compiler.nodes.spi.NodeValueMap; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++import jdk.vm.ci.code.BytecodeFrame; ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterValue; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.code.ValueUtil; ++import jdk.vm.ci.hotspot.HotSpotCallingConventionType; ++import jdk.vm.ci.hotspot.HotSpotResolvedJavaMethod; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaType; ++import jdk.vm.ci.meta.Value; ++ ++/** ++ * LIR generator specialized for SW64 HotSpot. 
++ */ ++public class SW64HotSpotNodeLIRBuilder extends SW64NodeLIRBuilder implements HotSpotNodeLIRBuilder { ++ ++ public SW64HotSpotNodeLIRBuilder(StructuredGraph graph, LIRGeneratorTool gen, SW64NodeMatchRules nodeMatchRules) { ++ super(graph, gen, nodeMatchRules); ++ assert gen instanceof SW64HotSpotLIRGenerator; ++ assert getDebugInfoBuilder() instanceof HotSpotDebugInfoBuilder; ++ ((SW64HotSpotLIRGenerator) gen).setDebugInfoBuilder(((HotSpotDebugInfoBuilder) getDebugInfoBuilder())); ++ } ++ ++ @Override ++ protected DebugInfoBuilder createDebugInfoBuilder(StructuredGraph graph, NodeValueMap nodeValueMap) { ++ HotSpotLockStack lockStack = new HotSpotLockStack(gen.getResult().getFrameMapBuilder(), LIRKind.value(SW64Kind.QWORD)); ++ return new HotSpotDebugInfoBuilder(nodeValueMap, lockStack, (HotSpotLIRGenerator) gen); ++ } ++ ++ private SW64HotSpotLIRGenerator getGen() { ++ return (SW64HotSpotLIRGenerator) gen; ++ } ++ ++ @Override ++ protected void emitPrologue(StructuredGraph graph) { ++ CallingConvention incomingArguments = gen.getResult().getCallingConvention(); ++ Value[] params = new Value[incomingArguments.getArgumentCount() + 2]; ++ for (int i = 0; i < incomingArguments.getArgumentCount(); i++) { ++ params[i] = incomingArguments.getArgument(i); ++ if (isStackSlot(params[i])) { ++ StackSlot slot = ValueUtil.asStackSlot(params[i]); ++ if (slot.isInCallerFrame() && !gen.getResult().getLIR().hasArgInCallerFrame()) { ++ gen.getResult().getLIR().setHasArgInCallerFrame(); ++ } ++ } ++ } ++ params[params.length - 2] = fp.asValue(LIRKind.value(SW64Kind.QWORD)); ++ params[params.length - 1] = lr.asValue(LIRKind.value(SW64Kind.QWORD)); ++ ++ gen.emitIncomingValues(params); ++ ++ for (ParameterNode param : graph.getNodes(ParameterNode.TYPE)) { ++ Value paramValue = params[param.index()]; ++ assert paramValue.getValueKind().equals(getLIRGeneratorTool().getLIRKind(param.stamp(NodeView.DEFAULT))) : paramValue.getValueKind() + " != " + param.stamp(NodeView.DEFAULT); ++ setResult(param, gen.emitMove(paramValue)); ++ } ++ } ++ ++ @Override ++ public void visitSafepointNode(SafepointNode i) { ++ LIRFrameState info = state(i); ++ Register thread = getGen().getProviders().getRegisters().getThreadRegister(); ++ Variable scratch = gen.newVariable(LIRKind.value(getGen().target().arch.getWordKind())); ++ append(new SW64HotSpotSafepointOp(info, getGen().config, thread, scratch)); ++ } ++ ++ @Override ++ protected void emitDirectCall(DirectCallTargetNode callTarget, Value result, Value[] parameters, Value[] temps, LIRFrameState callState) { ++ InvokeKind invokeKind = ((HotSpotDirectCallTargetNode) callTarget).invokeKind(); ++ if (invokeKind.isIndirect()) { ++ append(new SW64HotSpotDirectVirtualCallOp(callTarget.targetMethod(), result, parameters, temps, callState, invokeKind, getGen().config)); ++ } else { ++ assert invokeKind.isDirect(); ++ HotSpotResolvedJavaMethod resolvedMethod = (HotSpotResolvedJavaMethod) callTarget.targetMethod(); ++ assert resolvedMethod.isConcrete() : "Cannot make direct call to abstract method."; ++ append(new SW64HotSpotDirectStaticCallOp(callTarget.targetMethod(), result, parameters, temps, callState, invokeKind, getGen().config)); ++ } ++ } ++ ++ @Override ++ protected void emitIndirectCall(IndirectCallTargetNode callTarget, Value result, Value[] parameters, Value[] temps, LIRFrameState callState) { ++ Value metaspaceMethodSrc = operand(((HotSpotIndirectCallTargetNode) callTarget).metaspaceMethod()); ++ Value targetAddressSrc = operand(callTarget.computedAddress()); ++ 
AllocatableValue metaspaceMethodDst = metaspaceMethodRegister.asValue(metaspaceMethodSrc.getValueKind()); ++ AllocatableValue targetAddressDst = inlineCacheRegister.asValue(targetAddressSrc.getValueKind()); ++ gen.emitMove(metaspaceMethodDst, metaspaceMethodSrc); ++ gen.emitMove(targetAddressDst, targetAddressSrc); ++ append(new SW64IndirectCallOp(callTarget.targetMethod(), result, parameters, temps, metaspaceMethodDst, targetAddressDst, callState, getGen().config)); ++ } ++ ++ @Override ++ public void emitPatchReturnAddress(ValueNode address) { ++ append(new SW64HotSpotPatchReturnAddressOp(gen.load(operand(address)))); ++ } ++ ++ @Override ++ public void emitJumpToExceptionHandlerInCaller(ValueNode handlerInCallerPc, ValueNode exception, ValueNode exceptionPc) { ++ Variable handler = gen.load(operand(handlerInCallerPc)); ++ ForeignCallLinkage linkage = gen.getForeignCalls().lookupForeignCall(EXCEPTION_HANDLER_IN_CALLER); ++ CallingConvention outgoingCc = linkage.getOutgoingCallingConvention(); ++ assert outgoingCc.getArgumentCount() == 2; ++ RegisterValue exceptionFixed = (RegisterValue) outgoingCc.getArgument(0); ++ RegisterValue exceptionPcFixed = (RegisterValue) outgoingCc.getArgument(1); ++ gen.emitMove(exceptionFixed, operand(exception)); ++ gen.emitMove(exceptionPcFixed, operand(exceptionPc)); ++ Register thread = getGen().getProviders().getRegisters().getThreadRegister(); ++ SW64HotSpotJumpToExceptionHandlerInCallerOp op = new SW64HotSpotJumpToExceptionHandlerInCallerOp(handler, exceptionFixed, exceptionPcFixed, ++ getGen().config.threadIsMethodHandleReturnOffset, thread, getGen().config); ++ append(op); ++ } ++ ++ @Override ++ public void visitFullInfopointNode(FullInfopointNode i) { ++ if (i.getState() != null && i.getState().bci == BytecodeFrame.AFTER_BCI) { ++ i.getDebug().log("Ignoring InfopointNode for AFTER_BCI"); ++ } else { ++ super.visitFullInfopointNode(i); ++ } ++ } ++ ++ @Override ++ public void visitBreakpointNode(BreakpointNode node) { ++ JavaType[] sig = new JavaType[node.arguments().size()]; ++ for (int i = 0; i < sig.length; i++) { ++ sig[i] = node.arguments().get(i).stamp(NodeView.DEFAULT).javaType(gen.getMetaAccess()); ++ } ++ ++ Value[] parameters = visitInvokeArguments(gen.getRegisterConfig().getCallingConvention(HotSpotCallingConventionType.JavaCall, null, sig, gen), node.arguments()); ++ append(new SW64BreakpointOp(parameters)); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotPatchReturnAddressOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotPatchReturnAddressOp.java +new file mode 100644 +index 0000000000..aee4d52e36 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotPatchReturnAddressOp.java +@@ -0,0 +1,63 @@ ++/* ++ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++import static jdk.vm.ci.sw64.SW64.sp; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++ ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.sw64.SW64LIRInstruction; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.meta.AllocatableValue; ++ ++/** ++ * Patch the return address of the current frame. ++ */ ++@Opcode("PATCH_RETURN") ++final class SW64HotSpotPatchReturnAddressOp extends SW64LIRInstruction { ++ ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64HotSpotPatchReturnAddressOp.class); ++ ++ @Use(REG) AllocatableValue address; ++ ++ SW64HotSpotPatchReturnAddressOp(AllocatableValue address) { ++ super(TYPE); ++ this.address = address; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ final int frameSize = crb.frameMap.frameSize(); ++ // LR is saved in the {fp, lr} pair above the frame ++ SW64Address lrAddress = SW64Address.createUnscaledImmediateAddress(sp, ++ frameSize + crb.target.wordSize); ++ masm.str(64, asRegister(address), lrAddress); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotRegisterAllocationConfig.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotRegisterAllocationConfig.java +new file mode 100644 +index 0000000000..262ddee718 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotRegisterAllocationConfig.java +@@ -0,0 +1,133 @@ ++/* ++ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static jdk.vm.ci.sw64.SW64.r0; ++import static jdk.vm.ci.sw64.SW64.r1; ++import static jdk.vm.ci.sw64.SW64.r10; ++import static jdk.vm.ci.sw64.SW64.r11; ++import static jdk.vm.ci.sw64.SW64.r12; ++import static jdk.vm.ci.sw64.SW64.r13; ++import static jdk.vm.ci.sw64.SW64.r14; ++import static jdk.vm.ci.sw64.SW64.r15; ++import static jdk.vm.ci.sw64.SW64.r16; ++import static jdk.vm.ci.sw64.SW64.r17; ++import static jdk.vm.ci.sw64.SW64.r18; ++import static jdk.vm.ci.sw64.SW64.r19; ++import static jdk.vm.ci.sw64.SW64.r2; ++import static jdk.vm.ci.sw64.SW64.r20; ++import static jdk.vm.ci.sw64.SW64.r21; ++import static jdk.vm.ci.sw64.SW64.r22; ++import static jdk.vm.ci.sw64.SW64.r23; ++import static jdk.vm.ci.sw64.SW64.r24; ++import static jdk.vm.ci.sw64.SW64.r25; ++import static jdk.vm.ci.sw64.SW64.r26; ++import static jdk.vm.ci.sw64.SW64.r28; ++import static jdk.vm.ci.sw64.SW64.r3; ++import static jdk.vm.ci.sw64.SW64.r4; ++import static jdk.vm.ci.sw64.SW64.r5; ++import static jdk.vm.ci.sw64.SW64.r6; ++import static jdk.vm.ci.sw64.SW64.r7; ++import static jdk.vm.ci.sw64.SW64.r8; ++import static jdk.vm.ci.sw64.SW64.r9; ++import static jdk.vm.ci.sw64.SW64.v0; ++import static jdk.vm.ci.sw64.SW64.v1; ++import static jdk.vm.ci.sw64.SW64.v10; ++import static jdk.vm.ci.sw64.SW64.v11; ++import static jdk.vm.ci.sw64.SW64.v12; ++import static jdk.vm.ci.sw64.SW64.v13; ++import static jdk.vm.ci.sw64.SW64.v14; ++import static jdk.vm.ci.sw64.SW64.v15; ++import static jdk.vm.ci.sw64.SW64.v16; ++import static jdk.vm.ci.sw64.SW64.v17; ++import static jdk.vm.ci.sw64.SW64.v18; ++import static jdk.vm.ci.sw64.SW64.v19; ++import static jdk.vm.ci.sw64.SW64.v2; ++import static jdk.vm.ci.sw64.SW64.v20; ++import static jdk.vm.ci.sw64.SW64.v21; ++import static jdk.vm.ci.sw64.SW64.v22; ++import static jdk.vm.ci.sw64.SW64.v23; ++import static jdk.vm.ci.sw64.SW64.v24; ++import static jdk.vm.ci.sw64.SW64.v25; ++import static jdk.vm.ci.sw64.SW64.v26; ++import static jdk.vm.ci.sw64.SW64.v27; ++import static jdk.vm.ci.sw64.SW64.v28; ++import static jdk.vm.ci.sw64.SW64.v29; ++import static jdk.vm.ci.sw64.SW64.v3; ++import static jdk.vm.ci.sw64.SW64.v30; ++import static jdk.vm.ci.sw64.SW64.v31; ++import static jdk.vm.ci.sw64.SW64.v4; ++import static jdk.vm.ci.sw64.SW64.v5; ++import static jdk.vm.ci.sw64.SW64.v6; ++import static jdk.vm.ci.sw64.SW64.v7; ++import static jdk.vm.ci.sw64.SW64.v8; ++import static jdk.vm.ci.sw64.SW64.v9; ++ ++import java.util.ArrayList; ++import java.util.BitSet; ++ ++import org.graalvm.compiler.core.common.alloc.RegisterAllocationConfig; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterArray; ++import jdk.vm.ci.code.RegisterConfig; ++ ++public class SW64HotSpotRegisterAllocationConfig extends RegisterAllocationConfig { ++ ++ // @formatter:off ++ static final Register[] registerAllocationOrder = { ++ r0, r1, r2, r3, r4, r5, r6, r7, ++ r8, r9, r10, r11, r12, r13, r14, r15, ++ r16, r17, r18, r19, r20, r21, r22, r23, ++ r24, r25, r26, /* r27, */ r28, /* r29, r30, r31 */ ++ ++ v0, v1, v2, v3, v4, v5, v6, v7, ++ v8, v9, v10, v11, v12, v13, v14, v15, ++ v16, v17, v18, v19, v20, v21, v22, v23, ++ v24, v25, v26, v27, v28, v29, v30, v31 ++ }; ++ // @formatter:on ++ ++ public SW64HotSpotRegisterAllocationConfig(RegisterConfig registerConfig, String[] 
allocationRestrictedTo) { ++ super(registerConfig, allocationRestrictedTo); ++ } ++ ++ @Override ++ protected RegisterArray initAllocatable(RegisterArray registers) { ++ BitSet regMap = new BitSet(registerConfig.getAllocatableRegisters().size()); ++ for (Register reg : registers) { ++ regMap.set(reg.number); ++ } ++ ++ ArrayList allocatableRegisters = new ArrayList<>(registers.size()); ++ for (Register reg : registerAllocationOrder) { ++ if (regMap.get(reg.number)) { ++ allocatableRegisters.add(reg); ++ } ++ } ++ ++ return super.initAllocatable(new RegisterArray(allocatableRegisters)); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotReturnOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotReturnOp.java +new file mode 100644 +index 0000000000..5f0046113c +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotReturnOp.java +@@ -0,0 +1,74 @@ ++/* ++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static jdk.vm.ci.sw64.SW64.lr; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.Value; ++ ++/** ++ * Returns from a function. 
++ */
++@Opcode("RETURN")
++public final class SW64HotSpotReturnOp extends SW64HotSpotEpilogueOp {
++
++    public static final LIRInstructionClass<SW64HotSpotReturnOp> TYPE = LIRInstructionClass.create(SW64HotSpotReturnOp.class);
++
++    @Use({REG, ILLEGAL}) private Value result;
++    private final boolean isStub;
++    private final boolean requiresReservedStackAccessCheck;
++
++    public SW64HotSpotReturnOp(Value result, boolean isStub, GraalHotSpotVMConfig config, Register thread, boolean requiresReservedStackAccessCheck) {
++        super(TYPE, config, thread);
++        this.requiresReservedStackAccessCheck = requiresReservedStackAccessCheck;
++        assert validReturnValue(result);
++        this.result = result;
++        this.isStub = isStub;
++    }
++
++    private static boolean validReturnValue(Value result) {
++        if (result.equals(Value.ILLEGAL)) {
++            return true;
++        }
++        return asRegister(result).encoding == 0;
++    }
++
++    @Override
++    public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) {
++        final boolean emitSafepoint = !isStub;
++        leaveFrame(crb, masm, emitSafepoint, requiresReservedStackAccessCheck);
++        masm.ret(lr);
++    }
++}
+diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotSafepointOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotSafepointOp.java
+new file mode 100644
+index 0000000000..a3d691443b
+--- /dev/null
++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotSafepointOp.java
+@@ -0,0 +1,120 @@
++/*
++ * Copyright (c) 2013, 2017, Oracle and/or its affiliates. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ */
++
++
++package org.graalvm.compiler.hotspot.sw64;
++
++import static jdk.vm.ci.sw64.SW64.zr;
++import static jdk.vm.ci.code.ValueUtil.asRegister;
++
++import org.graalvm.compiler.core.common.NumUtil;
++import org.graalvm.compiler.asm.sw64.SW64Address;
++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler;
++import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig;
++import org.graalvm.compiler.lir.LIRFrameState;
++import org.graalvm.compiler.lir.LIRInstructionClass;
++import org.graalvm.compiler.lir.Opcode;
++import org.graalvm.compiler.lir.sw64.SW64LIRInstruction;
++import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
++
++import jdk.vm.ci.code.Register;
++import jdk.vm.ci.code.site.InfopointReason;
++import jdk.vm.ci.meta.AllocatableValue;
++
++/**
++ * Emits a safepoint poll.
++ */ ++@Opcode("SAFEPOINT") ++public class SW64HotSpotSafepointOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64HotSpotSafepointOp.class); ++ ++ @State protected LIRFrameState state; ++ @Temp protected AllocatableValue scratchValue; ++ ++ private final GraalHotSpotVMConfig config; ++ private final Register thread; ++ ++ public SW64HotSpotSafepointOp(LIRFrameState state, GraalHotSpotVMConfig config, Register thread, AllocatableValue scratch) { ++ super(TYPE); ++ this.state = state; ++ this.config = config; ++ this.thread = thread; ++ this.scratchValue = scratch; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ Register scratch = asRegister(scratchValue); ++ emitCode(crb, masm, config, false, thread, scratch, state); ++ } ++ ++ /** ++ * Conservatively checks whether we can load the safepoint polling address with a single ldr ++ * instruction or not. ++ * ++ * @return true if it is guaranteed that polling page offset will always fit into a 21-bit ++ * signed integer, false otherwise. ++ */ ++ private static boolean isPollingPageFar(GraalHotSpotVMConfig config) { ++ final long pollingPageAddress = config.safepointPollingAddress; ++ return !NumUtil.isSignedNbit(21, pollingPageAddress - config.codeCacheLowBound) || !NumUtil.isSignedNbit(21, pollingPageAddress - config.codeCacheHighBound); ++ } ++ ++ public static void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm, GraalHotSpotVMConfig config, boolean onReturn, Register thread, Register scratch, LIRFrameState state) { ++ if (config.threadLocalHandshakes) { ++ emitThreadLocalPoll(crb, masm, config, onReturn, thread, scratch, state); ++ } else { ++ emitGlobalPoll(crb, masm, config, onReturn, scratch, state); ++ } ++ } ++ ++ private static void emitGlobalPoll(CompilationResultBuilder crb, SW64MacroAssembler masm, GraalHotSpotVMConfig config, boolean onReturn, Register scratch, LIRFrameState state) { ++ if (isPollingPageFar(config)) { ++ crb.recordMark(onReturn ? config.MARKID_POLL_RETURN_FAR : config.MARKID_POLL_FAR); ++ masm.movNativeAddress(scratch, config.safepointPollingAddress); ++ crb.recordMark(onReturn ? config.MARKID_POLL_RETURN_FAR : config.MARKID_POLL_FAR); ++ if (state != null) { ++ crb.recordInfopoint(masm.position(), state, InfopointReason.SAFEPOINT); ++ } ++ masm.ldr(32, zr, SW64Address.createBaseRegisterOnlyAddress(scratch)); ++ } else { ++ crb.recordMark(onReturn ? config.MARKID_POLL_RETURN_NEAR : config.MARKID_POLL_NEAR); ++ if (state != null) { ++ crb.recordInfopoint(masm.position(), state, InfopointReason.SAFEPOINT); ++ } ++ masm.ldr(32, zr, SW64Address.createPcLiteralAddress(0)); ++ } ++ } ++ ++ private static void emitThreadLocalPoll(CompilationResultBuilder crb, SW64MacroAssembler masm, GraalHotSpotVMConfig config, boolean onReturn, Register thread, Register scratch, ++ LIRFrameState state) { ++ assert config.threadPollingPageOffset >= 0; ++ masm.ldr(64, scratch, masm.makeAddress(thread, config.threadPollingPageOffset, 8)); ++ crb.recordMark(onReturn ? 
config.MARKID_POLL_RETURN_FAR : config.MARKID_POLL_FAR); ++ if (state != null) { ++ crb.recordInfopoint(masm.position(), state, InfopointReason.SAFEPOINT); ++ } ++ masm.ldr(32, zr, SW64Address.createBaseRegisterOnlyAddress(scratch)); ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotStrategySwitchOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotStrategySwitchOp.java +new file mode 100644 +index 0000000000..f1f2b6a259 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotStrategySwitchOp.java +@@ -0,0 +1,82 @@ ++/* ++ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */
++
++
++package org.graalvm.compiler.hotspot.sw64;
++
++import static jdk.vm.ci.code.ValueUtil.asRegister;
++
++import java.util.function.Function;
++
++import org.graalvm.compiler.asm.sw64.SW64Assembler;
++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler;
++import org.graalvm.compiler.core.common.calc.Condition;
++import org.graalvm.compiler.debug.GraalError;
++import org.graalvm.compiler.lir.LIRInstructionClass;
++import org.graalvm.compiler.lir.LabelRef;
++import org.graalvm.compiler.lir.SwitchStrategy;
++import org.graalvm.compiler.lir.sw64.SW64ControlFlow;
++import org.graalvm.compiler.lir.asm.CompilationResultBuilder;
++
++import jdk.vm.ci.code.Register;
++import jdk.vm.ci.hotspot.HotSpotMetaspaceConstant;
++import jdk.vm.ci.meta.Constant;
++import jdk.vm.ci.meta.Value;
++
++final class SW64HotSpotStrategySwitchOp extends SW64ControlFlow.StrategySwitchOp {
++    public static final LIRInstructionClass<SW64HotSpotStrategySwitchOp> TYPE = LIRInstructionClass.create(SW64HotSpotStrategySwitchOp.class);
++
++    SW64HotSpotStrategySwitchOp(SwitchStrategy strategy, LabelRef[] keyTargets, LabelRef defaultTarget, Value key, Value scratch, Function<Condition, SW64Assembler.ConditionFlag> converter) {
++        super(TYPE, strategy, keyTargets, defaultTarget, key, scratch, converter);
++    }
++
++    @Override
++    public void emitCode(final CompilationResultBuilder crb, final SW64MacroAssembler masm) {
++        strategy.run(new HotSpotSwitchClosure(asRegister(key), crb, masm));
++    }
++
++    public class HotSpotSwitchClosure extends SwitchClosure {
++
++        protected HotSpotSwitchClosure(Register keyRegister, CompilationResultBuilder crb, SW64MacroAssembler masm) {
++            super(keyRegister, crb, masm);
++        }
++
++        @Override
++        protected void emitComparison(Constant c) {
++            if (c instanceof HotSpotMetaspaceConstant) {
++                HotSpotMetaspaceConstant meta = (HotSpotMetaspaceConstant) c;
++                if (meta.isCompressed()) {
++                    crb.recordInlineDataInCode(meta);
++                    // masm.cmpl(keyRegister, 0xDEADDEAD);
++                    throw GraalError.unimplemented();
++                } else {
++                    crb.recordInlineDataInCode(meta);
++                    masm.movNativeAddress(asRegister(scratch), 0x0000_DEAD_DEAD_DEADL);
++                    masm.cmp(64, keyRegister, asRegister(scratch));
++                }
++            } else {
++                super.emitComparison(c);
++            }
++        }
++    }
++}
+diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotUnwindOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotUnwindOp.java
+new file mode 100644
+index 0000000000..d6d4755c8b
+--- /dev/null
++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64HotSpotUnwindOp.java
+@@ -0,0 +1,73 @@
++/*
++ * Copyright (c) 2013, 2018, Oracle and/or its affiliates. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static jdk.vm.ci.sw64.SW64.lr; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++import static org.graalvm.compiler.hotspot.HotSpotBackend.UNWIND_EXCEPTION_TO_CALLER; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.core.common.spi.ForeignCallLinkage; ++import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig; ++import org.graalvm.compiler.hotspot.stubs.UnwindExceptionToCallerStub; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.sw64.SW64Call; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.code.CallingConvention; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterValue; ++ ++/** ++ * Removes the current frame and jumps to the {@link UnwindExceptionToCallerStub}. ++ */ ++@Opcode("UNWIND") ++public final class SW64HotSpotUnwindOp extends SW64HotSpotEpilogueOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64HotSpotUnwindOp.class); ++ ++ @Use protected RegisterValue exception; ++ ++ public SW64HotSpotUnwindOp(GraalHotSpotVMConfig config, RegisterValue exception) { ++ super(TYPE, config); ++ this.exception = exception; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ leaveFrame(crb, masm, /* emitSafepoint */false, false); ++ ++ ForeignCallLinkage linkage = crb.foreignCalls.lookupForeignCall(UNWIND_EXCEPTION_TO_CALLER); ++ CallingConvention cc = linkage.getOutgoingCallingConvention(); ++ assert cc.getArgumentCount() == 2; ++ assert exception.equals(cc.getArgument(0)); ++ ++ // Get return address (is in lr after frame leave) ++ Register returnAddress = asRegister(cc.getArgument(1)); ++ masm.movx(returnAddress, lr); ++ ++ SW64Call.directJmp(crb, masm, linkage); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64IndirectCallOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64IndirectCallOp.java +new file mode 100644 +index 0000000000..71a13f4486 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.hotspot.sw64/src/org/graalvm/compiler/hotspot/sw64/SW64IndirectCallOp.java +@@ -0,0 +1,84 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). 
++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.hotspot.sw64; ++ ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++import static jdk.vm.ci.sw64.SW64.r12; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.hotspot.GraalHotSpotVMConfig; ++import org.graalvm.compiler.lir.LIRFrameState; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.sw64.SW64Call; ++import org.graalvm.compiler.lir.sw64.SW64Call.IndirectCallOp; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.ResolvedJavaMethod; ++import jdk.vm.ci.meta.Value; ++ ++/** ++ * A register indirect call that complies with the extra conventions for such calls in HotSpot. In ++ * particular, the metaspace Method of the callee must be in r12 for the case where a vtable entry's ++ * _from_compiled_entry is the address of an C2I adapter. Such adapters expect the target method to ++ * be in r12. ++ */ ++@Opcode("CALL_INDIRECT") ++final class SW64IndirectCallOp extends IndirectCallOp { ++ ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64IndirectCallOp.class); ++ ++ /** ++ * Vtable stubs expect the metaspace Method in r12. ++ */ ++ public static final Register METHOD = r12; ++ ++ @Use({REG}) private Value metaspaceMethod; ++ ++ private final GraalHotSpotVMConfig config; ++ ++ SW64IndirectCallOp(ResolvedJavaMethod callTarget, Value result, Value[] parameters, Value[] temps, Value metaspaceMethod, Value targetAddress, LIRFrameState state, ++ GraalHotSpotVMConfig config) { ++ super(TYPE, callTarget, result, parameters, temps, targetAddress, state); ++ this.metaspaceMethod = metaspaceMethod; ++ this.config = config; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ crb.recordMark(config.MARKID_INLINE_INVOKE); ++ Register callReg = asRegister(targetAddress); ++ assert !callReg.equals(METHOD); ++ SW64Call.indirectCall(crb, masm, callReg, callTarget, state); ++ } ++ ++ @Override ++ public void verify() { ++ super.verify(); ++ assert asRegister(metaspaceMethod).equals(METHOD); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64AddressValue.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64AddressValue.java +new file mode 100644 +index 0000000000..5a2d9746dc +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64AddressValue.java +@@ -0,0 +1,122 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import java.util.EnumSet; ++ ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.asm.sw64.SW64Address.AddressingMode; ++import org.graalvm.compiler.asm.sw64.SW64Assembler; ++import org.graalvm.compiler.asm.sw64.SW64Assembler.ExtendType; ++import org.graalvm.compiler.lir.CompositeValue; ++import org.graalvm.compiler.lir.InstructionValueConsumer; ++import org.graalvm.compiler.lir.InstructionValueProcedure; ++import org.graalvm.compiler.lir.LIRInstruction; ++import org.graalvm.compiler.lir.LIRInstruction.OperandFlag; ++ ++import jdk.vm.ci.sw64.SW64; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterValue; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.Value; ++import jdk.vm.ci.meta.ValueKind; ++ ++public final class SW64AddressValue extends CompositeValue { ++ private static final EnumSet flags = EnumSet.of(OperandFlag.REG, OperandFlag.ILLEGAL); ++ ++ @Component({OperandFlag.REG, OperandFlag.ILLEGAL}) protected AllocatableValue base; ++ @Component({OperandFlag.REG, OperandFlag.ILLEGAL}) protected AllocatableValue offset; ++ private final int displacement; ++ ++ /** ++ * Whether register offset should be scaled or not. ++ */ ++ private final int scaleFactor; ++ private final AddressingMode addressingMode; ++ ++ public SW64AddressValue(ValueKind kind, AllocatableValue base, AllocatableValue offset, int displacement, int scaleFactor, AddressingMode addressingMode) { ++ super(kind); ++ this.base = base; ++ this.offset = offset; ++ this.displacement = displacement; ++ this.scaleFactor = scaleFactor; ++ this.addressingMode = addressingMode; ++ } ++ ++ private static Register toRegister(AllocatableValue value) { ++ if (value.equals(Value.ILLEGAL)) { ++ return SW64.zr; ++ } else { ++ return ((RegisterValue) value).getRegister(); ++ } ++ } ++ ++ public AllocatableValue getBase() { ++ return base; ++ } ++ ++ public AllocatableValue getOffset() { ++ return offset; ++ } ++ ++ public int getDisplacement() { ++ return displacement; ++ } ++ ++ public boolean isScaled() { ++ return scaleFactor != 1; ++ } ++ ++ public int getScaleFactor() { ++ return scaleFactor; ++ } ++ ++ public AddressingMode getAddressingMode() { ++ return addressingMode; ++ } ++ ++ public SW64Address toAddress() { ++ Register baseReg = toRegister(base); ++ Register offsetReg = toRegister(offset); ++ SW64Assembler.ExtendType extendType = addressingMode == AddressingMode.EXTENDED_REGISTER_OFFSET ? 
ExtendType.SXTW : null; ++ return SW64Address.createAddress(addressingMode, baseReg, offsetReg, displacement / scaleFactor, isScaled(), extendType); ++ } ++ ++ @Override ++ public CompositeValue forEachComponent(LIRInstruction inst, LIRInstruction.OperandMode mode, InstructionValueProcedure proc) { ++ AllocatableValue newBase = (AllocatableValue) proc.doValue(inst, base, mode, flags); ++ AllocatableValue newOffset = (AllocatableValue) proc.doValue(inst, offset, mode, flags); ++ if (!base.identityEquals(newBase) || !offset.identityEquals(newOffset)) { ++ return new SW64AddressValue(getValueKind(), newBase, newOffset, displacement, scaleFactor, addressingMode); ++ } ++ return this; ++ } ++ ++ @Override ++ protected void visitEachComponent(LIRInstruction inst, LIRInstruction.OperandMode mode, InstructionValueConsumer proc) { ++ proc.visitValue(inst, base, mode, flags); ++ proc.visitValue(inst, offset, mode, flags); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ArithmeticLIRGeneratorTool.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ArithmeticLIRGeneratorTool.java +new file mode 100644 +index 0000000000..4a6877f795 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ArithmeticLIRGeneratorTool.java +@@ -0,0 +1,58 @@ ++/* ++ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import org.graalvm.compiler.lir.Variable; ++import org.graalvm.compiler.lir.gen.ArithmeticLIRGeneratorTool; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++import jdk.vm.ci.meta.Value; ++ ++/** ++ * This interface can be used to generate SW64 LIR for arithmetic operations. 
++ */ ++public interface SW64ArithmeticLIRGeneratorTool extends ArithmeticLIRGeneratorTool { ++ ++ Value emitCountLeadingZeros(Value value); ++ ++ Value emitCountTrailingZeros(Value value); ++ ++ enum RoundingMode { ++ NEAREST(0), ++ DOWN(1), ++ UP(2), ++ TRUNCATE(3); ++ ++ public final int encoding; ++ ++ RoundingMode(int encoding) { ++ this.encoding = encoding; ++ } ++ } ++ ++ Value emitRound(Value value, RoundingMode mode); ++ ++ void emitCompareOp(SW64Kind cmpKind, Variable left, Value right); ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ArithmeticOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ArithmeticOp.java +new file mode 100644 +index 0000000000..93be72372e +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ArithmeticOp.java +@@ -0,0 +1,440 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++import static org.graalvm.compiler.lir.sw64.SW64ArithmeticOp.ARMv8ConstantCategory.ARITHMETIC; ++import static org.graalvm.compiler.lir.sw64.SW64ArithmeticOp.ARMv8ConstantCategory.LOGICAL; ++import static org.graalvm.compiler.lir.sw64.SW64ArithmeticOp.ARMv8ConstantCategory.NONE; ++import static org.graalvm.compiler.lir.sw64.SW64ArithmeticOp.ARMv8ConstantCategory.SHIFT; ++import static jdk.vm.ci.sw64.SW64.zr; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++ ++import org.graalvm.compiler.asm.sw64.SW64Assembler; ++import org.graalvm.compiler.asm.sw64.SW64Assembler.ConditionFlag; ++import org.graalvm.compiler.debug.GraalError; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.JavaConstant; ++ ++public enum SW64ArithmeticOp { ++ // TODO At least add and sub *can* be used with SP, so this should be supported ++ NEG, ++ NOT, ++ ADD(ARITHMETIC), ++ ADDS(ARITHMETIC), ++ SUB(ARITHMETIC), ++ SUBS(ARITHMETIC), ++ MUL, ++ MULVS, ++ DIV, ++ SMULH, ++ UMULH, ++ REM, ++ UDIV, ++ UREM, ++ AND(LOGICAL), ++ ANDS(LOGICAL), ++ OR(LOGICAL), ++ XOR(LOGICAL), ++ SHL(SHIFT), ++ LSHR(SHIFT), ++ ASHR(SHIFT), ++ ABS, ++ ++ FADD, ++ FSUB, ++ FMUL, ++ FDIV, ++ FREM, ++ FNEG, ++ FABS, ++ FRINTM, ++ FRINTN, ++ FRINTP, ++ SQRT; ++ ++ /** ++ * Specifies what constants can be used directly without having to be loaded into a register ++ * with the given instruction. ++ */ ++ public enum ARMv8ConstantCategory { ++ NONE, ++ LOGICAL, ++ ARITHMETIC, ++ SHIFT ++ } ++ ++ public final ARMv8ConstantCategory category; ++ ++ SW64ArithmeticOp(ARMv8ConstantCategory category) { ++ this.category = category; ++ } ++ ++ SW64ArithmeticOp() { ++ this(NONE); ++ } ++ ++ public static class UnaryOp extends SW64LIRInstruction { ++ private static final LIRInstructionClass TYPE = LIRInstructionClass.create(UnaryOp.class); ++ ++ @Opcode private final SW64ArithmeticOp opcode; ++ @Def({REG}) protected AllocatableValue result; ++ @Use({REG}) protected AllocatableValue x; ++ ++ public UnaryOp(SW64ArithmeticOp opcode, AllocatableValue result, AllocatableValue x) { ++ super(TYPE); ++ this.opcode = opcode; ++ this.result = result; ++ this.x = x; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ Register dst = asRegister(result); ++ Register src = asRegister(x); ++ int size = result.getPlatformKind().getSizeInBytes() * Byte.SIZE; ++ switch (opcode) { ++ case NEG: ++ masm.sub(size, dst, zr, src); ++ break; ++ case FNEG: ++ masm.fneg(size, dst, src); ++ break; ++ case NOT: ++ masm.not(size, dst, src); ++ break; ++ case ABS: ++ masm.cmp(size, src, 0); ++ masm.csneg(size, dst, src, ConditionFlag.LT); ++ break; ++ case FABS: ++ masm.fabs(size, dst, src); ++ break; ++ case FRINTM: ++ masm.frintm(size, dst, src); ++ break; ++ case FRINTN: ++ masm.frintn(size, dst, src); ++ break; ++ case FRINTP: ++ masm.frintp(size, dst, src); ++ break; ++ case SQRT: ++ masm.fsqrt(size, dst, src); ++ break; ++ default: ++ throw GraalError.shouldNotReachHere("op=" + opcode.name()); ++ } ++ } ++ } ++ ++ public static class BinaryConstOp extends SW64LIRInstruction { ++ private static final LIRInstructionClass TYPE = 
LIRInstructionClass.create(BinaryConstOp.class); ++ ++ @Opcode private final SW64ArithmeticOp op; ++ @Def({REG}) protected AllocatableValue result; ++ @Use({REG}) protected AllocatableValue a; ++ private final JavaConstant b; ++ ++ public BinaryConstOp(SW64ArithmeticOp op, AllocatableValue result, AllocatableValue a, JavaConstant b) { ++ super(TYPE); ++ this.op = op; ++ this.result = result; ++ this.a = a; ++ this.b = b; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ assert op.category != NONE; ++ Register dst = asRegister(result); ++ Register src = asRegister(a); ++ int size = result.getPlatformKind().getSizeInBytes() * Byte.SIZE; ++ switch (op) { ++ case ADD: ++ // Don't use asInt() here, since we can't use asInt on a long variable, even ++ // if the constant easily fits as an int. ++ assert SW64MacroAssembler.isArithmeticImmediate(b.asLong()); ++ masm.add(size, dst, src, (int) b.asLong()); ++ break; ++ case SUB: ++ // Don't use asInt() here, since we can't use asInt on a long variable, even ++ // if the constant easily fits as an int. ++ assert SW64MacroAssembler.isArithmeticImmediate(b.asLong()); ++ masm.sub(size, dst, src, (int) b.asLong()); ++ break; ++ case ADDS: ++ assert SW64MacroAssembler.isArithmeticImmediate(b.asLong()); ++ masm.adds(size, dst, src, (int) b.asLong()); ++ break; ++ case SUBS: ++ assert SW64MacroAssembler.isArithmeticImmediate(b.asLong()); ++ masm.subs(size, dst, src, (int) b.asLong()); ++ break; ++ case AND: ++ // XXX Should this be handled somewhere else? ++ if (size == 32 && b.asLong() == 0xFFFF_FFFFL) { ++ masm.mov(size, dst, src); ++ } else { ++ masm.and(size, dst, src, b.asLong()); ++ } ++ break; ++ case ANDS: ++ masm.ands(size, dst, src, b.asLong()); ++ break; ++ case OR: ++ masm.or(size, dst, src, b.asLong()); ++ break; ++ case XOR: ++ masm.eor(size, dst, src, b.asLong()); ++ break; ++ case SHL: ++ masm.shl(size, dst, src, b.asLong()); ++ break; ++ case LSHR: ++ masm.lshr(size, dst, src, b.asLong()); ++ break; ++ case ASHR: ++ masm.ashr(size, dst, src, b.asLong()); ++ break; ++ default: ++ throw GraalError.shouldNotReachHere("op=" + op.name()); ++ } ++ } ++ } ++ ++ public static class BinaryOp extends SW64LIRInstruction { ++ private static final LIRInstructionClass TYPE = LIRInstructionClass.create(BinaryOp.class); ++ ++ @Opcode private final SW64ArithmeticOp op; ++ @Def({REG}) protected AllocatableValue result; ++ @Use({REG}) protected AllocatableValue a; ++ @Use({REG}) protected AllocatableValue b; ++ ++ public BinaryOp(SW64ArithmeticOp op, AllocatableValue result, AllocatableValue a, AllocatableValue b) { ++ super(TYPE); ++ this.op = op; ++ this.result = result; ++ this.a = a; ++ this.b = b; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ Register dst = asRegister(result); ++ Register src1 = asRegister(a); ++ Register src2 = asRegister(b); ++ int size = result.getPlatformKind().getSizeInBytes() * Byte.SIZE; ++ switch (op) { ++ case ADD: ++ masm.add(size, dst, src1, src2); ++ break; ++ case ADDS: ++ masm.adds(size, dst, src1, src2); ++ break; ++ case SUB: ++ masm.sub(size, dst, src1, src2); ++ break; ++ case SUBS: ++ masm.subs(size, dst, src1, src2); ++ break; ++ case MUL: ++ masm.mul(size, dst, src1, src2); ++ break; ++ case UMULH: ++ masm.umulh(size, dst, src1, src2); ++ break; ++ case SMULH: ++ masm.smulh(size, dst, src1, src2); ++ break; ++ case DIV: ++ masm.sdiv(size, dst, src1, src2); ++ break; ++ case UDIV: ++ masm.udiv(size, dst, 
src1, src2); ++ break; ++ case AND: ++ masm.and(size, dst, src1, src2); ++ break; ++ case ANDS: ++ masm.ands(size, dst, src1, src2); ++ break; ++ case OR: ++ masm.or(size, dst, src1, src2); ++ break; ++ case XOR: ++ masm.eor(size, dst, src1, src2); ++ break; ++ case SHL: ++ masm.shl(size, dst, src1, src2); ++ break; ++ case LSHR: ++ masm.lshr(size, dst, src1, src2); ++ break; ++ case ASHR: ++ masm.ashr(size, dst, src1, src2); ++ break; ++ case FADD: ++ masm.fadd(size, dst, src1, src2); ++ break; ++ case FSUB: ++ masm.fsub(size, dst, src1, src2); ++ break; ++ case FMUL: ++ masm.fmul(size, dst, src1, src2); ++ break; ++ case FDIV: ++ masm.fdiv(size, dst, src1, src2); ++ break; ++ case MULVS: ++ masm.mulvs(size, dst, src1, src2); ++ break; ++ default: ++ throw GraalError.shouldNotReachHere("op=" + op.name()); ++ } ++ } ++ } ++ ++ /** ++ * Class used for instructions that have to reuse one of their arguments. This only applies to ++ * the remainder instructions at the moment, since we have to compute n % d using rem = n - ++ * TruncatingDivision(n, d) * d ++ * ++ * TODO (das) Replace the remainder nodes in the LIR. ++ */ ++ public static class BinaryCompositeOp extends SW64LIRInstruction { ++ private static final LIRInstructionClass TYPE = LIRInstructionClass.create(BinaryCompositeOp.class); ++ @Opcode private final SW64ArithmeticOp op; ++ @Def({REG}) protected AllocatableValue result; ++ @Alive({REG}) protected AllocatableValue a; ++ @Alive({REG}) protected AllocatableValue b; ++ ++ public BinaryCompositeOp(SW64ArithmeticOp op, AllocatableValue result, AllocatableValue a, AllocatableValue b) { ++ super(TYPE); ++ this.op = op; ++ this.result = result; ++ this.a = a; ++ this.b = b; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ Register dst = asRegister(result); ++ Register src1 = asRegister(a); ++ Register src2 = asRegister(b); ++ int size = result.getPlatformKind().getSizeInBytes() * Byte.SIZE; ++ switch (op) { ++ case REM: ++ masm.rem(size, dst, src1, src2); ++ break; ++ case UREM: ++ masm.urem(size, dst, src1, src2); ++ break; ++ case FREM: ++ masm.frem(size, dst, src1, src2); ++ break; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ } ++ } ++ ++ public static class AddSubShiftOp extends SW64LIRInstruction { ++ private static final LIRInstructionClass TYPE = LIRInstructionClass.create(AddSubShiftOp.class); ++ ++ @Opcode private final SW64ArithmeticOp op; ++ @Def(REG) protected AllocatableValue result; ++ @Use(REG) protected AllocatableValue src1; ++ @Use(REG) protected AllocatableValue src2; ++ private final SW64MacroAssembler.ShiftType shiftType; ++ private final int shiftAmt; ++ ++ /** ++ * Computes result = src1 src2 . 
++ */ ++ public AddSubShiftOp(SW64ArithmeticOp op, AllocatableValue result, AllocatableValue src1, AllocatableValue src2, SW64MacroAssembler.ShiftType shiftType, int shiftAmt) { ++ super(TYPE); ++ assert op == ADD || op == SUB; ++ this.op = op; ++ this.result = result; ++ this.src1 = src1; ++ this.src2 = src2; ++ this.shiftType = shiftType; ++ this.shiftAmt = shiftAmt; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ int size = result.getPlatformKind().getSizeInBytes() * Byte.SIZE; ++ switch (op) { ++ case ADD: ++ masm.add(size, asRegister(result), asRegister(src1), asRegister(src2), shiftType, shiftAmt); ++ break; ++ case SUB: ++ masm.sub(size, asRegister(result), asRegister(src1), asRegister(src2), shiftType, shiftAmt); ++ break; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ } ++ } ++ ++ public static class ExtendedAddShiftOp extends SW64LIRInstruction { ++ private static final LIRInstructionClass TYPE = LIRInstructionClass.create(ExtendedAddShiftOp.class); ++ @Def(REG) protected AllocatableValue result; ++ @Use(REG) protected AllocatableValue src1; ++ @Use(REG) protected AllocatableValue src2; ++ private final SW64Assembler.ExtendType extendType; ++ private final int shiftAmt; ++ ++ /** ++ * Computes result = src1 + extendType(src2) << shiftAmt. ++ * ++ * @param extendType defines how src2 is extended to the same size as src1. ++ * @param shiftAmt must be in range 0 to 4. ++ */ ++ public ExtendedAddShiftOp(AllocatableValue result, AllocatableValue src1, AllocatableValue src2, SW64Assembler.ExtendType extendType, int shiftAmt) { ++ super(TYPE); ++ this.result = result; ++ this.src1 = src1; ++ this.src2 = src2; ++ this.extendType = extendType; ++ this.shiftAmt = shiftAmt; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ int size = result.getPlatformKind().getSizeInBytes() * Byte.SIZE; ++ masm.add(size, asRegister(result), asRegister(src1), asRegister(src2), extendType, shiftAmt); ++ } ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ArrayCompareToOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ArrayCompareToOp.java +new file mode 100644 +index 0000000000..4fb520911c +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ArrayCompareToOp.java +@@ -0,0 +1,288 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
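The REM/UREM/FREM cases handled by BinaryCompositeOp above rely on the identity spelled out in its javadoc, rem = n - TruncatingDivision(n, d) * d, since there is no direct remainder instruction. A small stand-alone Java sketch of that identity, for illustration only (the class and method names are invented, nothing below is part of the patch):

final class RemIdentitySketch {
    // rem = n - TruncatingDivision(n, d) * d, the same math the LIR op emits
    // as a truncating division followed by a multiply and subtract.
    static int remViaTruncDiv(int n, int d) {
        int q = n / d;      // Java's integer division already truncates toward zero
        return n - q * d;   // matches n % d for any d != 0
    }

    public static void main(String[] args) {
        int[][] samples = {{7, 3}, {-7, 3}, {7, -3}, {-7, -3}};
        for (int[] s : samples) {
            System.out.printf("%d %% %d -> %d (via identity: %d)%n",
                    s[0], s[1], s[0] % s[1], remViaTruncDiv(s[0], s[1]));
        }
    }
}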
++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static jdk.vm.ci.sw64.SW64.zr; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++ ++import java.lang.reflect.Array; ++import java.lang.reflect.Field; ++ ++import org.graalvm.compiler.asm.Label; ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.asm.sw64.SW64Assembler.ConditionFlag; ++import org.graalvm.compiler.core.common.LIRKind; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++import org.graalvm.compiler.lir.gen.LIRGeneratorTool; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.Value; ++import sun.misc.Unsafe; ++ ++/** ++ * Emits code which compares two arrays lexicographically. If the CPU supports any vector ++ * instructions specialized code is emitted to leverage these instructions. ++ */ ++@Opcode("ARRAY_COMPARE_TO") ++public final class SW64ArrayCompareToOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64ArrayCompareToOp.class); ++ ++ private final JavaKind kind1; ++ private final JavaKind kind2; ++ ++ private final int array1BaseOffset; ++ private final int array2BaseOffset; ++ ++ @Def({REG}) protected Value resultValue; ++ ++ @Alive({REG}) protected Value array1Value; ++ @Alive({REG}) protected Value array2Value; ++ @Use({REG}) protected Value length1Value; ++ @Use({REG}) protected Value length2Value; ++ @Temp({REG}) protected Value length1ValueTemp; ++ @Temp({REG}) protected Value length2ValueTemp; ++ ++ @Temp({REG}) protected Value temp1; ++ @Temp({REG}) protected Value temp2; ++ @Temp({REG}) protected Value temp3; ++ @Temp({REG}) protected Value temp4; ++ @Temp({REG}) protected Value temp5; ++ @Temp({REG}) protected Value temp6; ++ ++ public SW64ArrayCompareToOp(LIRGeneratorTool tool, JavaKind kind1, JavaKind kind2, Value result, Value array1, Value array2, Value length1, Value length2) { ++ super(TYPE); ++ this.kind1 = kind1; ++ this.kind2 = kind2; ++ ++ // Both offsets should be the same but better be safe than sorry. ++ Class array1Class = Array.newInstance(kind1.toJavaClass(), 0).getClass(); ++ Class array2Class = Array.newInstance(kind2.toJavaClass(), 0).getClass(); ++ this.array1BaseOffset = UNSAFE.arrayBaseOffset(array1Class); ++ this.array2BaseOffset = UNSAFE.arrayBaseOffset(array2Class); ++ ++ this.resultValue = result; ++ ++ this.array1Value = array1; ++ this.array2Value = array2; ++ ++ /* ++ * The length values are inputs but are also killed like temporaries so need both Use and ++ * Temp annotations, which will only work with fixed registers. ++ */ ++ ++ this.length1Value = length1; ++ this.length2Value = length2; ++ this.length1ValueTemp = length1; ++ this.length2ValueTemp = length2; ++ ++ // Allocate some temporaries. 
++ this.temp1 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind())); ++ this.temp2 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind())); ++ this.temp3 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind())); ++ this.temp4 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind())); ++ this.temp5 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind())); ++ this.temp6 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind())); ++ } ++ ++ private static final Unsafe UNSAFE = initUnsafe(); ++ ++ private static Unsafe initUnsafe() { ++ try { ++ return Unsafe.getUnsafe(); ++ } catch (SecurityException se) { ++ try { ++ Field theUnsafe = Unsafe.class.getDeclaredField("theUnsafe"); ++ theUnsafe.setAccessible(true); ++ return (Unsafe) theUnsafe.get(Unsafe.class); ++ } catch (Exception e) { ++ throw new RuntimeException("exception while trying to get Unsafe", e); ++ } ++ } ++ } ++ ++ @Override ++ protected void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ ++ Register result = asRegister(resultValue); ++ Register length1 = asRegister(length1Value); ++ Register length2 = asRegister(length2Value); ++ ++ Register array1 = asRegister(temp1); ++ Register array2 = asRegister(temp2); ++ Register length = asRegister(temp3); ++ Register temp = asRegister(temp4); ++ Register tailCount = asRegister(temp5); ++ Register vecCount = asRegister(temp6); ++ ++ // Checkstyle: stop ++ final Label BREAK_LABEL = new Label(); ++ final Label STRING_DIFFER_LABEL = new Label(); ++ final Label LENGTH_DIFFER_LABEL = new Label(); ++ final Label MAIN_LOOP_LABEL = new Label(); ++ final Label COMPARE_SHORT_LABEL = new Label(); ++ // Checkstyle: resume ++ ++ // Checkstyle: stop ++ int CHAR_SIZE_BYTES = 1; ++ int VECTOR_SIZE_BYTES = 8; ++ int VECTOR_COUNT_BYTES = 8; ++ // Checkstyle: resume ++ ++ // Byte is expanded to short if we compare strings with different encoding ++ if (kind1 != kind2 || kind1 == JavaKind.Char) { ++ CHAR_SIZE_BYTES = 2; ++ } ++ ++ if (kind1 != kind2) { ++ VECTOR_COUNT_BYTES = 4; ++ } ++ ++ // Load array base addresses. ++ masm.lea(array1, SW64Address.createUnscaledImmediateAddress(asRegister(array1Value), array1BaseOffset)); ++ masm.lea(array2, SW64Address.createUnscaledImmediateAddress(asRegister(array2Value), array2BaseOffset)); ++ ++ // Calculate minimal length in chars for different kind case ++ // Conditions could be squashed but lets keep it readable ++ if (kind1 != kind2) { ++ masm.lshr(64, length2, length2, 1); ++ } ++ ++ if (kind1 == kind2 && kind1 == JavaKind.Char) { ++ masm.lshr(64, length1, length1, 1); ++ masm.lshr(64, length2, length2, 1); ++ } ++ ++ masm.cmp(64, length1, length2); ++ masm.cmov(64, length, length1, length2, ConditionFlag.LT); ++ ++ // One of strings is empty ++ masm.cbz(64, length, LENGTH_DIFFER_LABEL); ++ ++ // Go back to bytes if necessary ++ if (kind1 != kind2 || kind1 == JavaKind.Char) { ++ masm.shl(64, length, length, 1); ++ } ++ ++ masm.mov(64, vecCount, zr); ++ masm.and(64, tailCount, length, VECTOR_SIZE_BYTES - 1); // tail count (in bytes) ++ masm.ands(64, length, length, ~(VECTOR_SIZE_BYTES - 1)); // vector count (in bytes) ++ ++ // Length of string is less than VECTOR_SIZE, go to simple compare ++ masm.branchConditionally(ConditionFlag.EQ, COMPARE_SHORT_LABEL); ++ ++ // MAIN_LOOP - read strings by 8 byte. 
++ masm.bind(MAIN_LOOP_LABEL); ++ if (kind1 != kind2) { ++ // Load 32 bits ad unpack it to entire 64bit register ++ masm.ldr(32, result, SW64Address.createRegisterOffsetAddress(array1, vecCount, false)); ++ masm.ubfm(64, temp, result, 0, 7); ++ masm.lshr(64, result, result, 8); ++ masm.bfm(64, temp, result, 48, 7); ++ masm.lshr(64, result, result, 8); ++ masm.bfm(64, temp, result, 32, 7); ++ masm.lshr(64, result, result, 8); ++ masm.bfm(64, temp, result, 16, 7); ++ // Unpacked value placed in temp now ++ ++ masm.shl(64, result, vecCount, 1); ++ masm.ldr(64, result, SW64Address.createRegisterOffsetAddress(array2, result, false)); ++ } else { ++ masm.ldr(64, temp, SW64Address.createRegisterOffsetAddress(array1, vecCount, false)); ++ masm.ldr(64, result, SW64Address.createRegisterOffsetAddress(array2, vecCount, false)); ++ } ++ masm.eor(64, result, temp, result); ++ masm.cbnz(64, result, STRING_DIFFER_LABEL); ++ masm.add(64, vecCount, vecCount, VECTOR_COUNT_BYTES); ++ masm.cmp(64, vecCount, length); ++ masm.branchConditionally(ConditionFlag.LT, MAIN_LOOP_LABEL); ++ // End of MAIN_LOOP ++ ++ // Strings are equal and no TAIL go to END ++ masm.cbz(64, tailCount, LENGTH_DIFFER_LABEL); ++ ++ // Compaire tail of long string ... ++ masm.lea(array1, SW64Address.createRegisterOffsetAddress(array1, length, false)); ++ masm.lea(array2, SW64Address.createRegisterOffsetAddress(array2, length, false)); ++ ++ // ... or string less than vector length ++ masm.bind(COMPARE_SHORT_LABEL); ++ for (int i = 0; i < VECTOR_COUNT_BYTES; i += CHAR_SIZE_BYTES) { ++ if (kind1 != kind2) { ++ masm.ldr(8, temp, SW64Address.createUnscaledImmediateAddress(array1, i / 2)); ++ } else { ++ masm.ldr(8 * CHAR_SIZE_BYTES, temp, SW64Address.createUnscaledImmediateAddress(array1, i)); ++ } ++ ++ masm.ldr(8 * CHAR_SIZE_BYTES, result, SW64Address.createUnscaledImmediateAddress(array2, i)); ++ ++ if (kind1 != kind2 && kind1 == JavaKind.Char) { ++ // Weird swap of substraction order ++ masm.subs(64, result, result, temp); ++ } else { ++ masm.subs(64, result, temp, result); ++ } ++ ++ masm.branchConditionally(ConditionFlag.NE, BREAK_LABEL); ++ masm.subs(64, tailCount, tailCount, CHAR_SIZE_BYTES); ++ masm.branchConditionally(ConditionFlag.EQ, LENGTH_DIFFER_LABEL); ++ } ++ ++ // STRING_DIFFER extract exact value of a difference ++ masm.bind(STRING_DIFFER_LABEL); ++ masm.rbit(64, tailCount, result); ++ masm.clz(64, vecCount, tailCount); ++ masm.and(64, vecCount, vecCount, ~((8 * CHAR_SIZE_BYTES) - 1)); // Round to byte or short ++ ++ masm.eor(64, result, temp, result); ++ masm.ashr(64, result, result, vecCount); ++ masm.ashr(64, temp, temp, vecCount); ++ ++ masm.and(64, result, result, 0xFFFF >>> (16 - (8 * CHAR_SIZE_BYTES))); // 0xFF or 0xFFFF ++ masm.and(64, temp, temp, 0xFFFF >>> (16 - (8 * CHAR_SIZE_BYTES))); ++ ++ masm.sub(64, result, temp, result); ++ masm.branchConditionally(ConditionFlag.AL, BREAK_LABEL); ++ // End of STRING_DIFFER ++ ++ // Strings are equials up to length, ++ // return length difference in chars ++ masm.bind(LENGTH_DIFFER_LABEL); ++ if (kind1 != kind2 && kind1 == JavaKind.Char) { ++ // Weird swap of substraction order ++ masm.sub(64, result, length2, length1); ++ } else { ++ masm.sub(64, result, length1, length2); ++ } ++ ++ // We are done ++ masm.bind(BREAK_LABEL); ++ } ++ ++} // class +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ArrayEqualsOp.java 
b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ArrayEqualsOp.java +new file mode 100644 +index 0000000000..e160b1a9ae +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ArrayEqualsOp.java +@@ -0,0 +1,218 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static jdk.vm.ci.sw64.SW64.zr; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++ ++import org.graalvm.compiler.asm.Label; ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.asm.sw64.SW64Assembler.ConditionFlag; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler.ScratchRegister; ++import org.graalvm.compiler.core.common.LIRKind; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++import org.graalvm.compiler.lir.gen.LIRGeneratorTool; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.Value; ++ ++/** ++ * Emits code which compares two arrays of the same length. If the CPU supports any vector ++ * instructions specialized code is emitted to leverage these instructions. 
++ */ ++@Opcode("ARRAY_EQUALS") ++public final class SW64ArrayEqualsOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64ArrayEqualsOp.class); ++ ++ private final JavaKind kind; ++ private final int arrayBaseOffset; ++ private final int arrayIndexScale; ++ ++ @Def({REG}) protected Value resultValue; ++ @Alive({REG}) protected Value array1Value; ++ @Alive({REG}) protected Value array2Value; ++ @Alive({REG}) protected Value lengthValue; ++ @Temp({REG}) protected Value temp1; ++ @Temp({REG}) protected Value temp2; ++ @Temp({REG}) protected Value temp3; ++ @Temp({REG}) protected Value temp4; ++ ++ public SW64ArrayEqualsOp(LIRGeneratorTool tool, JavaKind kind, Value result, Value array1, Value array2, Value length) { ++ super(TYPE); ++ ++ assert !kind.isNumericFloat() : "Float arrays comparison (bitwise_equal || both_NaN) isn't supported"; ++ this.kind = kind; ++ ++ this.arrayBaseOffset = tool.getProviders().getArrayOffsetProvider().arrayBaseOffset(kind); ++ this.arrayIndexScale = tool.getProviders().getArrayOffsetProvider().arrayScalingFactor(kind); ++ ++ this.resultValue = result; ++ this.array1Value = array1; ++ this.array2Value = array2; ++ this.lengthValue = length; ++ ++ // Allocate some temporaries. ++ this.temp1 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind())); ++ this.temp2 = tool.newVariable(LIRKind.unknownReference(tool.target().arch.getWordKind())); ++ this.temp3 = tool.newVariable(LIRKind.value(tool.target().arch.getWordKind())); ++ this.temp4 = tool.newVariable(LIRKind.value(tool.target().arch.getWordKind())); ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ Register result = asRegister(resultValue); ++ Register array1 = asRegister(temp1); ++ Register array2 = asRegister(temp2); ++ Register length = asRegister(temp3); ++ ++ Label breakLabel = new Label(); ++ ++ try (ScratchRegister sc1 = masm.getScratchRegister()) { ++ Register rscratch1 = sc1.getRegister(); ++ // Load array base addresses. ++ masm.lea(array1, SW64Address.createUnscaledImmediateAddress(asRegister(array1Value), arrayBaseOffset)); ++ masm.lea(array2, SW64Address.createUnscaledImmediateAddress(asRegister(array2Value), arrayBaseOffset)); ++ ++ // Get array length in bytes. ++ masm.mov(rscratch1, arrayIndexScale); ++ masm.smaddl(length, asRegister(lengthValue), rscratch1, zr); ++ masm.mov(64, result, length); // copy ++ ++ emit8ByteCompare(crb, masm, result, array1, array2, length, breakLabel, rscratch1); ++ emitTailCompares(masm, result, array1, array2, breakLabel, rscratch1); ++ ++ // Return: rscratch1 is non-zero iff the arrays differ ++ masm.bind(breakLabel); ++ masm.cmp(64, rscratch1, zr); ++ masm.cset(result, ConditionFlag.EQ); ++ } ++ } ++ ++ /** ++ * Vector size used in {@link #emit8ByteCompare}. ++ */ ++ private static final int VECTOR_SIZE = 8; ++ ++ /** ++ * Emits code that uses 8-byte vector compares. 
++ * ++ */ ++ private void emit8ByteCompare(CompilationResultBuilder crb, SW64MacroAssembler masm, Register result, Register array1, Register array2, Register length, Label breakLabel, ++ Register rscratch1) { ++ Label loop = new Label(); ++ Label compareTail = new Label(); ++ ++ Register temp = asRegister(temp4); ++ ++ masm.and(64, result, result, VECTOR_SIZE - 1); // tail count (in bytes) ++ masm.ands(64, length, length, ~(VECTOR_SIZE - 1)); // vector count (in bytes) ++ masm.branchConditionally(ConditionFlag.EQ, compareTail); ++ ++ masm.lea(array1, SW64Address.createRegisterOffsetAddress(array1, length, false)); ++ masm.lea(array2, SW64Address.createRegisterOffsetAddress(array2, length, false)); ++ masm.sub(64, length, zr, length); ++ ++ // Align the main loop ++ masm.align(crb.target.wordSize * 2); ++ masm.bind(loop); ++ masm.ldr(64, temp, SW64Address.createRegisterOffsetAddress(array1, length, false)); ++ masm.ldr(64, rscratch1, SW64Address.createRegisterOffsetAddress(array2, length, false)); ++ masm.eor(64, rscratch1, temp, rscratch1); ++ masm.cbnz(64, rscratch1, breakLabel); ++ masm.add(64, length, length, VECTOR_SIZE); ++ masm.cbnz(64, length, loop); ++ ++ masm.cbz(64, result, breakLabel); ++ ++ /* ++ * Compare the remaining bytes with an unaligned memory load aligned to the end of the ++ * array. ++ */ ++ masm.lea(array1, SW64Address.createUnscaledImmediateAddress(array1, -VECTOR_SIZE)); ++ masm.lea(array2, SW64Address.createUnscaledImmediateAddress(array2, -VECTOR_SIZE)); ++ masm.ldr(64, temp, SW64Address.createRegisterOffsetAddress(array1, result, false)); ++ masm.ldr(64, rscratch1, SW64Address.createRegisterOffsetAddress(array2, result, false)); ++ masm.eor(64, rscratch1, temp, rscratch1); ++ masm.jmp(breakLabel); ++ ++ masm.bind(compareTail); ++ } ++ ++ /** ++ * Emits code to compare the remaining 1 to 4 bytes. ++ * ++ */ ++ private void emitTailCompares(SW64MacroAssembler masm, Register result, Register array1, Register array2, Label breakLabel, Register rscratch1) { ++ Label compare2Bytes = new Label(); ++ Label compare1Byte = new Label(); ++ Label end = new Label(); ++ ++ Register temp = asRegister(temp4); ++ ++ if (kind.getByteCount() <= 4) { ++ // Compare trailing 4 bytes, if any. ++ masm.ands(32, zr, result, 4); ++ masm.branchConditionally(ConditionFlag.EQ, compare2Bytes); ++ masm.ldr(32, temp, SW64Address.createPostIndexedImmediateAddress(array1, 4)); ++ masm.ldr(32, rscratch1, SW64Address.createPostIndexedImmediateAddress(array2, 4)); ++ masm.eor(32, rscratch1, temp, rscratch1); ++ masm.cbnz(32, rscratch1, breakLabel); ++ ++ if (kind.getByteCount() <= 2) { ++ // Compare trailing 2 bytes, if any. ++ masm.bind(compare2Bytes); ++ masm.ands(32, zr, result, 2); ++ masm.branchConditionally(ConditionFlag.EQ, compare1Byte); ++ masm.ldr(16, temp, SW64Address.createPostIndexedImmediateAddress(array1, 2)); ++ masm.ldr(16, rscratch1, SW64Address.createPostIndexedImmediateAddress(array2, 2)); ++ masm.eor(32, rscratch1, temp, rscratch1); ++ masm.cbnz(32, rscratch1, breakLabel); ++ ++ // The one-byte tail compare is only required for boolean and byte arrays. ++ if (kind.getByteCount() <= 1) { ++ // Compare trailing byte, if any. 
++ masm.bind(compare1Byte); ++ masm.ands(32, zr, result, 1); ++ masm.branchConditionally(ConditionFlag.EQ, end); ++ masm.ldr(8, temp, SW64Address.createBaseRegisterOnlyAddress(array1)); ++ masm.ldr(8, rscratch1, SW64Address.createBaseRegisterOnlyAddress(array2)); ++ masm.eor(32, rscratch1, temp, rscratch1); ++ masm.cbnz(32, rscratch1, breakLabel); ++ } else { ++ masm.bind(compare1Byte); ++ } ++ } else { ++ masm.bind(compare2Bytes); ++ } ++ } ++ masm.bind(end); ++ masm.mov(64, rscratch1, zr); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64AtomicMove.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64AtomicMove.java +new file mode 100644 +index 0000000000..a3c3aec0cb +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64AtomicMove.java +@@ -0,0 +1,258 @@ ++/* ++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.CONST; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++ ++import org.graalvm.compiler.asm.Label; ++import org.graalvm.compiler.asm.sw64.SW64Assembler; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler.ScratchRegister; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.LIRValueUtil; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.Value; ++ ++public class SW64AtomicMove { ++ /** ++ * Compare and swap instruction. 
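As an aid to reading SW64ArrayEqualsOp above, the following is a rough scalar Java equivalent of its control flow: whole 8-byte chunks first, then a byte-wise tail. It is only a sketch with an invented class name; it deliberately skips the overlapping unaligned tail load and the per-kind element widths used by the emitted code.

import java.nio.ByteBuffer;

final class ArrayEqualsSketch {
    static boolean equals(byte[] a, byte[] b) {
        if (a.length != b.length) {
            return false;           // the LIR op itself assumes equal lengths
        }
        ByteBuffer ba = ByteBuffer.wrap(a);
        ByteBuffer bb = ByteBuffer.wrap(b);
        int i = 0;
        int vectorLimit = a.length & ~7;          // whole 8-byte chunks, like the main loop
        for (; i < vectorLimit; i += 8) {
            if (ba.getLong(i) != bb.getLong(i)) { // cf. the eor + cbnz pair in emit8ByteCompare
                return false;
            }
        }
        for (; i < a.length; i++) {               // remaining bytes, cf. emitTailCompares
            if (a[i] != b[i]) {
                return false;
            }
        }
        return true;
    }
}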
Does the following atomically: ++ * CAS(newVal, expected, address): ++ * oldVal = *address ++ * if oldVal == expected: ++ * *address = newVal ++ * return oldVal ++ * ++ */ ++ @Opcode("CAS") ++ public static class CompareAndSwapOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(CompareAndSwapOp.class); ++ ++ @Def protected AllocatableValue resultValue; ++ @Alive protected Value expectedValue; ++ @Alive protected AllocatableValue newValue; ++ @Alive protected AllocatableValue addressValue; ++ @Temp protected AllocatableValue scratchValue; ++ ++ public CompareAndSwapOp(AllocatableValue result, Value expectedValue, AllocatableValue newValue, AllocatableValue addressValue, AllocatableValue scratch) { ++ super(TYPE); ++ this.resultValue = result; ++ this.expectedValue = expectedValue; ++ this.newValue = newValue; ++ this.addressValue = addressValue; ++ this.scratchValue = scratch; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ SW64Kind kind = (SW64Kind) expectedValue.getPlatformKind(); ++ assert kind.isInteger(); ++ final int size = kind.getSizeInBytes() * Byte.SIZE; ++ ++ Register address = asRegister(addressValue); ++ Register result = asRegister(resultValue); ++ Register newVal = asRegister(newValue); ++ if (SW64LIRFlagsVersioned.useLSE(masm.target.arch)) { ++ Register expected = asRegister(expectedValue); ++ masm.mov(size, result, expected); ++ masm.cas(size, result, newVal, address, true /* acquire */, true /* release */); ++ SW64Compare.gpCompare(masm, resultValue, expectedValue); ++ } else { ++ // We could avoid using a scratch register here, by reusing resultValue for the ++ // stlxr success flag and issue a mov resultValue, expectedValue in case of success ++ // before returning. ++ Register scratch = asRegister(scratchValue); ++ Label retry = new Label(); ++ Label fail = new Label(); ++ masm.bind(retry); ++ masm.ldaxr(size, result, address); ++ SW64Compare.gpCompare(masm, resultValue, expectedValue); ++ masm.branchConditionally(SW64Assembler.ConditionFlag.NE, fail); ++ masm.stlxr(size, scratch, newVal, address); ++ // if scratch == 0 then write successful, else retry. ++ masm.cbnz(32, scratch, retry); ++ masm.bind(fail); ++ } ++ } ++ } ++ ++ /** ++ * Load (Read) and Add instruction. 
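The CAS pseudocode in the CompareAndSwapOp javadoc above (store only if the old value equals the expected value, always return the old value) is the same contract exposed by java.util.concurrent.atomic's compareAndExchange. A tiny illustration, not part of the patch:

import java.util.concurrent.atomic.AtomicLong;

final class CasSketch {
    public static void main(String[] args) {
        AtomicLong cell = new AtomicLong(42);
        // compareAndExchange returns the witnessed old value, like the
        // "oldVal" the LIR op leaves in its result register.
        long hit = cell.compareAndExchange(42, 7);   // matches: stores 7, returns 42
        long miss = cell.compareAndExchange(42, 99); // no match: leaves 7, returns 7
        System.out.println(hit + " " + miss + " " + cell.get()); // 42 7 7
    }
}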
Does the following atomically: ++ * ATOMIC_READ_AND_ADD(addend, result, address): ++ * result = *address ++ * *address = result + addend ++ * return result ++ * ++ */ ++ @Opcode("ATOMIC_READ_AND_ADD") ++ public static final class AtomicReadAndAddOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(AtomicReadAndAddOp.class); ++ ++ private final SW64Kind accessKind; ++ ++ @Def({REG}) protected AllocatableValue resultValue; ++ @Alive({REG}) protected AllocatableValue addressValue; ++ @Alive({REG, CONST}) protected Value deltaValue; ++ ++ public AtomicReadAndAddOp(SW64Kind kind, AllocatableValue result, AllocatableValue address, Value delta) { ++ super(TYPE); ++ this.accessKind = kind; ++ this.resultValue = result; ++ this.addressValue = address; ++ this.deltaValue = delta; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ assert accessKind.isInteger(); ++ final int size = accessKind.getSizeInBytes() * Byte.SIZE; ++ ++ Register address = asRegister(addressValue); ++ Register result = asRegister(resultValue); ++ ++ Label retry = new Label(); ++ masm.bind(retry); ++ masm.ldaxr(size, result, address); ++ try (ScratchRegister scratchRegister1 = masm.getScratchRegister()) { ++ Register scratch1 = scratchRegister1.getRegister(); ++ if (LIRValueUtil.isConstantValue(deltaValue)) { ++ long delta = LIRValueUtil.asConstantValue(deltaValue).getJavaConstant().asLong(); ++ masm.add(size, scratch1, result, delta); ++ } else { // must be a register then ++ masm.add(size, scratch1, result, asRegister(deltaValue)); ++ } ++ try (ScratchRegister scratchRegister2 = masm.getScratchRegister()) { ++ Register scratch2 = scratchRegister2.getRegister(); ++ masm.stlxr(size, scratch2, scratch1, address); ++ // if scratch2 == 0 then write successful, else retry ++ masm.cbnz(32, scratch2, retry); ++ } ++ } ++ } ++ } ++ ++ /** ++ * Load (Read) and Add instruction. Does the following atomically: ++ * ATOMIC_READ_AND_ADD(addend, result, address): ++ * result = *address ++ * *address = result + addend ++ * return result ++ * ++ * ++ * The LSE version has different properties with regards to the register allocator. To define ++ * these differences, we have to create a separate LIR instruction class. ++ * ++ * The difference to {@linkplain AtomicReadAndAddOp} is: ++ *

++ * <li>{@linkplain #deltaValue} must be a register (@Use({REG}) instead @Alive({REG,CONST})) ++ *
  • {@linkplain #resultValue} may be an alias for the input registers (@Use instead ++ * of @Alive) ++ */ ++ @Opcode("ATOMIC_READ_AND_ADD") ++ public static final class AtomicReadAndAddLSEOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(AtomicReadAndAddLSEOp.class); ++ ++ private final SW64Kind accessKind; ++ ++ @Def({REG}) protected AllocatableValue resultValue; ++ @Use({REG}) protected AllocatableValue addressValue; ++ @Use({REG}) protected AllocatableValue deltaValue; ++ ++ public AtomicReadAndAddLSEOp(SW64Kind kind, AllocatableValue result, AllocatableValue address, AllocatableValue delta) { ++ super(TYPE); ++ this.accessKind = kind; ++ this.resultValue = result; ++ this.addressValue = address; ++ this.deltaValue = delta; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ assert accessKind.isInteger(); ++ final int size = accessKind.getSizeInBytes() * Byte.SIZE; ++ ++ Register address = asRegister(addressValue); ++ Register delta = asRegister(deltaValue); ++ Register result = asRegister(resultValue); ++ masm.ldadd(size, delta, result, address, true, true); ++ } ++ } ++ ++ /** ++ * Load (Read) and Write instruction. Does the following atomically: ++ * ATOMIC_READ_AND_WRITE(newValue, result, address): ++ * result = *address ++ * *address = newValue ++ * return result ++ * ++ */ ++ @Opcode("ATOMIC_READ_AND_WRITE") ++ public static final class AtomicReadAndWriteOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(AtomicReadAndWriteOp.class); ++ ++ private final SW64Kind accessKind; ++ ++ @Def protected AllocatableValue resultValue; ++ @Alive protected AllocatableValue addressValue; ++ @Alive protected AllocatableValue newValue; ++ @Temp protected AllocatableValue scratchValue; ++ ++ public AtomicReadAndWriteOp(SW64Kind kind, AllocatableValue result, AllocatableValue address, AllocatableValue newValue, AllocatableValue scratch) { ++ super(TYPE); ++ this.accessKind = kind; ++ this.resultValue = result; ++ this.addressValue = address; ++ this.newValue = newValue; ++ this.scratchValue = scratch; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ assert accessKind.isInteger(); ++ final int size = accessKind.getSizeInBytes() * Byte.SIZE; ++ ++ Register address = asRegister(addressValue); ++ Register value = asRegister(newValue); ++ Register result = asRegister(resultValue); ++ ++ if (SW64LIRFlagsVersioned.useLSE(masm.target.arch)) { ++ masm.swp(size, value, result, address, true, true); ++ } else { ++ Register scratch = asRegister(scratchValue); ++ Label retry = new Label(); ++ masm.bind(retry); ++ masm.ldaxr(size, result, address); ++ masm.stlxr(size, scratch, value, address); ++ // if scratch == 0 then write successful, else retry ++ masm.cbnz(32, scratch, retry); ++ } ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64BitManipulationOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64BitManipulationOp.java +new file mode 100644 +index 0000000000..9836b2b4c7 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64BitManipulationOp.java +@@ -0,0 +1,91 @@ ++/* ++ * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved. 
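The pseudocode for ATOMIC_READ_AND_ADD and ATOMIC_READ_AND_WRITE above corresponds to getAndAdd/getAndSet semantics: the previous memory value is what ends up in the result register. A minimal illustration using java.util.concurrent.atomic (sketch only, invented class name, not part of the patch):

import java.util.concurrent.atomic.AtomicLong;

final class AtomicRmwSketch {
    public static void main(String[] args) {
        AtomicLong counter = new AtomicLong(10);
        long before = counter.getAndAdd(5);  // ATOMIC_READ_AND_ADD: returns 10, memory is now 15
        long prev = counter.getAndSet(100);  // ATOMIC_READ_AND_WRITE: returns 15, memory is now 100
        System.out.println(before + " " + prev + " " + counter.get()); // 10 15 100
    }
}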
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.debug.GraalError; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.AllocatableValue; ++ ++/** ++ * Bit manipulation ops for ARMv8 ISA. ++ */ ++public class SW64BitManipulationOp extends SW64LIRInstruction { ++ public enum BitManipulationOpCode { ++ CTZ, ++ BSR, ++ BSWP, ++ CLZ, ++ } ++ ++ private static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64BitManipulationOp.class); ++ ++ @Opcode private final BitManipulationOpCode opcode; ++ @Def protected AllocatableValue result; ++ @Use({REG}) protected AllocatableValue input; ++ ++ public SW64BitManipulationOp(BitManipulationOpCode opcode, AllocatableValue result, AllocatableValue input) { ++ super(TYPE); ++ this.opcode = opcode; ++ this.result = result; ++ this.input = input; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ Register dst = asRegister(result); ++ Register src = asRegister(input); ++ final int size = input.getPlatformKind().getSizeInBytes() * Byte.SIZE; ++ switch (opcode) { ++ case CLZ: ++ masm.clz(size, dst, src); ++ break; ++ case BSR: ++ // BSR == - 1 - CLZ(input) ++ masm.clz(size, dst, src); ++ masm.neg(size, dst, dst); ++ masm.add(size, dst, dst, size - 1); ++ break; ++ case CTZ: ++ // CTZ == CLZ(rbit(input)) ++ masm.rbit(size, dst, src); ++ masm.clz(size, dst, dst); ++ break; ++ case BSWP: ++ masm.rev(size, dst, src); ++ break; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64BlockEndOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64BlockEndOp.java +new file mode 100644 +index 0000000000..91fc8975aa +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64BlockEndOp.java +@@ -0,0 +1,48 @@ ++/* ++ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
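The comments in SW64BitManipulationOp above encode two identities: BSR is (type width - 1) - CLZ(input), and CTZ is CLZ(rbit(input)). Both can be checked against the JDK's own bit-twiddling helpers; a quick stand-alone sketch (invented class name, illustration only):

final class BitManipIdentitySketch {
    public static void main(String[] args) {
        int x = 0b0001_0100;                      // any non-zero value works; here x = 20
        int clz = Integer.numberOfLeadingZeros(x);

        int bsr = Integer.SIZE - 1 - clz;         // BSR == (type width - 1) - CLZ(input)
        int ctz = Integer.numberOfLeadingZeros(Integer.reverse(x)); // CTZ == CLZ(rbit(input))

        System.out.println((1 << bsr) == Integer.highestOneBit(x));          // true
        System.out.println(ctz == Integer.numberOfTrailingZeros(x));         // true
        System.out.println(bsr + " " + ctz);                                 // 4 2 for 0b10100
    }
}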
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.lir.LIRInstruction; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.StandardOp.BlockEndOp; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++public abstract class SW64BlockEndOp extends LIRInstruction implements BlockEndOp { ++ ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64BlockEndOp.class); ++ ++ protected SW64BlockEndOp(LIRInstructionClass c) { ++ super(c); ++ } ++ ++ @Override ++ public final void emitCode(CompilationResultBuilder crb) { ++ emitCode(crb, (SW64MacroAssembler) crb.asm); ++ } ++ ++ protected abstract void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm); ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64BreakpointOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64BreakpointOp.java +new file mode 100644 +index 0000000000..f9a5f6560f +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64BreakpointOp.java +@@ -0,0 +1,56 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.STACK; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler.SW64ExceptionCode; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.meta.Value; ++ ++@Opcode("BREAKPOINT") ++public class SW64BreakpointOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64BreakpointOp.class); ++ ++ /** ++ * A set of values loaded into the Java ABI parameter locations (for inspection by a debugger). ++ */ ++ @Use({REG, STACK}) private Value[] parameters; ++ ++ public SW64BreakpointOp(Value[] parameters) { ++ super(TYPE); ++ this.parameters = parameters; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ masm.brk(SW64ExceptionCode.BREAKPOINT); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ByteSwapOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ByteSwapOp.java +new file mode 100644 +index 0000000000..db6519d876 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ByteSwapOp.java +@@ -0,0 +1,63 @@ ++/* ++ * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.debug.GraalError; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++import jdk.vm.ci.code.ValueUtil; ++import jdk.vm.ci.meta.Value; ++ ++@Opcode("BSWAP") ++public final class SW64ByteSwapOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64ByteSwapOp.class); ++ ++ @Def({OperandFlag.REG, OperandFlag.HINT}) protected Value result; ++ @Use protected Value input; ++ ++ public SW64ByteSwapOp(Value result, Value input) { ++ super(TYPE); ++ this.result = result; ++ this.input = input; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ switch ((SW64Kind) input.getPlatformKind()) { ++ case DWORD: ++ masm.rev(32, ValueUtil.asRegister(result), ValueUtil.asRegister(input)); ++ break; ++ case QWORD: ++ masm.rev(64, ValueUtil.asRegister(result), ValueUtil.asRegister(input)); ++ break; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64CCall.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64CCall.java +new file mode 100644 +index 0000000000..4864d9be0c +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64CCall.java +@@ -0,0 +1,69 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.STACK; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.ValueUtil; ++import jdk.vm.ci.meta.Value; ++ ++public final class SW64CCall extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64CCall.class); ++ ++ @Def({REG, ILLEGAL}) protected Value result; ++ @Use({REG, STACK}) protected Value[] parameters; ++ @Use({REG}) protected Value functionPtr; ++ ++ public SW64CCall(Value result, Value functionPtr, Value[] parameters) { ++ super(TYPE); ++ this.result = result; ++ this.functionPtr = functionPtr; ++ this.parameters = parameters; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ directCall(masm); ++ } ++ ++ private void directCall(SW64MacroAssembler masm) { ++ Register reg = ValueUtil.asRegister(functionPtr); ++ masm.blr(reg); ++ masm.ensureUniquePC(); ++ } ++ ++ @Override ++ public boolean destroysCallerSavedRegisters() { ++ return true; ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64Call.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64Call.java +new file mode 100644 +index 0000000000..ff8d5fbb56 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64Call.java +@@ -0,0 +1,269 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static org.graalvm.compiler.core.common.GraalOptions.GeneratePIC; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.ILLEGAL; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.STACK; ++import static jdk.vm.ci.sw64.SW64.r8; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++import static jdk.vm.ci.code.ValueUtil.isRegister; ++ ++import org.graalvm.compiler.asm.Label; ++import org.graalvm.compiler.asm.sw64.SW64Assembler; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.core.common.spi.ForeignCallLinkage; ++import org.graalvm.compiler.lir.LIRFrameState; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.InvokeTarget; ++import jdk.vm.ci.meta.ResolvedJavaMethod; ++import jdk.vm.ci.meta.Value; ++ ++public class SW64Call { ++ ++ public abstract static class CallOp extends SW64LIRInstruction { ++ @Def({REG, ILLEGAL}) protected Value result; ++ @Use({REG, STACK}) protected Value[] parameters; ++ @Temp({REG, STACK}) protected Value[] temps; ++ @State protected LIRFrameState state; ++ ++ protected CallOp(LIRInstructionClass c, Value result, Value[] parameters, Value[] temps, LIRFrameState state) { ++ super(c); ++ this.result = result; ++ this.parameters = parameters; ++ this.state = state; ++ this.temps = addStackSlotsToTemporaries(parameters, temps); ++ assert temps != null; ++ } ++ ++ @Override ++ public boolean destroysCallerSavedRegisters() { ++ return true; ++ } ++ } ++ ++ public abstract static class MethodCallOp extends CallOp { ++ protected final ResolvedJavaMethod callTarget; ++ ++ protected MethodCallOp(LIRInstructionClass c, ResolvedJavaMethod callTarget, Value result, Value[] parameters, Value[] temps, LIRFrameState state) { ++ super(c, result, parameters, temps, state); ++ this.callTarget = callTarget; ++ } ++ } ++ ++ @Opcode("CALL_INDIRECT") ++ public static class IndirectCallOp extends MethodCallOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(IndirectCallOp.class); ++ ++ @Use({REG}) protected Value targetAddress; ++ ++ public IndirectCallOp(ResolvedJavaMethod callTarget, Value result, Value[] parameters, Value[] temps, Value targetAddress, LIRFrameState state) { ++ this(TYPE, callTarget, result, parameters, temps, targetAddress, state); ++ } ++ ++ protected IndirectCallOp(LIRInstructionClass c, ResolvedJavaMethod callTarget, Value result, Value[] parameters, Value[] temps, Value targetAddress, ++ LIRFrameState state) { ++ super(c, callTarget, result, parameters, temps, state); ++ this.targetAddress = targetAddress; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ Register target = asRegister(targetAddress); ++ indirectCall(crb, masm, target, callTarget, state); ++ } ++ ++ @Override ++ public void verify() { ++ super.verify(); ++ assert isRegister(targetAddress) : "The current register allocator cannot handle variables to be used at call sites, " + "it must be in a fixed register for now"; ++ } ++ } ++ ++ @Opcode("CALL_DIRECT") ++ public abstract static class DirectCallOp extends MethodCallOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(DirectCallOp.class); ++ ++ public 
DirectCallOp(ResolvedJavaMethod target, Value result, Value[] parameters, Value[] temps, LIRFrameState state) { ++ super(TYPE, target, result, parameters, temps, state); ++ } ++ ++ protected DirectCallOp(LIRInstructionClass c, ResolvedJavaMethod callTarget, Value result, Value[] parameters, Value[] temps, LIRFrameState state) { ++ super(c, callTarget, result, parameters, temps, state); ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ directCall(crb, masm, callTarget, null, state); ++ } ++ } ++ ++ public abstract static class ForeignCallOp extends CallOp { ++ protected final ForeignCallLinkage callTarget; ++ protected final Label label; ++ ++ protected ForeignCallOp(LIRInstructionClass c, ForeignCallLinkage callTarget, Value result, Value[] parameters, Value[] temps, LIRFrameState state, Label label) { ++ super(c, result, parameters, temps, state); ++ this.callTarget = callTarget; ++ this.label = label; ++ } ++ ++ @Override ++ public boolean destroysCallerSavedRegisters() { ++ return callTarget.destroysRegisters(); ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ emitCall(crb, masm); ++ } ++ ++ protected abstract void emitCall(CompilationResultBuilder crb, SW64MacroAssembler masm); ++ } ++ ++ @Opcode("NEAR_FOREIGN_CALL") ++ public static class DirectNearForeignCallOp extends ForeignCallOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(DirectNearForeignCallOp.class); ++ ++ public DirectNearForeignCallOp(ForeignCallLinkage callTarget, Value result, Value[] parameters, Value[] temps, LIRFrameState state, Label label) { ++ super(TYPE, callTarget, result, parameters, temps, state, label); ++ } ++ ++ @Override ++ protected void emitCall(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ directCall(crb, masm, callTarget, null, state, label); ++ } ++ } ++ ++ @Opcode("FAR_FOREIGN_CALL") ++ public static class DirectFarForeignCallOp extends ForeignCallOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(DirectFarForeignCallOp.class); ++ ++ public DirectFarForeignCallOp(ForeignCallLinkage callTarget, Value result, Value[] parameters, Value[] temps, LIRFrameState state, Label label) { ++ super(TYPE, callTarget, result, parameters, temps, state, label); ++ } ++ ++ @Override ++ protected void emitCall(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ // We can use any scratch register we want, since we know that they have been saved ++ // before calling. ++ directCall(crb, masm, callTarget, r8, state, label); ++ } ++ } ++ ++ /** ++ * Tests whether linkage can be called directly under all circumstances without the need for a ++ * scratch register. ++ * ++ * Note this is a pessimistic assumption: This may return false despite a near call/jump being ++ * adequate. ++ * ++ * @param linkage Foreign call description ++ * @return true if foreign call can be called directly and does not need a scratch register to ++ * load the address into. 
++ */ ++ public static boolean isNearCall(ForeignCallLinkage linkage) { ++ long maxOffset = linkage.getMaxCallTargetOffset(); ++ return maxOffset != -1 && SW64MacroAssembler.isBranchImmediateOffset(maxOffset); ++ } ++ ++ public static void directCall(CompilationResultBuilder crb, SW64MacroAssembler masm, InvokeTarget callTarget, Register scratch, LIRFrameState info) { ++ directCall(crb, masm, callTarget, scratch, info, null); ++ } ++ ++ public static void directCall(CompilationResultBuilder crb, SW64MacroAssembler masm, InvokeTarget callTarget, Register scratch, LIRFrameState info, Label label) { ++ int before = masm.position(); ++ if (scratch != null) { ++ if (GeneratePIC.getValue(crb.getOptions())) { ++ masm.bl(0); ++ } else { ++ /* ++ * Offset might not fit into a 28-bit immediate, generate an indirect call with a ++ * 64-bit immediate address which is fixed up by HotSpot. ++ */ ++ masm.movNativeAddress(scratch, 0L); ++ masm.blr(scratch); ++ } ++ } else { ++ // Address is fixed up by HotSpot. ++ masm.bl(0); ++ } ++ if (label != null) { ++ // We need this label to be the return address. ++ masm.bind(label); ++ } ++ int after = masm.position(); ++ crb.recordDirectCall(before, after, callTarget, info); ++ crb.recordExceptionHandlers(after, info); ++ masm.ensureUniquePC(); ++ } ++ ++ public static void indirectCall(CompilationResultBuilder crb, SW64MacroAssembler masm, Register dst, InvokeTarget callTarget, LIRFrameState info) { ++ int before = masm.position(); ++ masm.blr(dst); ++ int after = masm.position(); ++ crb.recordIndirectCall(before, after, callTarget, info); ++ crb.recordExceptionHandlers(after, info); ++ masm.ensureUniquePC(); ++ } ++ ++ public static void directJmp(CompilationResultBuilder crb, SW64MacroAssembler masm, InvokeTarget callTarget) { ++ try (SW64MacroAssembler.ScratchRegister scratch = masm.getScratchRegister()) { ++ int before = masm.position(); ++ if (GeneratePIC.getValue(crb.getOptions())) { ++ masm.jmp(); ++ } else { ++ masm.movNativeAddress(scratch.getRegister(), 0L); ++ masm.jmp(scratch.getRegister()); ++ } ++ int after = masm.position(); ++ crb.recordDirectCall(before, after, callTarget, null); ++ masm.ensureUniquePC(); ++ } ++ } ++ ++ public static void indirectJmp(CompilationResultBuilder crb, SW64MacroAssembler masm, Register dst, InvokeTarget target) { ++ int before = masm.position(); ++ masm.jmp(dst); ++ int after = masm.position(); ++ crb.recordIndirectCall(before, after, target, null); ++ masm.ensureUniquePC(); ++ } ++ ++ public static void directConditionalJmp(CompilationResultBuilder crb, SW64MacroAssembler masm, InvokeTarget target, SW64Assembler.ConditionFlag cond) { ++ int before = masm.position(); ++ masm.branchConditionally(cond); ++ int after = masm.position(); ++ crb.recordDirectCall(before, after, target, null); ++ masm.ensureUniquePC(); ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64Compare.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64Compare.java +new file mode 100644 +index 0000000000..b96207415c +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64Compare.java +@@ -0,0 +1,176 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.CONST; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++import static org.graalvm.compiler.lir.LIRValueUtil.asJavaConstant; ++import static org.graalvm.compiler.lir.LIRValueUtil.isJavaConstant; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++import static jdk.vm.ci.code.ValueUtil.isRegister; ++ ++import org.graalvm.compiler.core.common.NumUtil; ++import org.graalvm.compiler.asm.sw64.SW64Assembler; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.core.common.calc.Condition; ++import org.graalvm.compiler.debug.GraalError; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++import jdk.vm.ci.meta.JavaConstant; ++import jdk.vm.ci.meta.Value; ++ ++public class SW64Compare { ++ ++ public static class CompareOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(CompareOp.class); ++ ++ @Use protected Value x; ++ @Use({REG, CONST}) protected Value y; ++ ++ public CompareOp(Value x, Value y) { ++ super(TYPE); ++ assert ((SW64Kind) x.getPlatformKind()).isInteger() && ((SW64Kind) y.getPlatformKind()).isInteger(); ++ assert x.getPlatformKind() == y.getPlatformKind(); ++ this.x = x; ++ this.y = y; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ gpCompare(masm, x, y); ++ } ++ } ++ ++ /** ++ * Compares integer values x and y. ++ * ++ * @param x integer value to compare. May not be null. ++ * @param y integer value to compare. May not be null. 
++ */ ++ public static void gpCompare(SW64MacroAssembler masm, Value x, Value y) { ++ final int size = x.getPlatformKind().getSizeInBytes() * Byte.SIZE; ++ if (isRegister(y)) { ++ masm.cmp(size, asRegister(x), asRegister(y)); ++ } else { ++ JavaConstant constant = asJavaConstant(y); ++ if (constant.isDefaultForKind()) { ++ masm.cmp(size, asRegister(x), 0); ++ } else { ++ final long longValue = constant.asLong(); ++ assert NumUtil.isInt(longValue); ++ int maskedValue; ++ switch (constant.getJavaKind()) { ++ case Boolean: ++ case Byte: ++ maskedValue = (int) (longValue & 0xFF); ++ break; ++ case Char: ++ case Short: ++ maskedValue = (int) (longValue & 0xFFFF); ++ break; ++ case Int: ++ case Long: ++ maskedValue = (int) longValue; ++ break; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ masm.cmp(size, asRegister(x), maskedValue); ++ } ++ } ++ } ++ ++ public static class FloatCompareOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(FloatCompareOp.class); ++ ++ @Use protected Value x; ++ @Use({REG, CONST}) protected Value y; ++ private final Condition condition; ++ private final boolean unorderedIsTrue; ++ ++ public FloatCompareOp(Value x, Value y, Condition condition, boolean unorderedIsTrue) { ++ super(TYPE); ++ assert !isJavaConstant(y) || isFloatCmpConstant(y, condition, unorderedIsTrue); ++ this.x = x; ++ this.y = y; ++ this.condition = condition; ++ this.unorderedIsTrue = unorderedIsTrue; ++ } ++ ++ /** ++ * Checks if val can be used as a constant for the gpCompare operation or not. ++ */ ++ public static boolean isFloatCmpConstant(Value val, Condition condition, boolean unorderedIsTrue) { ++ // If the condition is "EQ || unordered" or "NE && unordered" we have to use 2 registers ++ // in any case. ++ if (!(condition == Condition.EQ && unorderedIsTrue || condition == Condition.NE && !unorderedIsTrue)) { ++ return false; ++ } ++ return isJavaConstant(val) && asJavaConstant(val).isDefaultForKind(); ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ assert isRegister(x); ++ int size = x.getPlatformKind().getSizeInBytes() * Byte.SIZE; ++ if (isRegister(y)) { ++ masm.fcmp(size, asRegister(x), asRegister(y)); ++ // There is no condition code for "EQ || unordered" nor one for "NE && unordered", ++ // so we have to fix them up ourselves. ++ // In both cases we combine the asked for condition into the EQ, respectively NE ++ // condition, i.e. ++ // if EQ && unoreredIsTrue, then the EQ flag will be set if the two values gpCompare ++ // unequal but are ++ // unordered. 
++ if (condition == Condition.EQ && unorderedIsTrue) { ++ // if f1 ordered f2: ++ // result = f1 == f2 ++ // else: ++ // result = EQUAL ++ int nzcv = 0b0100; // EQUAL -> Z = 1 ++ masm.fccmp(size, asRegister(x), asRegister(y), nzcv, SW64Assembler.ConditionFlag.VC); ++ } else if (condition == Condition.NE && !unorderedIsTrue) { ++ // if f1 ordered f2: ++ // result = f1 != f2 ++ // else: ++ // result = !NE == EQUAL ++ int nzcv = 0b0100; // EQUAL -> Z = 1 ++ masm.fccmp(size, asRegister(x), asRegister(y), nzcv, SW64Assembler.ConditionFlag.VC); ++ } ++ } else { ++ // cmp against +0.0 ++ masm.fcmpZero(size, asRegister(x)); ++ } ++ } ++ ++ @Override ++ public void verify() { ++ assert x.getPlatformKind().equals(y.getPlatformKind()) : "a: " + x + " b: " + y; ++ } ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ControlFlow.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ControlFlow.java +new file mode 100644 +index 0000000000..156672a3e2 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ControlFlow.java +@@ -0,0 +1,300 @@ ++/* ++ * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static jdk.vm.ci.code.ValueUtil.asAllocatableValue; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.HINT; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++ ++import java.util.function.Function; ++ ++import org.graalvm.compiler.asm.Label; ++import org.graalvm.compiler.core.common.NumUtil; ++import org.graalvm.compiler.asm.sw64.SW64Assembler; ++import org.graalvm.compiler.asm.sw64.SW64Assembler.ConditionFlag; ++import org.graalvm.compiler.asm.sw64.SW64Assembler.ExtendType; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.code.CompilationResult.JumpTable; ++import org.graalvm.compiler.core.common.LIRKind; ++import org.graalvm.compiler.core.common.calc.Condition; ++import org.graalvm.compiler.debug.GraalError; ++import org.graalvm.compiler.lir.ConstantValue; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.LabelRef; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.StandardOp; ++import org.graalvm.compiler.lir.SwitchStrategy; ++import org.graalvm.compiler.lir.SwitchStrategy.BaseSwitchClosure; ++import org.graalvm.compiler.lir.Variable; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.Constant; ++import jdk.vm.ci.meta.JavaConstant; ++import jdk.vm.ci.meta.Value; ++ ++public class SW64ControlFlow { ++ ++ /** ++ * Compares integer register to 0 and branches if condition is true. Condition may only be equal ++ * or non-equal. ++ */ ++ // TODO (das) where do we need this? ++ // public static class CompareAndBranchOp extends SW64LIRInstruction implements ++ // StandardOp.BranchOp { ++ // private final ConditionFlag condition; ++ // private final LabelRef destination; ++ // @Use({REG}) private Value x; ++ // ++ // public CompareAndBranchOp(Condition condition, LabelRef destination, Value x) { ++ // assert condition == Condition.EQ || condition == Condition.NE; ++ // assert ARMv8.isGpKind(x.getKind()); ++ // this.condition = condition == Condition.EQ ? 
ConditionFlag.EQ : ConditionFlag.NE; ++ // this.destination = destination; ++ // this.x = x; ++ // } ++ // ++ // @Override ++ // public void emitCode(CompilationResultBuilder crb, ARMv8MacroAssembler masm) { ++ // int size = ARMv8.bitsize(x.getKind()); ++ // if (condition == ConditionFlag.EQ) { ++ // masm.cbz(size, asRegister(x), destination.label()); ++ // } else { ++ // masm.cbnz(size, asRegister(x), destination.label()); ++ // } ++ // } ++ // } ++ ++ public static class BranchOp extends SW64BlockEndOp implements StandardOp.BranchOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(BranchOp.class); ++ ++ private final SW64Assembler.ConditionFlag condition; ++ private final LabelRef trueDestination; ++ private final LabelRef falseDestination; ++ ++ private final double trueDestinationProbability; ++ ++ public BranchOp(SW64Assembler.ConditionFlag condition, LabelRef trueDestination, LabelRef falseDestination, double trueDestinationProbability) { ++ super(TYPE); ++ this.condition = condition; ++ this.trueDestination = trueDestination; ++ this.falseDestination = falseDestination; ++ this.trueDestinationProbability = trueDestinationProbability; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ /* ++ * Explanation: Depending on what the successor edge is, we can use the fall-through to ++ * optimize the generated code. If neither is a successor edge, use the branch ++ * probability to try to take the conditional jump as often as possible to avoid ++ * executing two instructions instead of one. ++ */ ++ if (crb.isSuccessorEdge(trueDestination)) { ++ masm.branchConditionally(condition.negate(), falseDestination.label()); ++ } else if (crb.isSuccessorEdge(falseDestination)) { ++ masm.branchConditionally(condition, trueDestination.label()); ++ } else if (trueDestinationProbability < 0.5) { ++ masm.branchConditionally(condition.negate(), falseDestination.label()); ++ masm.jmp(trueDestination.label()); ++ } else { ++ masm.branchConditionally(condition, trueDestination.label()); ++ masm.jmp(falseDestination.label()); ++ } ++ } ++ ++ } ++ ++ @Opcode("CMOVE") ++ public static class CondMoveOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(CondMoveOp.class); ++ ++ @Def protected Value result; ++ @Use protected Value trueValue; ++ @Use protected Value falseValue; ++ private final SW64Assembler.ConditionFlag condition; ++ ++ public CondMoveOp(Variable result, SW64Assembler.ConditionFlag condition, Value trueValue, Value falseValue) { ++ super(TYPE); ++ assert trueValue.getPlatformKind() == falseValue.getPlatformKind() && trueValue.getPlatformKind() == result.getPlatformKind(); ++ this.result = result; ++ this.condition = condition; ++ this.trueValue = trueValue; ++ this.falseValue = falseValue; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ SW64Kind kind = (SW64Kind) trueValue.getPlatformKind(); ++ int size = kind.getSizeInBytes() * Byte.SIZE; ++ if (kind.isInteger()) { ++ masm.cmov(size, asRegister(result), asRegister(trueValue), asRegister(falseValue), condition); ++ } else { ++ masm.fcmov(size, asRegister(result), asRegister(trueValue), asRegister(falseValue), condition); ++ } ++ } ++ } ++ ++ public static class StrategySwitchOp extends SW64BlockEndOp implements StandardOp.BlockEndOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(StrategySwitchOp.class); ++ ++ private final 
Constant[] keyConstants; ++ protected final SwitchStrategy strategy; ++ private final Function converter; ++ private final LabelRef[] keyTargets; ++ private final LabelRef defaultTarget; ++ @Alive protected Value key; ++ // TODO (das) This could be optimized: We only need the scratch register in case of a ++ // datapatch, or too large immediates. ++ @Temp protected Value scratch; ++ ++ public StrategySwitchOp(SwitchStrategy strategy, LabelRef[] keyTargets, LabelRef defaultTarget, Value key, Value scratch, ++ Function converter) { ++ this(TYPE, strategy, keyTargets, defaultTarget, key, scratch, converter); ++ } ++ ++ protected StrategySwitchOp(LIRInstructionClass c, SwitchStrategy strategy, LabelRef[] keyTargets, LabelRef defaultTarget, Value key, Value scratch, ++ Function converter) { ++ super(c); ++ this.strategy = strategy; ++ this.converter = converter; ++ this.keyConstants = strategy.getKeyConstants(); ++ this.keyTargets = keyTargets; ++ this.defaultTarget = defaultTarget; ++ this.key = key; ++ this.scratch = scratch; ++ assert keyConstants.length == keyTargets.length; ++ assert keyConstants.length == strategy.keyProbabilities.length; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ strategy.run(new SwitchClosure(asRegister(key), crb, masm)); ++ } ++ ++ public class SwitchClosure extends BaseSwitchClosure { ++ ++ protected final Register keyRegister; ++ protected final CompilationResultBuilder crb; ++ protected final SW64MacroAssembler masm; ++ ++ protected SwitchClosure(Register keyRegister, CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ super(crb, masm, keyTargets, defaultTarget); ++ this.keyRegister = keyRegister; ++ this.crb = crb; ++ this.masm = masm; ++ } ++ ++ protected void emitComparison(Constant c) { ++ JavaConstant jc = (JavaConstant) c; ++ ConstantValue constVal = new ConstantValue(LIRKind.value(key.getPlatformKind()), c); ++ switch (jc.getJavaKind()) { ++ case Int: ++ long lc = jc.asLong(); ++ assert NumUtil.isInt(lc); ++ emitCompare(crb, masm, key, scratch, constVal); ++ break; ++ case Long: ++ emitCompare(crb, masm, key, scratch, constVal); ++ break; ++ case Object: ++ emitCompare(crb, masm, key, scratch, constVal); ++ break; ++ default: ++ throw new GraalError("switch only supported for int, long and object"); ++ } ++ } ++ ++ @Override ++ protected void conditionalJump(int index, Condition condition, Label target) { ++ emitComparison(keyConstants[index]); ++ masm.branchConditionally(converter.apply(condition), target); ++ } ++ } ++ } ++ ++ public static final class TableSwitchOp extends SW64BlockEndOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(TableSwitchOp.class); ++ private final int lowKey; ++ private final LabelRef defaultTarget; ++ private final LabelRef[] targets; ++ @Use protected Value index; ++ @Temp({REG, HINT}) protected Value idxScratch; ++ @Temp protected Value scratch; ++ ++ public TableSwitchOp(final int lowKey, final LabelRef defaultTarget, final LabelRef[] targets, Value index, Variable scratch, Variable idxScratch) { ++ super(TYPE); ++ this.lowKey = lowKey; ++ this.defaultTarget = defaultTarget; ++ this.targets = targets; ++ this.index = index; ++ this.scratch = scratch; ++ this.idxScratch = idxScratch; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ Register indexReg = asRegister(index, SW64Kind.DWORD); ++ Register idxScratchReg = asRegister(idxScratch, SW64Kind.DWORD); ++ Register 
scratchReg = asRegister(scratch, SW64Kind.QWORD); ++ ++ // Compare index against jump table bounds ++ int highKey = lowKey + targets.length - 1; ++ masm.sub(32, idxScratchReg, indexReg, lowKey); ++ masm.cmp(32, idxScratchReg, highKey - lowKey); ++ ++ // Jump to default target if index is not within the jump table ++ if (defaultTarget != null) { ++ masm.branchConditionally(ConditionFlag.HI, defaultTarget.label()); ++ } ++ ++ Label jumpTable = new Label(); ++ masm.adr(scratchReg, jumpTable); ++ masm.add(64, scratchReg, scratchReg, idxScratchReg, ExtendType.UXTW, 2); ++ masm.jmp(scratchReg); ++ masm.bind(jumpTable); ++ // emit jump table entries ++ for (LabelRef target : targets) { ++ masm.jmp(target.label()); ++ } ++ JumpTable jt = new JumpTable(jumpTable.position(), lowKey, highKey - 1, 4); ++ crb.compilationResult.addAnnotation(jt); ++ } ++ } ++ ++ private static void emitCompare(CompilationResultBuilder crb, SW64MacroAssembler masm, Value key, Value scratchValue, ConstantValue c) { ++ long imm = c.getJavaConstant().asLong(); ++ final int size = key.getPlatformKind().getSizeInBytes() * Byte.SIZE; ++ if (SW64MacroAssembler.isComparisonImmediate(imm)) { ++ masm.cmp(size, asRegister(key), (int) imm); ++ } else { ++ SW64Move.move(crb, masm, asAllocatableValue(scratchValue), c); ++ masm.cmp(size, asRegister(key), asRegister(scratchValue)); ++ } ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64FrameMap.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64FrameMap.java +new file mode 100644 +index 0000000000..44f6fc3da5 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64FrameMap.java +@@ -0,0 +1,107 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import org.graalvm.compiler.core.common.LIRKind; ++import org.graalvm.compiler.lir.framemap.FrameMap; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++import jdk.vm.ci.code.CodeCacheProvider; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.StackSlot; ++ ++/** ++ * SW64 specific frame map. ++ *
++ * This is the format of an SW64 stack frame:
++ * <p>
++ *
++ * <pre>
    ++ *   Base       Contents
    ++ *
    ++ *            :                                :  -----
    ++ *   caller   | incoming overflow argument n   |    ^
    ++ *   frame    :     ...                        :    | positive
    ++ *            | incoming overflow argument 0   |    | offsets
    ++ *   ---------+--------------------------------+-------------------------
    ++ *            | return address                 |    |            ^
    ++ *            | prev. frame pointer            |    |            |
    ++ *            +--------------------------------+    |            |
    ++ *            | spill slot 0                   |    | negative   |      ^
    ++ *    callee  :     ...                        :    v offsets    |      |
    ++ *    frame   | spill slot n                   |  -----        total  frame
    ++ *            +--------------------------------+               frame  size
    ++ *            | alignment padding              |               size     |
    ++ *            +--------------------------------+  -----          |      |
    ++ *            | outgoing overflow argument n   |    ^            |      |
    ++ *            :     ...                        :    | positive   |      |
    ++ *            | outgoing overflow argument 0   |    | offsets    v      v
    ++ *    %sp-->  +--------------------------------+---------------------------
    ++ *
++ * </pre>
++ *
++ * The spill slot area also includes stack allocated memory blocks (ALLOCA blocks). The size of such
++ * a block may be greater than the size of a normal spill slot or the word size.
++ * <p>
++ * A runtime can reserve space at the beginning of the overflow argument area. The calling
++ * convention can specify that the first overflow stack argument is not at offset 0, but at a
++ * specified offset. Use {@link CodeCacheProvider#getMinimumOutgoingSize()} to make sure that
++ * call-free methods also have this space reserved. Then the VM can use the memory at offset 0
++ * relative to the stack pointer.
++ * <p>
    ++ */ ++public class SW64FrameMap extends FrameMap { ++ // Note: Spill size includes callee save area ++ ++ /** ++ * Creates a new frame map for the specified method. ++ */ ++ public SW64FrameMap(CodeCacheProvider codeCache, RegisterConfig registerConfig, ReferenceMapBuilderFactory referenceMapFactory) { ++ super(codeCache, registerConfig, referenceMapFactory); ++ initialSpillSize = frameSetupSize(); ++ spillSize = initialSpillSize; ++ } ++ ++ @Override ++ public int totalFrameSize() { ++ // frameSize + return address + frame pointer ++ return frameSize() + frameSetupSize(); ++ } ++ ++ private int frameSetupSize() { ++ // Size of return address and frame pointer that are saved in function prologue ++ return getTarget().arch.getWordSize() * 2; ++ } ++ ++ @Override ++ public int currentFrameSize() { ++ return alignFrameSize(outgoingSize + spillSize); ++ } ++ ++ public StackSlot allocateDeoptimizationRescueSlot() { ++ assert spillSize == initialSpillSize : "Deoptimization rescue slot must be the first stack slot"; ++ return allocateSpillSlot(LIRKind.value(SW64Kind.QWORD)); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64FrameMapBuilder.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64FrameMapBuilder.java +new file mode 100644 +index 0000000000..17f2af464b +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64FrameMapBuilder.java +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import org.graalvm.compiler.lir.framemap.FrameMap; ++import org.graalvm.compiler.lir.framemap.FrameMapBuilderImpl; ++ ++import jdk.vm.ci.code.CodeCacheProvider; ++import jdk.vm.ci.code.RegisterConfig; ++import jdk.vm.ci.code.StackSlot; ++ ++public class SW64FrameMapBuilder extends FrameMapBuilderImpl { ++ ++ public SW64FrameMapBuilder(FrameMap frameMap, CodeCacheProvider codeCache, RegisterConfig registerConfig) { ++ super(frameMap, codeCache, registerConfig); ++ } ++ ++ public StackSlot allocateDeoptimizationRescueSlot() { ++ return ((SW64FrameMap) getFrameMap()).allocateDeoptimizationRescueSlot(); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64LIRFlagsVersioned.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64LIRFlagsVersioned.java +new file mode 100644 +index 0000000000..28c8ed7674 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64LIRFlagsVersioned.java +@@ -0,0 +1,37 @@ ++/* ++ * Copyright (c) 2018, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import jdk.vm.ci.sw64.SW64; ++import jdk.vm.ci.sw64.SW64.CPUFeature; ++import jdk.vm.ci.sw64.SW64.Flag; ++import jdk.vm.ci.code.Architecture; ++ ++public class SW64LIRFlagsVersioned { ++ public static boolean useLSE(Architecture arch) { ++ SW64 sw64 = (SW64) arch; ++ return sw64.getFeatures().contains(CPUFeature.LSE) || sw64.getFlags().contains(Flag.UseLSE); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64LIRInstruction.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64LIRInstruction.java +new file mode 100644 +index 0000000000..fa44227574 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64LIRInstruction.java +@@ -0,0 +1,43 @@ ++/* ++ * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. 
++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.lir.LIRInstruction; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++public abstract class SW64LIRInstruction extends LIRInstruction { ++ protected SW64LIRInstruction(LIRInstructionClass c) { ++ super(c); ++ } ++ ++ @Override ++ public final void emitCode(CompilationResultBuilder crb) { ++ emitCode(crb, (SW64MacroAssembler) crb.asm); ++ } ++ ++ protected abstract void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm); ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64Move.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64Move.java +new file mode 100644 +index 0000000000..91dd91410f +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64Move.java +@@ -0,0 +1,557 @@ ++/* ++ * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.COMPOSITE; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.HINT; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.STACK; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.UNINITIALIZED; ++import static org.graalvm.compiler.lir.LIRValueUtil.asJavaConstant; ++import static org.graalvm.compiler.lir.LIRValueUtil.isJavaConstant; ++import static jdk.vm.ci.sw64.SW64.sp; ++import static jdk.vm.ci.sw64.SW64.zr; ++import static jdk.vm.ci.code.ValueUtil.asAllocatableValue; ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++import static jdk.vm.ci.code.ValueUtil.asStackSlot; ++import static jdk.vm.ci.code.ValueUtil.isRegister; ++import static jdk.vm.ci.code.ValueUtil.isStackSlot; ++ ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler.ScratchRegister; ++import org.graalvm.compiler.core.common.LIRKind; ++import org.graalvm.compiler.core.common.type.DataPointerConstant; ++import org.graalvm.compiler.debug.GraalError; ++import org.graalvm.compiler.lir.LIRFrameState; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.StandardOp; ++import org.graalvm.compiler.lir.StandardOp.LoadConstantOp; ++import org.graalvm.compiler.lir.StandardOp.NullCheck; ++import org.graalvm.compiler.lir.StandardOp.ValueMoveOp; ++import org.graalvm.compiler.lir.VirtualStackSlot; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.Constant; ++import jdk.vm.ci.meta.JavaConstant; ++import jdk.vm.ci.meta.PlatformKind; ++import jdk.vm.ci.meta.Value; ++ ++public class SW64Move { ++ ++ public static class LoadInlineConstant extends SW64LIRInstruction implements LoadConstantOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(LoadInlineConstant.class); ++ ++ private JavaConstant constant; ++ @Def({REG, STACK}) AllocatableValue result; ++ ++ public LoadInlineConstant(JavaConstant constant, AllocatableValue result) { ++ super(TYPE); ++ this.constant = constant; ++ this.result = result; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ if (isRegister(result)) { ++ const2reg(crb, masm, result, constant); ++ } else if (isStackSlot(result)) { ++ StackSlot slot = asStackSlot(result); ++ const2stack(crb, masm, slot, constant); ++ } ++ } ++ ++ @Override ++ public Constant getConstant() { ++ return constant; ++ } ++ ++ @Override ++ public AllocatableValue getResult() { ++ return result; ++ } ++ } ++ ++ @Opcode("MOVE") ++ public static class Move extends SW64LIRInstruction implements ValueMoveOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(Move.class); ++ ++ @Def({REG, STACK, HINT}) protected AllocatableValue result; ++ @Use({REG, STACK}) protected AllocatableValue input; ++ ++ public Move(AllocatableValue result, AllocatableValue input) { ++ super(TYPE); ++ this.result = result; ++ this.input = input; ++ assert !(isStackSlot(result) && isStackSlot(input)); ++ } ++ ++ @Override ++ public void 
emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ move(crb, masm, getResult(), getInput()); ++ } ++ ++ @Override ++ public AllocatableValue getInput() { ++ return input; ++ } ++ ++ @Override ++ public AllocatableValue getResult() { ++ return result; ++ } ++ } ++ ++ public static class LoadAddressOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(LoadAddressOp.class); ++ ++ @Def protected AllocatableValue result; ++ @Use(COMPOSITE) protected SW64AddressValue address; ++ ++ public LoadAddressOp(AllocatableValue result, SW64AddressValue address) { ++ super(TYPE); ++ this.result = result; ++ this.address = address; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ Register dst = asRegister(result); ++ SW64Address adr = address.toAddress(); ++ masm.loadAddress(dst, adr, address.getScaleFactor()); ++ } ++ } ++ ++ public static class LoadDataOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(LoadDataOp.class); ++ ++ @Def protected AllocatableValue result; ++ private final DataPointerConstant data; ++ ++ public LoadDataOp(AllocatableValue result, DataPointerConstant data) { ++ super(TYPE); ++ this.result = result; ++ this.data = data; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ Register dst = asRegister(result); ++ if (crb.compilationResult.isImmutablePIC()) { ++ crb.recordDataReferenceInCode(data); ++ masm.addressOf(dst); ++ } else { ++ masm.loadAddress(dst, (SW64Address) crb.recordDataReferenceInCode(data), data.getAlignment()); ++ } ++ } ++ } ++ ++ public static class StackLoadAddressOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(StackLoadAddressOp.class); ++ ++ @Def protected AllocatableValue result; ++ @Use({STACK, UNINITIALIZED}) protected AllocatableValue slot; ++ ++ public StackLoadAddressOp(AllocatableValue result, AllocatableValue slot) { ++ super(TYPE); ++ assert slot instanceof VirtualStackSlot || slot instanceof StackSlot; ++ this.result = result; ++ this.slot = slot; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ try (ScratchRegister addrReg = masm.getScratchRegister()) { ++ SW64Address address = loadStackSlotAddress(crb, masm, (StackSlot) slot, addrReg.getRegister()); ++ PlatformKind kind = SW64Kind.QWORD; ++ masm.loadAddress(asRegister(result, kind), address, kind.getSizeInBytes()); ++ } ++ } ++ } ++ ++ public static class MembarOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(MembarOp.class); ++ ++ // For future use. ++ @SuppressWarnings("unused") private final int barriers; ++ ++ public MembarOp(int barriers) { ++ super(TYPE); ++ this.barriers = barriers; ++ } ++ ++ @Override ++ // The odd-looking @SuppressWarnings("all") is here because of ++ // a compiler bug which warns that crb is unused, and also ++ // warns that @SuppressWarnings("unused") is unnecessary. ++ public void emitCode(@SuppressWarnings("all") CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ // As I understand it load acquire/store release have the same semantics as on IA64 ++ // and allow us to handle LoadStore, LoadLoad and StoreStore without an explicit ++ // barrier. 
++ // But Graal support to figure out if a load/store is volatile is non-existant so for ++ // now just use memory barriers everywhere. ++ // if ((barrier & MemoryBarriers.STORE_LOAD) != 0) { ++ masm.dmb(SW64MacroAssembler.BarrierKind.ANY_ANY); ++ // } ++ } ++ } ++ ++ abstract static class MemOp extends SW64LIRInstruction implements StandardOp.ImplicitNullCheck { ++ ++ protected final SW64Kind kind; ++ @Use({COMPOSITE}) protected SW64AddressValue addressValue; ++ @State protected LIRFrameState state; ++ ++ MemOp(LIRInstructionClass c, SW64Kind kind, SW64AddressValue address, LIRFrameState state) { ++ super(c); ++ this.kind = kind; ++ this.addressValue = address; ++ this.state = state; ++ } ++ ++ protected abstract void emitMemAccess(CompilationResultBuilder crb, SW64MacroAssembler masm); ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ if (state != null) { ++ crb.recordImplicitException(masm.position(), state); ++ } ++ emitMemAccess(crb, masm); ++ } ++ ++ @Override ++ public boolean makeNullCheckFor(Value value, LIRFrameState nullCheckState, int implicitNullCheckLimit) { ++ int displacement = addressValue.getDisplacement(); ++ if (state == null && value.equals(addressValue.getBase()) && addressValue.getOffset().equals(Value.ILLEGAL) && displacement >= 0 && displacement < implicitNullCheckLimit) { ++ state = nullCheckState; ++ return true; ++ } ++ return false; ++ } ++ } ++ ++ public static final class LoadOp extends MemOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(LoadOp.class); ++ ++ @Def protected AllocatableValue result; ++ ++ public LoadOp(SW64Kind kind, AllocatableValue result, SW64AddressValue address, LIRFrameState state) { ++ super(TYPE, kind, address, state); ++ this.result = result; ++ } ++ ++ @Override ++ protected void emitMemAccess(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ SW64Address address = addressValue.toAddress(); ++ Register dst = asRegister(result); ++ ++ int destSize = result.getPlatformKind().getSizeInBytes() * Byte.SIZE; ++ int srcSize = kind.getSizeInBytes() * Byte.SIZE; ++ if (kind.isInteger()) { ++ masm.ldr(srcSize, dst, address); ++ } else { ++ assert srcSize == destSize; ++ masm.fldr(srcSize, dst, address); ++ } ++ } ++ } ++ ++ public static class StoreOp extends MemOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(StoreOp.class); ++ @Use protected AllocatableValue input; ++ ++ public StoreOp(SW64Kind kind, SW64AddressValue address, AllocatableValue input, LIRFrameState state) { ++ super(TYPE, kind, address, state); ++ this.input = input; ++ } ++ ++ @Override ++ protected void emitMemAccess(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ emitStore(crb, masm, kind, addressValue.toAddress(), input); ++ } ++ } ++ ++ public static final class StoreConstantOp extends MemOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(StoreConstantOp.class); ++ ++ protected final JavaConstant input; ++ ++ public StoreConstantOp(SW64Kind kind, SW64AddressValue address, JavaConstant input, LIRFrameState state) { ++ super(TYPE, kind, address, state); ++ this.input = input; ++ if (!input.isDefaultForKind()) { ++ throw GraalError.shouldNotReachHere("Can only store null constants to memory"); ++ } ++ } ++ ++ @Override ++ public void emitMemAccess(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ emitStore(crb, masm, kind, addressValue.toAddress(), zr.asValue(LIRKind.combine(addressValue))); ++ } ++ } 
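++
++    // StoreConstantOp above is limited to constants that are the default value for their kind
++    // (null / zero); it stores them by writing the zero register to memory. Any other constant
++    // must be materialized into a register first and stored through StoreOp.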
++ ++ public static final class NullCheckOp extends SW64LIRInstruction implements NullCheck { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(NullCheckOp.class); ++ ++ @Use(COMPOSITE) protected SW64AddressValue address; ++ @State protected LIRFrameState state; ++ ++ public NullCheckOp(SW64AddressValue address, LIRFrameState state) { ++ super(TYPE); ++ this.address = address; ++ this.state = state; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ crb.recordImplicitException(masm.position(), state); ++ masm.ldr(64, zr, address.toAddress()); ++ } ++ ++ @Override ++ public Value getCheckedValue() { ++ return address.base; ++ } ++ ++ @Override ++ public LIRFrameState getState() { ++ return state; ++ } ++ } ++ ++ private static void emitStore(@SuppressWarnings("unused") CompilationResultBuilder crb, SW64MacroAssembler masm, SW64Kind kind, SW64Address dst, Value src) { ++ int destSize = kind.getSizeInBytes() * Byte.SIZE; ++ if (kind.isInteger()) { ++ masm.str(destSize, asRegister(src), dst); ++ } else { ++ masm.fstr(destSize, asRegister(src), dst); ++ } ++ } ++ ++ public static void move(CompilationResultBuilder crb, SW64MacroAssembler masm, AllocatableValue result, Value input) { ++ if (isRegister(input)) { ++ if (isRegister(result)) { ++ reg2reg(crb, masm, result, asAllocatableValue(input)); ++ } else if (isStackSlot(result)) { ++ reg2stack(crb, masm, result, asAllocatableValue(input)); ++ } else { ++ throw GraalError.shouldNotReachHere(); ++ } ++ } else if (isStackSlot(input)) { ++ if (isRegister(result)) { ++ stack2reg(crb, masm, result, asAllocatableValue(input)); ++ } else if (isStackSlot(result)) { ++ emitStackMove(crb, masm, result, input); ++ } else { ++ throw GraalError.shouldNotReachHere(); ++ } ++ } else if (isJavaConstant(input)) { ++ if (isRegister(result)) { ++ const2reg(crb, masm, result, asJavaConstant(input)); ++ } else { ++ throw GraalError.shouldNotReachHere(); ++ } ++ } else { ++ throw GraalError.shouldNotReachHere(); ++ } ++ } ++ ++ private static void emitStackMove(CompilationResultBuilder crb, SW64MacroAssembler masm, AllocatableValue result, Value input) { ++ try (ScratchRegister r1 = masm.getScratchRegister()) { ++ try (ScratchRegister r2 = masm.getScratchRegister()) { ++ Register rscratch1 = r1.getRegister(); ++ Register rscratch2 = r2.getRegister(); ++ // use the slot kind to define the operand size ++ PlatformKind kind = input.getPlatformKind(); ++ final int size = kind.getSizeInBytes() * Byte.SIZE; ++ ++ // Always perform stack -> stack copies through integer registers ++ crb.blockComment("[stack -> stack copy]"); ++ SW64Address src = loadStackSlotAddress(crb, masm, asStackSlot(input), rscratch2); ++ masm.ldr(size, rscratch1, src); ++ SW64Address dst = loadStackSlotAddress(crb, masm, asStackSlot(result), rscratch2); ++ masm.str(size, rscratch1, dst); ++ } ++ } ++ } ++ ++ private static void reg2reg(@SuppressWarnings("unused") CompilationResultBuilder crb, SW64MacroAssembler masm, AllocatableValue result, AllocatableValue input) { ++ Register dst = asRegister(result); ++ Register src = asRegister(input); ++ if (src.equals(dst)) { ++ return; ++ } ++ SW64Kind kind = (SW64Kind) input.getPlatformKind(); ++ int size = kind.getSizeInBytes() * Byte.SIZE; ++ if (kind.isInteger()) { ++ masm.mov(size, dst, src); ++ } else { ++ masm.fmov(size, dst, src); ++ } ++ } ++ ++ static void reg2stack(CompilationResultBuilder crb, SW64MacroAssembler masm, AllocatableValue result, AllocatableValue input) { 
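++        // Spill path: store the register into its stack slot. Value.ILLEGAL is passed as the
++        // scratch value below, so the slot offset must be encodable directly (see the note on
++        // loadStackSlotAddress further down in this file).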
++ SW64Address dest = loadStackSlotAddress(crb, masm, asStackSlot(result), Value.ILLEGAL); ++ Register src = asRegister(input); ++ // use the slot kind to define the operand size ++ SW64Kind kind = (SW64Kind) result.getPlatformKind(); ++ final int size = kind.getSizeInBytes() * Byte.SIZE; ++ if (kind.isInteger()) { ++ masm.str(size, src, dest); ++ } else { ++ masm.fstr(size, src, dest); ++ } ++ } ++ ++ static void stack2reg(CompilationResultBuilder crb, SW64MacroAssembler masm, AllocatableValue result, AllocatableValue input) { ++ SW64Kind kind = (SW64Kind) input.getPlatformKind(); ++ // use the slot kind to define the operand size ++ final int size = kind.getSizeInBytes() * Byte.SIZE; ++ if (kind.isInteger()) { ++ SW64Address src = loadStackSlotAddress(crb, masm, asStackSlot(input), result); ++ masm.ldr(size, asRegister(result), src); ++ } else { ++ try (ScratchRegister sc = masm.getScratchRegister()) { ++ AllocatableValue scratchRegisterValue = sc.getRegister().asValue(LIRKind.combine(input)); ++ SW64Address src = loadStackSlotAddress(crb, masm, asStackSlot(input), scratchRegisterValue); ++ masm.fldr(size, asRegister(result), src); ++ } ++ } ++ } ++ ++ private static void const2reg(CompilationResultBuilder crb, SW64MacroAssembler masm, Value result, JavaConstant input) { ++ Register dst = asRegister(result); ++ switch (input.getJavaKind().getStackKind()) { ++ case Int: ++ final int value = input.asInt(); ++ int maskedValue; ++ switch (input.getJavaKind()) { ++ case Boolean: ++ case Byte: ++ maskedValue = value & 0xFF; ++ break; ++ case Char: ++ case Short: ++ maskedValue = value & 0xFFFF; ++ break; ++ case Int: ++ maskedValue = value; ++ break; ++ default: ++ throw GraalError.shouldNotReachHere(); ++ } ++ masm.mov(dst, maskedValue); ++ break; ++ case Long: ++ masm.mov(dst, input.asLong()); ++ break; ++ case Float: ++ if (SW64MacroAssembler.isFloatImmediate(input.asFloat())) { ++ masm.fmov(32, dst, input.asFloat()); ++ } else if (crb.compilationResult.isImmutablePIC()) { ++ try (ScratchRegister scr = masm.getScratchRegister()) { ++ Register scratch = scr.getRegister(); ++ masm.mov(scratch, Float.floatToRawIntBits(input.asFloat())); ++ masm.fmov(32, dst, scratch); ++ } ++ } else { ++ masm.fldr(32, dst, (SW64Address) crb.asFloatConstRef(input)); ++ } ++ break; ++ case Double: ++ if (SW64MacroAssembler.isDoubleImmediate(input.asDouble())) { ++ masm.fmov(64, dst, input.asDouble()); ++ } else if (crb.compilationResult.isImmutablePIC()) { ++ try (ScratchRegister scr = masm.getScratchRegister()) { ++ Register scratch = scr.getRegister(); ++ masm.mov(scratch, Double.doubleToRawLongBits(input.asDouble())); ++ masm.fmov(64, dst, scratch); ++ } ++ } else { ++ masm.fldr(64, dst, (SW64Address) crb.asDoubleConstRef(input)); ++ } ++ break; ++ case Object: ++ if (input.isNull()) { ++ masm.mov(dst, 0); ++ } else if (crb.target.inlineObjects) { ++ crb.recordInlineDataInCode(input); ++ masm.movNativeAddress(dst, 0xDEADDEADDEADDEADL); ++ } else { ++ masm.ldr(64, dst, (SW64Address) crb.recordDataReferenceInCode(input, 8)); ++ } ++ break; ++ default: ++ throw GraalError.shouldNotReachHere("kind=" + input.getJavaKind().getStackKind()); ++ } ++ } ++ ++ private static void const2stack(CompilationResultBuilder crb, SW64MacroAssembler masm, Value result, JavaConstant constant) { ++ try (ScratchRegister addrReg = masm.getScratchRegister()) { ++ StackSlot slot = (StackSlot) result; ++ SW64Address resultAddress = loadStackSlotAddress(crb, masm, slot, addrReg.getRegister()); ++ if (constant.isDefaultForKind() || 
constant.isNull()) { ++ emitStore(crb, masm, (SW64Kind) result.getPlatformKind(), resultAddress, zr.asValue(LIRKind.combine(result))); ++ } else { ++ try (ScratchRegister sc = masm.getScratchRegister()) { ++ Value scratchRegisterValue = sc.getRegister().asValue(LIRKind.combine(result)); ++ const2reg(crb, masm, scratchRegisterValue, constant); ++ emitStore(crb, masm, (SW64Kind) result.getPlatformKind(), resultAddress, scratchRegisterValue); ++ } ++ } ++ } ++ } ++ ++ /** ++ * Returns SW64Address of given StackSlot. We cannot use CompilationResultBuilder.asAddress ++ * since this calls SW64MacroAssembler.makeAddress with displacements that may be larger than ++ * 9-bit signed, which cannot be handled by that method. ++ * ++ * Instead we create an address ourselves. We use scaled unsigned addressing since we know the ++ * transfersize, which gives us a 15-bit address range (for longs/doubles) respectively a 14-bit ++ * range (for everything else). ++ * ++ * @param scratch Scratch register that can be used to load address. If Value.ILLEGAL this ++ * instruction fails if we try to access a StackSlot that is too large to be loaded ++ * directly. ++ * @return SW64Address of given StackSlot. Uses scratch register if necessary to do so. ++ */ ++ private static SW64Address loadStackSlotAddress(CompilationResultBuilder crb, SW64MacroAssembler masm, StackSlot slot, AllocatableValue scratch) { ++ Register scratchReg = Value.ILLEGAL.equals(scratch) ? zr : asRegister(scratch); ++ return loadStackSlotAddress(crb, masm, slot, scratchReg); ++ } ++ ++ private static SW64Address loadStackSlotAddress(CompilationResultBuilder crb, SW64MacroAssembler masm, StackSlot slot, Register scratchReg) { ++ int displacement = crb.frameMap.offsetForStackSlot(slot); ++ int transferSize = slot.getPlatformKind().getSizeInBytes(); ++ return masm.makeAddress(sp, displacement, scratchReg, transferSize, /* allowOverwrite */false); ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64PauseOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64PauseOp.java +new file mode 100644 +index 0000000000..b5c6950239 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64PauseOp.java +@@ -0,0 +1,47 @@ ++/* ++ * Copyright (c) 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++/** ++ * Emits a pause. ++ */ ++@Opcode("PAUSE") ++public final class SW64PauseOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64PauseOp.class); ++ ++ public SW64PauseOp() { ++ super(TYPE); ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ masm.pause(); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64PrefetchOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64PrefetchOp.java +new file mode 100644 +index 0000000000..d45ecade00 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64PrefetchOp.java +@@ -0,0 +1,52 @@ ++/* ++ * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.COMPOSITE; ++ ++import org.graalvm.compiler.asm.sw64.SW64Assembler.PrefetchMode; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++public final class SW64PrefetchOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64PrefetchOp.class); ++ ++ private final PrefetchMode mode; // AllocatePrefetchInstr ++ @Alive({COMPOSITE}) protected SW64AddressValue address; ++ ++ public SW64PrefetchOp(SW64AddressValue address, PrefetchMode mode) { ++ super(TYPE); ++ this.address = address; ++ this.mode = mode; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ // instr gets ignored! 
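++        // (The AllocatePrefetchInstr value referred to above appears to be ignored in favour of
++        // the PrefetchMode selected when this op was constructed.)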
++ masm.prfm(address.toAddress(), mode); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ReinterpretOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ReinterpretOp.java +new file mode 100644 +index 0000000000..1c0dc48c3b +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64ReinterpretOp.java +@@ -0,0 +1,65 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.sw64.SW64Kind; ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.AllocatableValue; ++ ++/** ++ * Instruction that reinterprets some bit pattern as a different type. 
It is possible to reinterpret ++ * the following: - int <-> float - long <-> double ++ */ ++public class SW64ReinterpretOp extends SW64LIRInstruction { ++ private static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64ReinterpretOp.class); ++ ++ @Def protected AllocatableValue resultValue; ++ @Use protected AllocatableValue inputValue; ++ ++ public SW64ReinterpretOp(AllocatableValue resultValue, AllocatableValue inputValue) { ++ super(TYPE); ++ SW64Kind from = (SW64Kind) inputValue.getPlatformKind(); ++ SW64Kind to = (SW64Kind) resultValue.getPlatformKind(); ++ assert from.getSizeInBytes() == to.getSizeInBytes() && from.isInteger() ^ to.isInteger(); ++ this.resultValue = resultValue; ++ this.inputValue = inputValue; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ Register result = asRegister(resultValue); ++ Register input = asRegister(inputValue); ++ SW64Kind to = (SW64Kind) resultValue.getPlatformKind(); ++ final int size = to.getSizeInBytes() * Byte.SIZE; ++ masm.fmov(size, result, input); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64RestoreRegistersOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64RestoreRegistersOp.java +new file mode 100644 +index 0000000000..e53de1eae8 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64RestoreRegistersOp.java +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.STACK; ++import static jdk.vm.ci.code.ValueUtil.asStackSlot; ++import static jdk.vm.ci.code.ValueUtil.isStackSlot; ++ ++import java.util.Arrays; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.LIRValueUtil; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.meta.AllocatableValue; ++ ++/** ++ * Restores registers from stack slots. 
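++ * Each non-null entry of the register array captured by the paired {@link SW64SaveRegistersOp} is
++ * reloaded from its stack slot via {@code SW64Move.stack2reg}.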
++ */ ++@Opcode("RESTORE_REGISTER") ++public class SW64RestoreRegistersOp extends SW64LIRInstruction { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64RestoreRegistersOp.class); ++ ++ /** ++ * The slots from which the registers are restored. ++ */ ++ @Use(STACK) protected final AllocatableValue[] slots; ++ ++ /** ++ * The operation that saved the registers restored by this operation. ++ */ ++ private final SW64SaveRegistersOp save; ++ ++ public SW64RestoreRegistersOp(AllocatableValue[] values, SW64SaveRegistersOp save) { ++ this(TYPE, values, save); ++ } ++ ++ protected SW64RestoreRegistersOp(LIRInstructionClass c, AllocatableValue[] values, SW64SaveRegistersOp save) { ++ super(c); ++ assert Arrays.asList(values).stream().allMatch(LIRValueUtil::isVirtualStackSlot); ++ this.slots = values; ++ this.save = save; ++ } ++ ++ protected Register[] getSavedRegisters() { ++ return save.savedRegisters; ++ } ++ ++ protected void restoreRegister(CompilationResultBuilder crb, SW64MacroAssembler masm, Register result, StackSlot input) { ++ SW64Move.stack2reg(crb, masm, result.asValue(), input); ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ Register[] savedRegisters = getSavedRegisters(); ++ for (int i = 0; i < savedRegisters.length; i++) { ++ if (savedRegisters[i] != null) { ++ assert isStackSlot(slots[i]) : "not a StackSlot: " + slots[i]; ++ restoreRegister(crb, masm, savedRegisters[i], asStackSlot(slots[i])); ++ } ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64SaveRegistersOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64SaveRegistersOp.java +new file mode 100644 +index 0000000000..bd9006d923 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64SaveRegistersOp.java +@@ -0,0 +1,170 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static jdk.vm.ci.code.ValueUtil.asStackSlot; ++import static jdk.vm.ci.code.ValueUtil.isStackSlot; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.STACK; ++ ++import java.util.Arrays; ++ ++import jdk.internal.vm.compiler.collections.EconomicSet; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.LIRValueUtil; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.StandardOp.SaveRegistersOp; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++import org.graalvm.compiler.lir.framemap.FrameMap; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.code.RegisterSaveLayout; ++import jdk.vm.ci.code.StackSlot; ++import jdk.vm.ci.meta.AllocatableValue; ++ ++/** ++ * Saves registers to stack slots. ++ */ ++@Opcode("SAVE_REGISTER") ++public class SW64SaveRegistersOp extends SW64LIRInstruction implements SaveRegistersOp { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64SaveRegistersOp.class); ++ ++ /** ++ * The registers (potentially) saved by this operation. ++ */ ++ protected final Register[] savedRegisters; ++ ++ /** ++ * The slots to which the registers are saved. ++ */ ++ @Def(STACK) protected final AllocatableValue[] slots; ++ ++ /** ++ * Specifies if {@link #remove(EconomicSet)} should have an effect. ++ */ ++ protected final boolean supportsRemove; ++ ++ /** ++ * ++ * @param savedRegisters the registers saved by this operation which may be subject to ++ * {@linkplain #remove(EconomicSet) pruning} ++ * @param savedRegisterLocations the slots to which the registers are saved ++ * @param supportsRemove determines if registers can be {@linkplain #remove(EconomicSet) pruned} ++ */ ++ public SW64SaveRegistersOp(Register[] savedRegisters, AllocatableValue[] savedRegisterLocations, boolean supportsRemove) { ++ this(TYPE, savedRegisters, savedRegisterLocations, supportsRemove); ++ } ++ ++ public SW64SaveRegistersOp(LIRInstructionClass c, Register[] savedRegisters, AllocatableValue[] savedRegisterLocations, boolean supportsRemove) { ++ super(c); ++ assert Arrays.asList(savedRegisterLocations).stream().allMatch(LIRValueUtil::isVirtualStackSlot); ++ this.savedRegisters = savedRegisters; ++ this.slots = savedRegisterLocations; ++ this.supportsRemove = supportsRemove; ++ } ++ ++ protected void saveRegister(CompilationResultBuilder crb, SW64MacroAssembler masm, StackSlot result, Register input) { ++ SW64Move.reg2stack(crb, masm, result, input.asValue()); ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ for (int i = 0; i < savedRegisters.length; i++) { ++ if (savedRegisters[i] != null) { ++ assert isStackSlot(slots[i]) : "not a StackSlot: " + slots[i]; ++ saveRegister(crb, masm, asStackSlot(slots[i]), savedRegisters[i]); ++ } ++ } ++ } ++ ++ public AllocatableValue[] getSlots() { ++ return slots; ++ } ++ ++ @Override ++ public boolean supportsRemove() { ++ return supportsRemove; ++ } ++ ++ @Override ++ public int remove(EconomicSet doNotSave) { ++ if (!supportsRemove) { ++ throw new UnsupportedOperationException(); ++ } ++ return prune(doNotSave, savedRegisters); ++ } ++ ++ static int prune(EconomicSet toRemove, Register[] registers) { ++ int pruned = 0; ++ for (int i = 0; i < registers.length; i++) { ++ if (registers[i] != null) { ++ if (toRemove.contains(registers[i])) { ++ registers[i] = null; ++ 
pruned++; ++ } ++ } ++ } ++ return pruned; ++ } ++ ++ @Override ++ public RegisterSaveLayout getMap(FrameMap frameMap) { ++ int total = 0; ++ for (int i = 0; i < savedRegisters.length; i++) { ++ if (savedRegisters[i] != null) { ++ total++; ++ } ++ } ++ Register[] keys = new Register[total]; ++ int[] values = new int[total]; ++ if (total != 0) { ++ int mapIndex = 0; ++ for (int i = 0; i < savedRegisters.length; i++) { ++ if (savedRegisters[i] != null) { ++ keys[mapIndex] = savedRegisters[i]; ++ assert isStackSlot(slots[i]) : "not a StackSlot: " + slots[i]; ++ StackSlot slot = asStackSlot(slots[i]); ++ values[mapIndex] = indexForStackSlot(frameMap, slot); ++ mapIndex++; ++ } ++ } ++ assert mapIndex == total; ++ } ++ return new RegisterSaveLayout(keys, values); ++ } ++ ++ /** ++ * Computes the index of a stack slot relative to slot 0. This is also the bit index of stack ++ * slots in the reference map. ++ * ++ * @param slot a stack slot ++ * @return the index of the stack slot ++ */ ++ private static int indexForStackSlot(FrameMap frameMap, StackSlot slot) { ++ assert frameMap.offsetForStackSlot(slot) % frameMap.getTarget().wordSize == 0; ++ int value = frameMap.offsetForStackSlot(slot) / frameMap.getTarget().wordSize; ++ return value; ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64SignExtendOp.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64SignExtendOp.java +new file mode 100644 +index 0000000000..59ee67599a +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64SignExtendOp.java +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 2015, 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++ ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.Opcode; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.AllocatableValue; ++ ++@Opcode("SIGNEXTEND") ++public class SW64SignExtendOp extends SW64LIRInstruction { ++ private static final LIRInstructionClass TYPE = LIRInstructionClass.create(SW64SignExtendOp.class); ++ ++ @Def protected AllocatableValue resultValue; ++ @Use protected AllocatableValue inputValue; ++ private final int fromBits; ++ private final int toBits; ++ ++ public SW64SignExtendOp(AllocatableValue resultValue, AllocatableValue inputValue, int fromBits, int toBits) { ++ super(TYPE); ++ this.resultValue = resultValue; ++ this.inputValue = inputValue; ++ this.fromBits = fromBits; ++ this.toBits = toBits; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ Register result = asRegister(resultValue); ++ Register input = asRegister(inputValue); ++ masm.sxt(toBits <= 32 ? 32 : 64, fromBits, result, input); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64Unary.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64Unary.java +new file mode 100644 +index 0000000000..0edc76f8f2 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.lir.sw64/src/org/graalvm/compiler/lir/sw64/SW64Unary.java +@@ -0,0 +1,97 @@ ++/* ++ * Copyright (c) 2015, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.lir.sw64; ++ ++import static jdk.vm.ci.code.ValueUtil.asRegister; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.COMPOSITE; ++import static org.graalvm.compiler.lir.LIRInstruction.OperandFlag.REG; ++ ++import org.graalvm.compiler.asm.sw64.SW64Address; ++import org.graalvm.compiler.asm.sw64.SW64MacroAssembler; ++import org.graalvm.compiler.lir.LIRFrameState; ++import org.graalvm.compiler.lir.LIRInstructionClass; ++import org.graalvm.compiler.lir.StandardOp.ImplicitNullCheck; ++import org.graalvm.compiler.lir.asm.CompilationResultBuilder; ++ ++import jdk.vm.ci.code.Register; ++import jdk.vm.ci.meta.AllocatableValue; ++import jdk.vm.ci.meta.Value; ++ ++/** ++ * AARCH64 LIR instructions that have one input and one output. ++ */ ++public class SW64Unary { ++ ++ /** ++ * Instruction with a {@link SW64AddressValue memory} operand. ++ */ ++ public static class MemoryOp extends SW64LIRInstruction implements ImplicitNullCheck { ++ public static final LIRInstructionClass TYPE = LIRInstructionClass.create(MemoryOp.class); ++ ++ private final boolean isSigned; ++ ++ @Def({REG}) protected AllocatableValue result; ++ @Use({COMPOSITE}) protected SW64AddressValue input; ++ ++ @State protected LIRFrameState state; ++ ++ private int targetSize; ++ private int srcSize; ++ ++ public MemoryOp(boolean isSigned, int targetSize, int srcSize, AllocatableValue result, SW64AddressValue input, LIRFrameState state) { ++ super(TYPE); ++ this.targetSize = targetSize; ++ this.srcSize = srcSize; ++ this.isSigned = isSigned; ++ this.result = result; ++ this.input = input; ++ this.state = state; ++ } ++ ++ @Override ++ public void emitCode(CompilationResultBuilder crb, SW64MacroAssembler masm) { ++ if (state != null) { ++ crb.recordImplicitException(masm.position(), state); ++ } ++ SW64Address address = input.toAddress(); ++ Register dst = asRegister(result); ++ if (isSigned) { ++ masm.ldrs(targetSize, srcSize, dst, address); ++ } else { ++ masm.ldr(srcSize, dst, address); ++ } ++ } ++ ++ @Override ++ public boolean makeNullCheckFor(Value value, LIRFrameState nullCheckState, int implicitNullCheckLimit) { ++ int displacement = input.getDisplacement(); ++ if (state == null && value.equals(input.getBase()) && input.getOffset().equals(Value.ILLEGAL) && displacement >= 0 && displacement < implicitNullCheckLimit) { ++ state = nullCheckState; ++ return true; ++ } ++ return false; ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64CountLeadingZerosNode.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64CountLeadingZerosNode.java +new file mode 100644 +index 0000000000..d627b3a6fa +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64CountLeadingZerosNode.java +@@ -0,0 +1,90 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.replacements.sw64; ++ ++import static org.graalvm.compiler.nodeinfo.NodeCycles.CYCLES_2; ++import static org.graalvm.compiler.nodeinfo.NodeSize.SIZE_1; ++ ++import org.graalvm.compiler.core.common.type.IntegerStamp; ++import org.graalvm.compiler.core.common.type.Stamp; ++import org.graalvm.compiler.graph.NodeClass; ++import org.graalvm.compiler.graph.spi.CanonicalizerTool; ++import org.graalvm.compiler.lir.sw64.SW64ArithmeticLIRGeneratorTool; ++import org.graalvm.compiler.lir.gen.ArithmeticLIRGeneratorTool; ++import org.graalvm.compiler.nodeinfo.NodeInfo; ++import org.graalvm.compiler.nodes.ConstantNode; ++import org.graalvm.compiler.nodes.NodeView; ++import org.graalvm.compiler.nodes.ValueNode; ++import org.graalvm.compiler.nodes.calc.UnaryNode; ++import org.graalvm.compiler.nodes.spi.ArithmeticLIRLowerable; ++import org.graalvm.compiler.nodes.spi.NodeLIRBuilderTool; ++import org.graalvm.compiler.nodes.type.StampTool; ++ ++import jdk.vm.ci.meta.JavaConstant; ++import jdk.vm.ci.meta.JavaKind; ++ ++@NodeInfo(cycles = CYCLES_2, size = SIZE_1) ++public final class SW64CountLeadingZerosNode extends UnaryNode implements ArithmeticLIRLowerable { ++ ++ public static final NodeClass TYPE = NodeClass.create(SW64CountLeadingZerosNode.class); ++ ++ public SW64CountLeadingZerosNode(ValueNode value) { ++ super(TYPE, computeStamp(value.stamp(NodeView.DEFAULT), value), value); ++ } ++ ++ @Override ++ public Stamp foldStamp(Stamp newStamp) { ++ return computeStamp(newStamp, getValue()); ++ } ++ ++ private static Stamp computeStamp(Stamp newStamp, ValueNode theValue) { ++ assert newStamp.isCompatible(theValue.stamp(NodeView.DEFAULT)); ++ assert theValue.getStackKind() == JavaKind.Int || theValue.getStackKind() == JavaKind.Long; ++ return StampTool.stampForLeadingZeros((IntegerStamp) newStamp); ++ } ++ ++ public static ValueNode tryFold(ValueNode value) { ++ if (value.isConstant()) { ++ JavaConstant c = value.asJavaConstant(); ++ if (value.getStackKind() == JavaKind.Int) { ++ return ConstantNode.forInt(Integer.numberOfLeadingZeros(c.asInt())); ++ } else { ++ return ConstantNode.forInt(Long.numberOfLeadingZeros(c.asLong())); ++ } ++ } ++ return null; ++ } ++ ++ @Override ++ public ValueNode canonical(CanonicalizerTool tool, ValueNode forValue) { ++ ValueNode folded = tryFold(forValue); ++ return folded != null ? 
folded : this; ++ } ++ ++ @Override ++ public void generate(NodeLIRBuilderTool builder, ArithmeticLIRGeneratorTool gen) { ++ builder.setResult(this, ((SW64ArithmeticLIRGeneratorTool) gen).emitCountLeadingZeros(builder.operand(getValue()))); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64CountTrailingZerosNode.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64CountTrailingZerosNode.java +new file mode 100644 +index 0000000000..d9b3a90495 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64CountTrailingZerosNode.java +@@ -0,0 +1,93 @@ ++/* ++ * Copyright (c) 2012, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.replacements.sw64; ++ ++import static org.graalvm.compiler.nodeinfo.NodeCycles.CYCLES_2; ++import static org.graalvm.compiler.nodeinfo.NodeSize.SIZE_2; ++ ++import org.graalvm.compiler.core.common.type.IntegerStamp; ++import org.graalvm.compiler.core.common.type.Stamp; ++import org.graalvm.compiler.graph.NodeClass; ++import org.graalvm.compiler.graph.spi.CanonicalizerTool; ++import org.graalvm.compiler.lir.sw64.SW64ArithmeticLIRGeneratorTool; ++import org.graalvm.compiler.lir.gen.ArithmeticLIRGeneratorTool; ++import org.graalvm.compiler.nodeinfo.NodeInfo; ++import org.graalvm.compiler.nodes.ConstantNode; ++import org.graalvm.compiler.nodes.NodeView; ++import org.graalvm.compiler.nodes.ValueNode; ++import org.graalvm.compiler.nodes.calc.UnaryNode; ++import org.graalvm.compiler.nodes.spi.ArithmeticLIRLowerable; ++import org.graalvm.compiler.nodes.spi.NodeLIRBuilderTool; ++import org.graalvm.compiler.nodes.type.StampTool; ++ ++import jdk.vm.ci.meta.JavaConstant; ++import jdk.vm.ci.meta.JavaKind; ++ ++/** ++ * Count the number of trailing zeros using the {@code rbit; clz} instructions. 
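++ * For example, for the 32-bit input {@code 0b1000}, {@code rbit} moves the lowest set bit to bit 28,
++ * so the following {@code clz} yields 3, the trailing-zero count of the original value.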
++ */ ++@NodeInfo(cycles = CYCLES_2, size = SIZE_2) ++public final class SW64CountTrailingZerosNode extends UnaryNode implements ArithmeticLIRLowerable { ++ public static final NodeClass TYPE = NodeClass.create(SW64CountTrailingZerosNode.class); ++ ++ public SW64CountTrailingZerosNode(ValueNode value) { ++ super(TYPE, computeStamp(value.stamp(NodeView.DEFAULT), value), value); ++ assert value.getStackKind() == JavaKind.Int || value.getStackKind() == JavaKind.Long; ++ } ++ ++ @Override ++ public Stamp foldStamp(Stamp newStamp) { ++ return computeStamp(newStamp, getValue()); ++ } ++ ++ static Stamp computeStamp(Stamp newStamp, ValueNode value) { ++ assert newStamp.isCompatible(value.stamp(NodeView.DEFAULT)); ++ IntegerStamp valueStamp = (IntegerStamp) newStamp; ++ return StampTool.stampForTrailingZeros(valueStamp); ++ } ++ ++ public static ValueNode tryFold(ValueNode value) { ++ if (value.isConstant()) { ++ JavaConstant c = value.asJavaConstant(); ++ if (value.getStackKind() == JavaKind.Int) { ++ return ConstantNode.forInt(Integer.numberOfTrailingZeros(c.asInt())); ++ } else { ++ return ConstantNode.forInt(Long.numberOfTrailingZeros(c.asLong())); ++ } ++ } ++ return null; ++ } ++ ++ @Override ++ public ValueNode canonical(CanonicalizerTool tool, ValueNode forValue) { ++ ValueNode folded = tryFold(forValue); ++ return folded != null ? folded : this; ++ } ++ ++ @Override ++ public void generate(NodeLIRBuilderTool builder, ArithmeticLIRGeneratorTool gen) { ++ builder.setResult(this, ((SW64ArithmeticLIRGeneratorTool) gen).emitCountTrailingZeros(builder.operand(getValue()))); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64FloatArithmeticSnippets.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64FloatArithmeticSnippets.java +new file mode 100644 +index 0000000000..12a4a68cba +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64FloatArithmeticSnippets.java +@@ -0,0 +1,156 @@ ++/* ++ * Copyright (c) 2014, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++ ++package org.graalvm.compiler.replacements.sw64; ++ ++import static org.graalvm.compiler.nodeinfo.NodeCycles.CYCLES_IGNORED; ++import static org.graalvm.compiler.nodeinfo.NodeSize.SIZE_IGNORED; ++ ++import org.graalvm.compiler.api.replacements.Snippet; ++import org.graalvm.compiler.api.replacements.SnippetReflectionProvider; ++import org.graalvm.compiler.debug.DebugHandlersFactory; ++import org.graalvm.compiler.graph.Node.NodeIntrinsic; ++import org.graalvm.compiler.graph.NodeClass; ++import org.graalvm.compiler.nodeinfo.NodeInfo; ++import org.graalvm.compiler.nodes.NodeView; ++import org.graalvm.compiler.nodes.StructuredGraph; ++import org.graalvm.compiler.nodes.ValueNode; ++import org.graalvm.compiler.nodes.calc.RemNode; ++import org.graalvm.compiler.nodes.spi.LoweringTool; ++import org.graalvm.compiler.options.OptionValues; ++import org.graalvm.compiler.phases.util.Providers; ++import org.graalvm.compiler.replacements.SnippetTemplate; ++import org.graalvm.compiler.replacements.SnippetTemplate.Arguments; ++import org.graalvm.compiler.replacements.Snippets; ++ ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.meta.JavaKind; ++ ++/** ++ * SW64 does not have a remainder operation. We use n % d == n - Truncate(n / d) * d ++ * for it instead. This is not correct for some edge cases, so we have to fix it up using these ++ * snippets. ++ */ ++public class SW64FloatArithmeticSnippets extends SnippetTemplate.AbstractTemplates implements Snippets { ++ ++ private final SnippetTemplate.SnippetInfo drem; ++ private final SnippetTemplate.SnippetInfo frem; ++ ++ public SW64FloatArithmeticSnippets(OptionValues options, Iterable factories, Providers providers, SnippetReflectionProvider snippetReflection, ++ TargetDescription target) { ++ super(options, factories, providers, snippetReflection, target); ++ drem = snippet(SW64FloatArithmeticSnippets.class, "dremSnippet"); ++ frem = snippet(SW64FloatArithmeticSnippets.class, "fremSnippet"); ++ } ++ ++ public void lower(RemNode node, LoweringTool tool) { ++ JavaKind kind = node.stamp(NodeView.DEFAULT).getStackKind(); ++ assert kind == JavaKind.Float || kind == JavaKind.Double; ++ if (node instanceof SafeNode) { ++ // We already introduced the necessary checks, nothing to do. ++ return; ++ } ++ SnippetTemplate.SnippetInfo snippet = kind == JavaKind.Float ? frem : drem; ++ StructuredGraph graph = node.graph(); ++ Arguments args = new Arguments(snippet, graph.getGuardsStage(), tool.getLoweringStage()); ++ args.add("x", node.getX()); ++ args.add("y", node.getY()); ++ template(node, args).instantiate(providers.getMetaAccess(), node, SnippetTemplate.DEFAULT_REPLACER, tool, args); ++ } ++ ++ @Snippet ++ public static float fremSnippet(float x, float y) { ++ // JVMS: If either value1' or value2' is NaN, the result is NaN. ++ // JVMS: If the dividend is an infinity or the divisor is a zero or both, the result is NaN. ++ if (Float.isInfinite(x) || y == 0.0f || Float.isNaN(y)) { ++ return Float.NaN; ++ } ++ // JVMS: If the dividend is finite and the divisor is an infinity, the result equals the ++ // dividend. ++ // JVMS: If the dividend is a zero and the divisor is finite, the result equals the ++ // dividend. ++ if (x == 0.0f || Float.isInfinite(y)) { ++ return x; ++ } ++ ++ float result = safeRem(x, y); ++ ++ // JVMS: If neither value1' nor value2' is NaN, the sign of the result equals the sign of ++ // the dividend. 
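++        // The underlying x - trunc(x / y) * y computation can produce +0.0f where the JVMS requires
++        // a zero with the dividend's sign, e.g. -2.0f % 2.0f must be -0.0f, hence the fix-up below.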
++ if (result == 0.0f && x < 0.0f) { ++ return -result; ++ } ++ return result; ++ } ++ ++ @Snippet ++ public static double dremSnippet(double x, double y) { ++ // JVMS: If either value1' or value2' is NaN, the result is NaN. ++ // JVMS: If the dividend is an infinity or the divisor is a zero or both, the result is NaN. ++ if (Double.isInfinite(x) || y == 0.0 || Double.isNaN(y)) { ++ return Double.NaN; ++ } ++ // JVMS: If the dividend is finite and the divisor is an infinity, the result equals the ++ // dividend. ++ // JVMS: If the dividend is a zero and the divisor is finite, the result equals the ++ // dividend. ++ if (x == 0.0 || Double.isInfinite(y)) { ++ return x; ++ } ++ ++ double result = safeRem(x, y); ++ ++ // JVMS: If neither value1' nor value2' is NaN, the sign of the result equals the sign of ++ // the dividend. ++ if (result == 0.0 && x < 0.0) { ++ return -result; ++ } ++ return result; ++ } ++ ++ @NodeIntrinsic(SafeFloatRemNode.class) ++ private static native float safeRem(float x, float y); ++ ++ @NodeIntrinsic(SafeFloatRemNode.class) ++ private static native double safeRem(double x, double y); ++ ++ /** ++ * Marker interface to distinguish untreated nodes from ones where we have installed the ++ * additional checks. ++ */ ++ private interface SafeNode { ++ } ++ ++ @NodeInfo(cycles = CYCLES_IGNORED, size = SIZE_IGNORED) ++ // static class SafeFloatRemNode extends FloatRemNode implements SafeNode { ++ static class SafeFloatRemNode extends RemNode implements SafeNode { ++ public static final NodeClass TYPE = NodeClass.create(SafeFloatRemNode.class); ++ ++ protected SafeFloatRemNode(ValueNode x, ValueNode y) { ++ super(TYPE, x, y); ++ } ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64GraphBuilderPlugins.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64GraphBuilderPlugins.java +new file mode 100644 +index 0000000000..3cdaf760a1 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64GraphBuilderPlugins.java +@@ -0,0 +1,209 @@ ++/* ++ * Copyright (c) 2015, 2018, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */ ++ ++ ++package org.graalvm.compiler.replacements.sw64; ++ ++import static org.graalvm.compiler.replacements.StandardGraphBuilderPlugins.registerPlatformSpecificUnsafePlugins; ++import static org.graalvm.compiler.replacements.nodes.UnaryMathIntrinsicNode.UnaryOperation.COS; ++import static org.graalvm.compiler.replacements.nodes.UnaryMathIntrinsicNode.UnaryOperation.EXP; ++import static org.graalvm.compiler.replacements.nodes.UnaryMathIntrinsicNode.UnaryOperation.LOG; ++import static org.graalvm.compiler.replacements.nodes.UnaryMathIntrinsicNode.UnaryOperation.LOG10; ++import static org.graalvm.compiler.replacements.nodes.UnaryMathIntrinsicNode.UnaryOperation.SIN; ++import static org.graalvm.compiler.replacements.nodes.UnaryMathIntrinsicNode.UnaryOperation.TAN; ++import static org.graalvm.compiler.serviceprovider.GraalServices.JAVA_SPECIFICATION_VERSION; ++import static org.graalvm.compiler.serviceprovider.GraalServices.Java8OrEarlier; ++ ++import org.graalvm.compiler.bytecode.BytecodeProvider; ++import org.graalvm.compiler.lir.sw64.SW64ArithmeticLIRGeneratorTool.RoundingMode; ++import org.graalvm.compiler.nodes.ValueNode; ++import org.graalvm.compiler.nodes.graphbuilderconf.GraphBuilderConfiguration.Plugins; ++import org.graalvm.compiler.nodes.graphbuilderconf.GraphBuilderContext; ++import org.graalvm.compiler.nodes.graphbuilderconf.InvocationPlugin; ++import org.graalvm.compiler.nodes.graphbuilderconf.InvocationPlugin.Receiver; ++import org.graalvm.compiler.nodes.graphbuilderconf.InvocationPlugins; ++import org.graalvm.compiler.nodes.graphbuilderconf.InvocationPlugins.Registration; ++import org.graalvm.compiler.nodes.java.AtomicReadAndAddNode; ++import org.graalvm.compiler.nodes.java.AtomicReadAndWriteNode; ++import org.graalvm.compiler.nodes.memory.address.AddressNode; ++import org.graalvm.compiler.nodes.memory.address.OffsetAddressNode; ++import org.graalvm.compiler.replacements.nodes.BinaryMathIntrinsicNode; ++import org.graalvm.compiler.replacements.nodes.UnaryMathIntrinsicNode; ++import org.graalvm.compiler.replacements.nodes.UnaryMathIntrinsicNode.UnaryOperation; ++import jdk.internal.vm.compiler.word.LocationIdentity; ++ ++import jdk.vm.ci.meta.JavaKind; ++import jdk.vm.ci.meta.ResolvedJavaMethod; ++import sun.misc.Unsafe; ++ ++public class SW64GraphBuilderPlugins { ++ ++ public static void register(Plugins plugins, BytecodeProvider bytecodeProvider, boolean explicitUnsafeNullChecks) { ++ InvocationPlugins invocationPlugins = plugins.getInvocationPlugins(); ++ invocationPlugins.defer(new Runnable() { ++ @Override ++ public void run() { ++ registerIntegerLongPlugins(invocationPlugins, SW64IntegerSubstitutions.class, JavaKind.Int, bytecodeProvider); ++ registerIntegerLongPlugins(invocationPlugins, SW64LongSubstitutions.class, JavaKind.Long, bytecodeProvider); ++ registerMathPlugins(invocationPlugins); ++ registerStringLatin1Plugins(invocationPlugins, bytecodeProvider); ++ registerStringUTF16Plugins(invocationPlugins, bytecodeProvider); ++ registerUnsafePlugins(invocationPlugins, bytecodeProvider); ++ // This is temporarily disabled until we implement correct emitting of the CAS ++ // instructions of the proper width. 
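++                // For that reason the registration below is limited to the 32-bit, 64-bit and
++                // Object kinds; the narrower byte/short/char variants are not wired up here.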
++ registerPlatformSpecificUnsafePlugins(invocationPlugins, bytecodeProvider, explicitUnsafeNullChecks, ++ new JavaKind[]{JavaKind.Int, JavaKind.Long, JavaKind.Object}); ++ } ++ }); ++ } ++ ++ private static void registerIntegerLongPlugins(InvocationPlugins plugins, Class substituteDeclaringClass, JavaKind kind, BytecodeProvider bytecodeProvider) { ++ Class declaringClass = kind.toBoxedJavaClass(); ++ Class type = kind.toJavaClass(); ++ Registration r = new Registration(plugins, declaringClass, bytecodeProvider); ++ r.register1("numberOfLeadingZeros", type, new InvocationPlugin() { ++ @Override ++ public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver receiver, ValueNode value) { ++ ValueNode folded = SW64CountLeadingZerosNode.tryFold(value); ++ if (folded != null) { ++ b.addPush(JavaKind.Int, folded); ++ } else { ++ b.addPush(JavaKind.Int, new SW64CountLeadingZerosNode(value)); ++ } ++ return true; ++ } ++ }); ++ r.register1("numberOfTrailingZeros", type, new InvocationPlugin() { ++ @Override ++ public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver receiver, ValueNode value) { ++ ValueNode folded = SW64CountTrailingZerosNode.tryFold(value); ++ if (folded != null) { ++ b.addPush(JavaKind.Int, folded); ++ } else { ++ b.addPush(JavaKind.Int, new SW64CountTrailingZerosNode(value)); ++ } ++ return true; ++ } ++ }); ++ r.registerMethodSubstitution(substituteDeclaringClass, "bitCount", type); ++ } ++ ++ private static void registerMathPlugins(InvocationPlugins plugins) { ++ Registration r = new Registration(plugins, Math.class); ++ registerUnaryMath(r, "sin", SIN); ++ registerUnaryMath(r, "cos", COS); ++ registerUnaryMath(r, "tan", TAN); ++ registerUnaryMath(r, "exp", EXP); ++ registerUnaryMath(r, "log", LOG); ++ registerUnaryMath(r, "log10", LOG10); ++ r.register2("pow", Double.TYPE, Double.TYPE, new InvocationPlugin() { ++ @Override ++ public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver receiver, ValueNode x, ValueNode y) { ++ b.push(JavaKind.Double, b.append(BinaryMathIntrinsicNode.create(x, y, BinaryMathIntrinsicNode.BinaryOperation.POW))); ++ return true; ++ } ++ }); ++ registerRound(r, "rint", RoundingMode.NEAREST); ++ registerRound(r, "ceil", RoundingMode.UP); ++ registerRound(r, "floor", RoundingMode.DOWN); ++ } ++ ++ private static void registerUnaryMath(Registration r, String name, UnaryOperation operation) { ++ r.register1(name, Double.TYPE, new InvocationPlugin() { ++ @Override ++ public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver receiver, ValueNode value) { ++ b.push(JavaKind.Double, b.append(UnaryMathIntrinsicNode.create(value, operation))); ++ return true; ++ } ++ }); ++ } ++ ++ private static void registerRound(Registration r, String name, RoundingMode mode) { ++ r.register1(name, Double.TYPE, new InvocationPlugin() { ++ @Override ++ public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver receiver, ValueNode arg) { ++ b.push(JavaKind.Double, b.append(new SW64RoundNode(arg, mode))); ++ return true; ++ } ++ }); ++ } ++ ++ private static void registerStringLatin1Plugins(InvocationPlugins plugins, BytecodeProvider replacementsBytecodeProvider) { ++ if (JAVA_SPECIFICATION_VERSION >= 9) { ++ Registration r = new Registration(plugins, "java.lang.StringLatin1", replacementsBytecodeProvider); ++ r.setAllowOverwrite(true); ++ r.registerMethodSubstitution(SW64StringLatin1Substitutions.class, "compareTo", byte[].class, 
byte[].class); ++ r.registerMethodSubstitution(SW64StringLatin1Substitutions.class, "compareToUTF16", byte[].class, byte[].class); ++ } ++ } ++ ++ private static void registerStringUTF16Plugins(InvocationPlugins plugins, BytecodeProvider replacementsBytecodeProvider) { ++ if (JAVA_SPECIFICATION_VERSION >= 9) { ++ Registration r = new Registration(plugins, "java.lang.StringUTF16", replacementsBytecodeProvider); ++ r.setAllowOverwrite(true); ++ r.registerMethodSubstitution(SW64StringUTF16Substitutions.class, "compareTo", byte[].class, byte[].class); ++ r.registerMethodSubstitution(SW64StringUTF16Substitutions.class, "compareToLatin1", byte[].class, byte[].class); ++ } ++ } ++ ++ private static void registerUnsafePlugins(InvocationPlugins plugins, BytecodeProvider replacementsBytecodeProvider) { ++ Registration r; ++ JavaKind[] unsafeJavaKinds; ++ if (Java8OrEarlier) { ++ r = new Registration(plugins, Unsafe.class); ++ unsafeJavaKinds = new JavaKind[]{JavaKind.Int, JavaKind.Long, JavaKind.Object}; ++ } else { ++ r = new Registration(plugins, "jdk.internal.misc.Unsafe", replacementsBytecodeProvider); ++ unsafeJavaKinds = new JavaKind[]{JavaKind.Int, JavaKind.Long, JavaKind.Object}; ++ } ++ ++ for (JavaKind kind : unsafeJavaKinds) { ++ Class javaClass = kind == JavaKind.Object ? Object.class : kind.toJavaClass(); ++ ++ r.register4("getAndSet" + kind.name(), Receiver.class, Object.class, long.class, javaClass, new InvocationPlugin() { ++ @Override ++ public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver unsafe, ValueNode object, ValueNode offset, ValueNode value) { ++ // Emits a null-check for the otherwise unused receiver ++ unsafe.get(); ++ b.addPush(kind, new AtomicReadAndWriteNode(object, offset, value, kind, LocationIdentity.any())); ++ b.getGraph().markUnsafeAccess(); ++ return true; ++ } ++ }); ++ ++ if (kind != JavaKind.Boolean && kind.isNumericInteger()) { ++ r.register4("getAndAdd" + kind.name(), Receiver.class, Object.class, long.class, javaClass, new InvocationPlugin() { ++ @Override ++ public boolean apply(GraphBuilderContext b, ResolvedJavaMethod targetMethod, Receiver unsafe, ValueNode object, ValueNode offset, ValueNode delta) { ++ // Emits a null-check for the otherwise unused receiver ++ unsafe.get(); ++ AddressNode address = b.add(new OffsetAddressNode(object, offset)); ++ b.addPush(kind, new AtomicReadAndAddNode(address, delta, kind, LocationIdentity.any())); ++ b.getGraph().markUnsafeAccess(); ++ return true; ++ } ++ }); ++ } ++ } ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64IntegerArithmeticSnippets.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64IntegerArithmeticSnippets.java +new file mode 100644 +index 0000000000..084561ccef +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64IntegerArithmeticSnippets.java +@@ -0,0 +1,294 @@ ++/* ++ * Copyright (c) 2014, 2016, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2018, Red Hat Inc. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++ ++package org.graalvm.compiler.replacements.sw64; ++ ++import org.graalvm.compiler.api.replacements.Snippet; ++import org.graalvm.compiler.api.replacements.Snippet.ConstantParameter; ++import org.graalvm.compiler.api.replacements.SnippetReflectionProvider; ++import org.graalvm.compiler.core.common.type.IntegerStamp; ++import org.graalvm.compiler.debug.DebugHandlersFactory; ++import org.graalvm.compiler.debug.GraalError; ++import org.graalvm.compiler.graph.Node.NodeIntrinsic; ++import org.graalvm.compiler.graph.NodeClass; ++import org.graalvm.compiler.nodeinfo.NodeInfo; ++import org.graalvm.compiler.nodes.DeoptimizeNode; ++import org.graalvm.compiler.nodes.NodeView; ++import org.graalvm.compiler.nodes.StructuredGraph; ++import org.graalvm.compiler.nodes.ValueNode; ++import org.graalvm.compiler.nodes.calc.IntegerDivRemNode; ++import org.graalvm.compiler.nodes.calc.SignedDivNode; ++import org.graalvm.compiler.nodes.calc.SignedRemNode; ++import org.graalvm.compiler.nodes.calc.UnsignedDivNode; ++import org.graalvm.compiler.nodes.calc.UnsignedRemNode; ++import org.graalvm.compiler.nodes.spi.LoweringTool; ++import org.graalvm.compiler.nodes.spi.NodeLIRBuilderTool; ++import org.graalvm.compiler.options.OptionValues; ++import org.graalvm.compiler.phases.util.Providers; ++import org.graalvm.compiler.replacements.SnippetTemplate; ++import org.graalvm.compiler.replacements.SnippetTemplate.AbstractTemplates; ++import org.graalvm.compiler.replacements.SnippetTemplate.Arguments; ++import org.graalvm.compiler.replacements.Snippets; ++ ++import jdk.vm.ci.code.TargetDescription; ++import jdk.vm.ci.meta.DeoptimizationAction; ++import jdk.vm.ci.meta.DeoptimizationReason; ++import jdk.vm.ci.meta.JavaKind; ++ ++/** ++ * Division in SW64 ISA does not generate a trap when dividing by zero, but instead sets the ++ * result to 0. These snippets throw an ArithmethicException if the denominator is 0 and otherwise ++ * forward to the LIRGenerator. 
++ */ ++public class SW64IntegerArithmeticSnippets extends AbstractTemplates implements Snippets { ++ ++ private final SnippetTemplate.SnippetInfo idiv; ++ private final SnippetTemplate.SnippetInfo ldiv; ++ private final SnippetTemplate.SnippetInfo irem; ++ private final SnippetTemplate.SnippetInfo lrem; ++ ++ private final SnippetTemplate.SnippetInfo uidiv; ++ private final SnippetTemplate.SnippetInfo uldiv; ++ private final SnippetTemplate.SnippetInfo uirem; ++ private final SnippetTemplate.SnippetInfo ulrem; ++ ++ public SW64IntegerArithmeticSnippets(OptionValues options, Iterable factories, Providers providers, SnippetReflectionProvider snippetReflection, ++ TargetDescription target) { ++ super(options, factories, providers, snippetReflection, target); ++ idiv = snippet(SW64IntegerArithmeticSnippets.class, "idivSnippet"); ++ ldiv = snippet(SW64IntegerArithmeticSnippets.class, "ldivSnippet"); ++ irem = snippet(SW64IntegerArithmeticSnippets.class, "iremSnippet"); ++ lrem = snippet(SW64IntegerArithmeticSnippets.class, "lremSnippet"); ++ ++ uidiv = snippet(SW64IntegerArithmeticSnippets.class, "uidivSnippet"); ++ uldiv = snippet(SW64IntegerArithmeticSnippets.class, "uldivSnippet"); ++ uirem = snippet(SW64IntegerArithmeticSnippets.class, "uiremSnippet"); ++ ulrem = snippet(SW64IntegerArithmeticSnippets.class, "ulremSnippet"); ++ } ++ ++ public void lower(IntegerDivRemNode node, LoweringTool tool) { ++ JavaKind kind = node.stamp(NodeView.DEFAULT).getStackKind(); ++ assert kind == JavaKind.Int || kind == JavaKind.Long; ++ SnippetTemplate.SnippetInfo snippet; ++ if (node instanceof SafeNode) { ++ // We already introduced the zero division check, nothing to do. ++ return; ++ } else if (node instanceof SignedDivNode) { ++ snippet = kind == JavaKind.Int ? idiv : ldiv; ++ } else if (node instanceof SignedRemNode) { ++ snippet = kind == JavaKind.Int ? irem : lrem; ++ } else if (node instanceof UnsignedDivNode) { ++ snippet = kind == JavaKind.Int ? uidiv : uldiv; ++ } else if (node instanceof UnsignedRemNode) { ++ snippet = kind == JavaKind.Int ? 
uirem : ulrem; ++ } else { ++ throw GraalError.shouldNotReachHere(); ++ } ++ StructuredGraph graph = node.graph(); ++ Arguments args = new Arguments(snippet, graph.getGuardsStage(), tool.getLoweringStage()); ++ args.add("x", node.getX()); ++ args.add("y", node.getY()); ++ ++ IntegerStamp yStamp = (IntegerStamp) node.getY().stamp(NodeView.DEFAULT); ++ args.addConst("needsZeroCheck", node.getZeroCheck() == null && yStamp.contains(0)); ++ ++ template(node, args).instantiate(providers.getMetaAccess(), node, SnippetTemplate.DEFAULT_REPLACER, args); ++ } ++ ++ @Snippet ++ public static int idivSnippet(int x, int y, @ConstantParameter boolean needsZeroCheck) { ++ if (needsZeroCheck) { ++ checkForZero(y); ++ } ++ return safeDiv(x, y); ++ } ++ ++ @Snippet ++ public static long ldivSnippet(long x, long y, @ConstantParameter boolean needsZeroCheck) { ++ if (needsZeroCheck) { ++ checkForZero(y); ++ } ++ return safeDiv(x, y); ++ } ++ ++ @Snippet ++ public static int iremSnippet(int x, int y, @ConstantParameter boolean needsZeroCheck) { ++ if (needsZeroCheck) { ++ checkForZero(y); ++ } ++ return safeRem(x, y); ++ } ++ ++ @Snippet ++ public static long lremSnippet(long x, long y, @ConstantParameter boolean needsZeroCheck) { ++ if (needsZeroCheck) { ++ checkForZero(y); ++ } ++ return safeRem(x, y); ++ } ++ ++ @Snippet ++ public static int uidivSnippet(int x, int y, @ConstantParameter boolean needsZeroCheck) { ++ if (needsZeroCheck) { ++ checkForZero(y); ++ } ++ return safeUDiv(x, y); ++ } ++ ++ @Snippet ++ public static long uldivSnippet(long x, long y, @ConstantParameter boolean needsZeroCheck) { ++ if (needsZeroCheck) { ++ checkForZero(y); ++ } ++ return safeUDiv(x, y); ++ } ++ ++ @Snippet ++ public static int uiremSnippet(int x, int y, @ConstantParameter boolean needsZeroCheck) { ++ if (needsZeroCheck) { ++ checkForZero(y); ++ } ++ return safeURem(x, y); ++ } ++ ++ @Snippet ++ public static long ulremSnippet(long x, long y, @ConstantParameter boolean needsZeroCheck) { ++ if (needsZeroCheck) { ++ checkForZero(y); ++ } ++ return safeURem(x, y); ++ } ++ ++ private static void checkForZero(int y) { ++ if (y == 0) { ++ // "/ by zero" ++ DeoptimizeNode.deopt(DeoptimizationAction.InvalidateReprofile, DeoptimizationReason.ArithmeticException); ++ } ++ } ++ ++ private static void checkForZero(long y) { ++ if (y == 0) { ++ // "/ by zero" ++ DeoptimizeNode.deopt(DeoptimizationAction.InvalidateReprofile, DeoptimizationReason.ArithmeticException); ++ } ++ } ++ ++ @NodeIntrinsic(SafeSignedDivNode.class) ++ private static native int safeDiv(int x, int y); ++ ++ @NodeIntrinsic(SafeSignedDivNode.class) ++ private static native long safeDiv(long x, long y); ++ ++ @NodeIntrinsic(SafeSignedRemNode.class) ++ private static native int safeRem(int x, int y); ++ ++ @NodeIntrinsic(SafeSignedRemNode.class) ++ private static native long safeRem(long x, long y); ++ ++ @NodeIntrinsic(SafeUnsignedDivNode.class) ++ private static native int safeUDiv(int x, int y); ++ ++ @NodeIntrinsic(SafeUnsignedDivNode.class) ++ private static native long safeUDiv(long x, long y); ++ ++ @NodeIntrinsic(SafeUnsignedRemNode.class) ++ private static native int safeURem(int x, int y); ++ ++ @NodeIntrinsic(SafeUnsignedRemNode.class) ++ private static native long safeURem(long x, long y); ++ ++ /** ++ * Marker interface to distinguish untreated nodes from ones where we have installed the ++ * additional checks. 
++ */ ++ private interface SafeNode { ++ } ++ ++ @NodeInfo ++ static class SafeSignedDivNode extends SignedDivNode implements SafeNode { ++ public static final NodeClass TYPE = NodeClass.create(SafeSignedDivNode.class); ++ ++ protected SafeSignedDivNode(ValueNode x, ValueNode y) { ++ super(TYPE, x, y, null); ++ } ++ ++ @Override ++ public void generate(NodeLIRBuilderTool gen) { ++ // override to ensure we always pass a null frame state ++ // the parent method expects to create one from a non null before state ++ gen.setResult(this, gen.getLIRGeneratorTool().getArithmetic().emitDiv(gen.operand(getX()), gen.operand(getY()), null)); ++ } ++ } ++ ++ @NodeInfo ++ static class SafeSignedRemNode extends SignedRemNode implements SafeNode { ++ public static final NodeClass TYPE = NodeClass.create(SafeSignedRemNode.class); ++ ++ protected SafeSignedRemNode(ValueNode x, ValueNode y) { ++ super(TYPE, x, y, null); ++ } ++ ++ @Override ++ public void generate(NodeLIRBuilderTool gen) { ++ // override to ensure we always pass a null frame state ++ // the parent method expects to create one from a non null before state ++ gen.setResult(this, gen.getLIRGeneratorTool().getArithmetic().emitRem(gen.operand(getX()), gen.operand(getY()), null)); ++ } ++ } ++ ++ @NodeInfo ++ static class SafeUnsignedDivNode extends UnsignedDivNode implements SafeNode { ++ public static final NodeClass TYPE = NodeClass.create(SafeUnsignedDivNode.class); ++ ++ protected SafeUnsignedDivNode(ValueNode x, ValueNode y) { ++ super(TYPE, x, y, null); ++ } ++ ++ @Override ++ public void generate(NodeLIRBuilderTool gen) { ++ // override to ensure we always pass a null frame state ++ // the parent method expects to create one from a non null before state ++ gen.setResult(this, gen.getLIRGeneratorTool().getArithmetic().emitUDiv(gen.operand(getX()), gen.operand(getY()), null)); ++ } ++ } ++ ++ @NodeInfo ++ static class SafeUnsignedRemNode extends UnsignedRemNode implements SafeNode { ++ public static final NodeClass TYPE = NodeClass.create(SafeUnsignedRemNode.class); ++ ++ protected SafeUnsignedRemNode(ValueNode x, ValueNode y) { ++ super(TYPE, x, y, null); ++ } ++ ++ @Override ++ public void generate(NodeLIRBuilderTool gen) { ++ // override to ensure we always pass a null frame state ++ // the parent method expects to create one from a non null before state ++ gen.setResult(this, gen.getLIRGeneratorTool().getArithmetic().emitURem(gen.operand(getX()), gen.operand(getY()), null)); ++ } ++ } ++ ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64IntegerSubstitutions.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64IntegerSubstitutions.java +new file mode 100644 +index 0000000000..d1039f13ef +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64IntegerSubstitutions.java +@@ -0,0 +1,51 @@ ++/* ++ * Copyright (c) 2013, 2015, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. 
++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.replacements.sw64; ++ ++import org.graalvm.compiler.api.replacements.ClassSubstitution; ++import org.graalvm.compiler.api.replacements.MethodSubstitution; ++ ++/** ++ * SW64 ISA offers a count leading zeros instruction which can be used to implement ++ * numberOfLeadingZeros more efficiently than using BitScanReverse. ++ */ ++@ClassSubstitution(Integer.class) ++public class SW64IntegerSubstitutions { ++ ++ @MethodSubstitution ++ public static int bitCount(int value) { ++ // Based on Warren, Hacker's Delight, slightly adapted to profit from Aarch64 add + shift ++ // instruction. ++ // Assuming the peephole optimizer optimizes all x - y >>> z into a single instruction ++ // this takes 10 instructions. ++ int x = value; ++ x = x - ((x & 0xaaaaaaaa) >>> 1); ++ x = (x & 0x33333333) + ((x & 0xcccccccc) >>> 2); ++ x = (x + (x >>> 4)) & 0x0f0f0f0f; ++ x = x + (x >>> 8); ++ x = x + (x >>> 16); ++ return x & 0x3f; ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64LongSubstitutions.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64LongSubstitutions.java +new file mode 100644 +index 0000000000..7e00362d2a +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64LongSubstitutions.java +@@ -0,0 +1,53 @@ ++/* ++ * Copyright (c) 2013, 2016, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ */
++
++package org.graalvm.compiler.replacements.sw64;
++
++import org.graalvm.compiler.api.replacements.ClassSubstitution;
++import org.graalvm.compiler.api.replacements.MethodSubstitution;
++
++/**
++ * SW64 ISA offers a count leading zeros instruction which can be used to implement
++ * numberOfLeadingZeros more efficiently than using BitScanReverse.
++ */
++@ClassSubstitution(Long.class)
++public class SW64LongSubstitutions {
++
++    @MethodSubstitution
++    public static int bitCount(long value) {
++        // Based on Warren, Hacker's Delight, slightly adapted to profit from Aarch64 add + shift
++        // instruction.
++        // Assuming the peephole optimizer optimizes all x - y >>> z into a single instruction
++        // this takes 11 instructions.
++        long x = value;
++        x = x - ((x & 0xaaaaaaaaaaaaaaaaL) >>> 1);
++        x = (x & 0x3333333333333333L) + ((x & 0xccccccccccccccccL) >>> 2);
++        x = (x + (x >>> 4)) & 0x0f0f0f0f0f0f0f0fL;
++        x = x + (x >>> 8);
++        x = x + (x >>> 16);
++        x = x + (x >>> 32);
++        return (int) x & 0x7f;
++    }
++
++}
+diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64RoundNode.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64RoundNode.java
+new file mode 100644
+index 0000000000..15274013c5
+--- /dev/null
++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64RoundNode.java
+@@ -0,0 +1,115 @@
++/*
++ * Copyright (c) 2018, Oracle and/or its affiliates. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ */ ++ ++ ++ ++package org.graalvm.compiler.replacements.sw64; ++ ++import jdk.vm.ci.meta.JavaConstant; ++import jdk.vm.ci.meta.JavaKind; ++import org.graalvm.compiler.core.common.type.FloatStamp; ++import org.graalvm.compiler.core.common.type.Stamp; ++import org.graalvm.compiler.debug.GraalError; ++import org.graalvm.compiler.graph.NodeClass; ++import org.graalvm.compiler.graph.spi.CanonicalizerTool; ++import org.graalvm.compiler.lir.gen.ArithmeticLIRGeneratorTool; ++import org.graalvm.compiler.lir.sw64.SW64ArithmeticLIRGeneratorTool; ++import org.graalvm.compiler.lir.sw64.SW64ArithmeticLIRGeneratorTool.RoundingMode; ++import org.graalvm.compiler.nodeinfo.NodeInfo; ++import org.graalvm.compiler.nodes.ConstantNode; ++import org.graalvm.compiler.nodes.NodeView; ++import org.graalvm.compiler.nodes.ValueNode; ++import org.graalvm.compiler.nodes.calc.UnaryNode; ++import org.graalvm.compiler.nodes.spi.ArithmeticLIRLowerable; ++import org.graalvm.compiler.nodes.spi.NodeLIRBuilderTool; ++ ++import static org.graalvm.compiler.nodeinfo.NodeCycles.CYCLES_8; ++ ++/** ++ * Round floating-point value. ++ */ ++@NodeInfo(cycles = CYCLES_8) ++public final class SW64RoundNode extends UnaryNode implements ArithmeticLIRLowerable { ++ public static final NodeClass TYPE = NodeClass.create(SW64RoundNode.class); ++ ++ private final RoundingMode mode; ++ ++ public SW64RoundNode(ValueNode value, RoundingMode mode) { ++ super(TYPE, roundStamp((FloatStamp) value.stamp(NodeView.DEFAULT), mode), value); ++ this.mode = mode; ++ } ++ ++ private static double round(RoundingMode mode, double input) { ++ switch (mode) { ++ case DOWN: ++ return Math.floor(input); ++ case NEAREST: ++ return Math.rint(input); ++ case UP: ++ return Math.ceil(input); ++ case TRUNCATE: ++ return (long) input; ++ default: ++ throw GraalError.unimplemented("unimplemented RoundingMode " + mode); ++ } ++ } ++ ++ private static FloatStamp roundStamp(FloatStamp stamp, RoundingMode mode) { ++ double min = stamp.lowerBound(); ++ min = Math.min(min, round(mode, min)); ++ ++ double max = stamp.upperBound(); ++ max = Math.max(max, round(mode, max)); ++ ++ return new FloatStamp(stamp.getBits(), min, max, stamp.isNonNaN()); ++ } ++ ++ @Override ++ public Stamp foldStamp(Stamp newStamp) { ++ assert newStamp.isCompatible(getValue().stamp(NodeView.DEFAULT)); ++ return roundStamp((FloatStamp) newStamp, mode); ++ } ++ ++ private ValueNode tryFold(ValueNode input) { ++ if (input.isConstant()) { ++ JavaConstant c = input.asJavaConstant(); ++ if (c.getJavaKind() == JavaKind.Double) { ++ return ConstantNode.forDouble(round(mode, c.asDouble())); ++ } else if (c.getJavaKind() == JavaKind.Float) { ++ return ConstantNode.forFloat((float) round(mode, c.asFloat())); ++ } ++ } ++ return null; ++ } ++ ++ @Override ++ public ValueNode canonical(CanonicalizerTool tool, ValueNode forValue) { ++ ValueNode folded = tryFold(forValue); ++ return folded != null ? 
folded : this; ++ } ++ ++ @Override ++ public void generate(NodeLIRBuilderTool builder, ArithmeticLIRGeneratorTool gen) { ++ builder.setResult(this, ((SW64ArithmeticLIRGeneratorTool) gen).emitRound(builder.operand(getValue()), mode)); ++ } ++} +diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64StringLatin1Substitutions.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64StringLatin1Substitutions.java +new file mode 100644 +index 0000000000..56867525e7 +--- /dev/null ++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64StringLatin1Substitutions.java +@@ -0,0 +1,61 @@ ++/* ++ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++ ++package org.graalvm.compiler.replacements.sw64; ++ ++import org.graalvm.compiler.api.replacements.ClassSubstitution; ++import org.graalvm.compiler.api.replacements.MethodSubstitution; ++import org.graalvm.compiler.replacements.nodes.ArrayCompareToNode; ++ ++import jdk.vm.ci.meta.JavaKind; ++ ++// JaCoCo Exclude ++ ++/** ++ * Substitutions for {@code java.lang.StringLatin1} methods. ++ * ++ * Since JDK 9. 
++ */
++@ClassSubstitution(className = "java.lang.StringLatin1", optional = true)
++public class SW64StringLatin1Substitutions {
++
++    /**
++     * @param value is byte[]
++     * @param other is byte[]
++     */
++    @MethodSubstitution
++    public static int compareTo(byte[] value, byte[] other) {
++        return ArrayCompareToNode.compareTo(value, other, value.length, other.length, JavaKind.Byte, JavaKind.Byte);
++    }
++
++    /**
++     * @param value is byte[]
++     * @param other is char[]
++     */
++    @MethodSubstitution
++    public static int compareToUTF16(byte[] value, byte[] other) {
++        return ArrayCompareToNode.compareTo(value, other, value.length, other.length, JavaKind.Byte, JavaKind.Char);
++    }
++
++}
+diff --git a/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64StringUTF16Substitutions.java b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64StringUTF16Substitutions.java
+new file mode 100644
+index 0000000000..ab22db35ac
+--- /dev/null
++++ b/src/jdk.internal.vm.compiler/share/classes/org.graalvm.compiler.replacements.sw64/src/org/graalvm/compiler/replacements/sw64/SW64StringUTF16Substitutions.java
+@@ -0,0 +1,65 @@
++/*
++ * Copyright (c) 2017, Oracle and/or its affiliates. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ */
++
++
++package org.graalvm.compiler.replacements.sw64;
++
++import org.graalvm.compiler.api.replacements.ClassSubstitution;
++import org.graalvm.compiler.api.replacements.MethodSubstitution;
++import org.graalvm.compiler.replacements.nodes.ArrayCompareToNode;
++
++import jdk.vm.ci.meta.JavaKind;
++
++// JaCoCo Exclude
++
++/**
++ * Substitutions for {@code java.lang.StringUTF16} methods.
++ *
++ * Since JDK 9.
++ */
++@ClassSubstitution(className = "java.lang.StringUTF16", optional = true)
++public class SW64StringUTF16Substitutions {
++
++    /**
++     * @param value is char[]
++     * @param other is char[]
++     */
++    @MethodSubstitution
++    public static int compareTo(byte[] value, byte[] other) {
++        return ArrayCompareToNode.compareTo(value, other, value.length, other.length, JavaKind.Char, JavaKind.Char);
++    }
++
++    /**
++     * @param value is char[]
++     * @param other is byte[]
++     */
++    @MethodSubstitution
++    public static int compareToLatin1(byte[] value, byte[] other) {
++        /*
++         * Swapping array arguments because intrinsic expects order to be byte[]/char[] but kind
++         * arguments stay in original order.
++ */ ++ return ArrayCompareToNode.compareTo(other, value, other.length, value.length, JavaKind.Char, JavaKind.Byte); ++ } ++ ++} +diff --git a/test/failure_handler/src/share/classes/jdk/test/failurehandler/jtreg/GatherDiagnosticInfoObserver.java b/test/failure_handler/src/share/classes/jdk/test/failurehandler/jtreg/GatherDiagnosticInfoObserver.java +index ba80026174..bd6fb0b502 100644 +--- a/test/failure_handler/src/share/classes/jdk/test/failurehandler/jtreg/GatherDiagnosticInfoObserver.java ++++ b/test/failure_handler/src/share/classes/jdk/test/failurehandler/jtreg/GatherDiagnosticInfoObserver.java +@@ -42,7 +42,7 @@ import java.util.Map; + * The jtreg test execution observer, which gathers info about + * system and dumps it to a file. + */ +-public class GatherDiagnosticInfoObserver implements Harness.Observer { ++ public class GatherDiagnosticInfoObserver implements Harness.Observer { + public static final String LOG_FILENAME = "environment.log"; + public static final String ENVIRONMENT_OUTPUT = "environment.html"; + +diff --git a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +index 126a43a900..33b99d98af 100644 +--- a/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java ++++ b/test/hotspot/jtreg/testlibrary_tests/TestMutuallyExclusivePlatformPredicates.java +@@ -45,7 +45,7 @@ import java.util.Set; + */ + public class TestMutuallyExclusivePlatformPredicates { + private static enum MethodGroup { +- ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86"), ++ ARCH("isAArch64", "isARM", "isPPC", "isS390x", "isSparc", "isX64", "isX86", "isSW64"), + BITNESS("is32bit", "is64bit"), + OS("isAix", "isLinux", "isOSX", "isSolaris", "isWindows"), + VM_TYPE("isClient", "isServer", "isGraal", "isMinimal", "isZero", "isEmbedded"), +diff --git a/test/jdk/java/lang/invoke/MethodHandles/CatchExceptionTest.java b/test/jdk/java/lang/invoke/MethodHandles/CatchExceptionTest.java +index b1c9d90264..f11adb43dd 100644 +--- a/test/jdk/java/lang/invoke/MethodHandles/CatchExceptionTest.java ++++ b/test/jdk/java/lang/invoke/MethodHandles/CatchExceptionTest.java +@@ -46,7 +46,7 @@ import java.util.function.Supplier; + * @library /lib/testlibrary /java/lang/invoke/common /test/lib + * @build jdk.test.lib.TimeLimitedRunner + * @compile CatchExceptionTest.java +- * @run main/othervm -esa test.java.lang.invoke.MethodHandles.CatchExceptionTest ++ * @run main/othervm/timeout=900000 -esa test.java.lang.invoke.MethodHandles.CatchExceptionTest + * @key intermittent randomness + */ + public class CatchExceptionTest { +diff --git a/test/jdk/java/util/concurrent/BlockingQueue/OfferDrainToLoops.java b/test/jdk/java/util/concurrent/BlockingQueue/OfferDrainToLoops.java +index 7ab7567094..666c1e211c 100644 +--- a/test/jdk/java/util/concurrent/BlockingQueue/OfferDrainToLoops.java ++++ b/test/jdk/java/util/concurrent/BlockingQueue/OfferDrainToLoops.java +@@ -155,7 +155,7 @@ public class OfferDrainToLoops { + }}}; + + for (Thread thread : new Thread[] { offerer, drainer, scanner }) { +- thread.join(timeoutMillis + testDurationMillis); ++ thread.join(timeoutMillis*100 + testDurationMillis); + if (thread.isAlive()) { + System.err.printf("Hung thread: %s%n", thread.getName()); + failed++; +diff --git a/test/jdk/java/util/concurrent/ConcurrentQueues/OfferRemoveLoops.java b/test/jdk/java/util/concurrent/ConcurrentQueues/OfferRemoveLoops.java +index de540abc5c..2ce510cead 
100644 +--- a/test/jdk/java/util/concurrent/ConcurrentQueues/OfferRemoveLoops.java ++++ b/test/jdk/java/util/concurrent/ConcurrentQueues/OfferRemoveLoops.java +@@ -156,7 +156,7 @@ public class OfferRemoveLoops { + done.countDown(); + }}; + +- if (! done.await(timeoutMillis + testDurationMillis, MILLISECONDS)) { ++ if (! done.await(timeoutMillis * 100 + testDurationMillis, MILLISECONDS)) { + for (Thread thread : new Thread[] { offerer, remover, scanner }) { + if (thread.isAlive()) { + System.err.printf("Hung thread: %s%n", thread.getName()); +diff --git a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +index 7990c49a1f..02a2bb1672 100644 +--- a/test/jdk/jdk/jfr/event/os/TestCPUInformation.java ++++ b/test/jdk/jdk/jfr/event/os/TestCPUInformation.java +@@ -54,8 +54,8 @@ public class TestCPUInformation { + Events.assertField(event, "hwThreads").atLeast(1); + Events.assertField(event, "cores").atLeast(1); + Events.assertField(event, "sockets").atLeast(1); +- Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390"); +- Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390"); ++ Events.assertField(event, "cpu").containsAny("Intel", "AMD", "Unknown x86", "sparc", "ARM", "PPC", "PowerPC", "AArch64", "s390", "Sw64"); ++ Events.assertField(event, "description").containsAny("Intel", "AMD", "Unknown x86", "SPARC", "ARM", "PPC", "PowerPC", "AArch64", "s390", "Sw64"); + } + } + } +diff --git a/test/jdk/sun/misc/SunMiscSignalTest.java b/test/jdk/sun/misc/SunMiscSignalTest.java +index dd45ec179b..b5fc75f630 100644 +--- a/test/jdk/sun/misc/SunMiscSignalTest.java ++++ b/test/jdk/sun/misc/SunMiscSignalTest.java +@@ -141,6 +141,11 @@ public class SunMiscSignalTest { + {"INFO", IsSupported.YES, CanRegister.YES, CanRaise.YES, invokedXrs}, + }; + ++ Object[][] posixSW64Signals = { ++ {"BUS", IsSupported.YES, CanRegister.YES, CanRaise.YES, invokedXrs}, ++ {"INFO", IsSupported.YES, CanRegister.YES, CanRaise.YES, invokedXrs}, ++ }; ++ + Object[][] windowsSignals = { + {"HUP", IsSupported.NO, CanRegister.NO, CanRaise.NO, Invoked.NO}, + {"QUIT", IsSupported.NO, CanRegister.NO, CanRaise.NO, Invoked.NO}, +@@ -164,8 +169,16 @@ public class SunMiscSignalTest { + {"SYS", IsSupported.NO, CanRegister.NO, CanRaise.NO, Invoked.NO}, + }; + +- Object[][] combinedPosixSignals = concatArrays(posixSignals, +- (Platform.isOSX() ? posixOSXSignals : posixNonOSXSignals)); ++ Object[][] combinedPosixSignals; ++ ++ if(Platform.isOSX()){ ++ combinedPosixSignals = concatArrays(posixSignals, posixOSXSignals); ++ }else if(Platform.isSW64()){ ++ combinedPosixSignals = concatArrays(posixSignals, posixSW64Signals); ++ }else{ ++ combinedPosixSignals = concatArrays(posixSignals, posixNonOSXSignals); ++ } ++ + return concatArrays(commonSignals, (Platform.isWindows() ? windowsSignals : combinedPosixSignals)); + } + +diff --git a/test/lib/jdk/test/lib/Platform.java b/test/lib/jdk/test/lib/Platform.java +index 6269373c2b..de14f9f536 100644 +--- a/test/lib/jdk/test/lib/Platform.java ++++ b/test/lib/jdk/test/lib/Platform.java +@@ -229,6 +229,11 @@ public class Platform { + return isArch("(i386)|(x86(?!_64))"); + } + ++ public static boolean isSW64() { ++ // On Linux it's 'sw_64' or 'sw64'. 
++ return isArch("(sw_64)|(sw64)"); ++ } ++ + public static String getOsArch() { + return osArch; + } +diff --git a/version_patch.sh b/version_patch.sh +new file mode 100755 +index 0000000000..70aa379f32 +--- /dev/null ++++ b/version_patch.sh +@@ -0,0 +1,2 @@ ++gitnum=`git log| head -n 1 |cut -b 8-15` ++sed -i 's/\$srcgitnumber/'$gitnum'/g' ./make/autoconf/jdk-version.m4 diff --git a/fix-potential-build-fail-in-jbolt.patch b/fix-potential-build-fail-in-jbolt.patch new file mode 100644 index 0000000000000000000000000000000000000000..7d1aa5c81fc3fbf7846988285c3db7e697605f98 --- /dev/null +++ b/fix-potential-build-fail-in-jbolt.patch @@ -0,0 +1,129 @@ +From d84a998248d3403e093ec14abf2daf3e1bd8dd25 Mon Sep 17 00:00:00 2001 +Subject: fix potential build fail in jbolt + +--- + make/hotspot/lib/JvmFeatures.gmk | 2 ++ + src/hotspot/os/linux/os_linux.cpp | 5 +++++ + src/hotspot/share/ci/ciEnv.cpp | 2 ++ + src/hotspot/share/jbolt/jBoltDcmds.cpp | 9 +++++++-- + src/hotspot/share/jfr/periodic/jfrPeriodic.cpp | 2 ++ + src/hotspot/share/runtime/sweeper.cpp | 2 +- + src/hotspot/share/utilities/macros.hpp | 3 +++ + 7 files changed, 22 insertions(+), 3 deletions(-) + +diff --git a/make/hotspot/lib/JvmFeatures.gmk b/make/hotspot/lib/JvmFeatures.gmk +index 7b86f83cc..55bd4ebd1 100644 +--- a/make/hotspot/lib/JvmFeatures.gmk ++++ b/make/hotspot/lib/JvmFeatures.gmk +@@ -174,6 +174,8 @@ endif + ifneq ($(call check-jvm-feature, jfr), true) + JVM_CFLAGS_FEATURES += -DINCLUDE_JFR=0 + JVM_EXCLUDE_PATTERNS += jfr ++ JVM_CFLAGS_FEATURES += -DINCLUDE_JBOLT=0 ++ JVM_EXCLUDE_PATTERNS += jbolt + endif + + ################################################################################ +diff --git a/src/hotspot/os/linux/os_linux.cpp b/src/hotspot/os/linux/os_linux.cpp +index 0acc6a57f..52f65baba 100644 +--- a/src/hotspot/os/linux/os_linux.cpp ++++ b/src/hotspot/os/linux/os_linux.cpp +@@ -5703,6 +5703,11 @@ void os::Linux::load_plugin_library() { + } + #endif // INCLUDE_JBOLT + } ++ ++ JBOLT_ONLY(log_debug(jbolt)("Plugin library for JBolt: %s %s %s %s", BOOL_TO_STR(_jboltHeap_init != NULL), ++ BOOL_TO_STR(_jboltLog_precalc != NULL), ++ BOOL_TO_STR(_jboltLog_do != NULL), ++ BOOL_TO_STR(_jboltMerge_judge != NULL));) + } + + // this is called _after_ the global arguments have been parsed +diff --git a/src/hotspot/share/ci/ciEnv.cpp b/src/hotspot/share/ci/ciEnv.cpp +index f66926600..5a71a567e 100644 +--- a/src/hotspot/share/ci/ciEnv.cpp ++++ b/src/hotspot/share/ci/ciEnv.cpp +@@ -1048,7 +1048,9 @@ void ciEnv::register_method(ciMethod* target, + frame_words, oop_map_set, + handler_table, inc_table, + compiler, task()->comp_level(), ++#if INCLUDE_JVMCI + NULL, NULL, ++#endif + code_blob_type); + } else + #endif // INCLUDE_JBOLT +diff --git a/src/hotspot/share/jbolt/jBoltDcmds.cpp b/src/hotspot/share/jbolt/jBoltDcmds.cpp +index 249a98001..d57eea39b 100644 +--- a/src/hotspot/share/jbolt/jBoltDcmds.cpp ++++ b/src/hotspot/share/jbolt/jBoltDcmds.cpp +@@ -24,7 +24,8 @@ + #include "jbolt/jBoltDcmds.hpp" + #include "jbolt/jBoltControlThread.hpp" + #include "jbolt/jBoltManager.hpp" +- ++#include "runtime/os.hpp" ++ + bool register_jbolt_dcmds() { + uint32_t full_export = DCmd_Source_Internal | DCmd_Source_AttachAPI | DCmd_Source_MBean; + DCmdFactory::register_DCmdFactory(new DCmdFactoryImpl(full_export, true, false)); +@@ -199,8 +200,12 @@ void JBoltDumpDCmd::execute(DCmdSource source, TRAPS) { + output()->print_cr("Failed: File open error or NULL: %s", path); + break; + case JBoltOK: +- rp = realpath(path, buffer); ++#ifdef __linux__ ++ 
rp = os::Posix::realpath(path, buffer, sizeof(buffer)); + output()->print_cr("Successful: Dump to %s", buffer); ++#else ++ output()->print_cr("Successful: Dump to %s", path); ++#endif + break; + default: + ShouldNotReachHere(); +diff --git a/src/hotspot/share/jfr/periodic/jfrPeriodic.cpp b/src/hotspot/share/jfr/periodic/jfrPeriodic.cpp +index d9580e57e..a010df8d0 100644 +--- a/src/hotspot/share/jfr/periodic/jfrPeriodic.cpp ++++ b/src/hotspot/share/jfr/periodic/jfrPeriodic.cpp +@@ -557,8 +557,10 @@ TRACE_REQUEST_FUNC(CodeCacheConfiguration) { + event.set_nonNMethodSize(NonNMethodCodeHeapSize); + event.set_profiledSize(ProfiledCodeHeapSize); + event.set_nonProfiledSize(NonProfiledCodeHeapSize); ++#if INCLUDE_JBOLT + event.set_jboltHotSize(JBoltCodeHeapSize); + event.set_jboltTmpSize(JBoltCodeHeapSize); ++#endif + event.set_expansionSize(CodeCacheExpansionSize); + event.set_minBlockLength(CodeCacheMinBlockLength); + event.set_startAddress((u8)CodeCache::low_bound()); +diff --git a/src/hotspot/share/runtime/sweeper.cpp b/src/hotspot/share/runtime/sweeper.cpp +index 82f25c50b..fd174d720 100644 +--- a/src/hotspot/share/runtime/sweeper.cpp ++++ b/src/hotspot/share/runtime/sweeper.cpp +@@ -378,7 +378,7 @@ void NMethodSweeper::possibly_sweep() { + // allocations go to the non-profiled heap and we must be make sure that there is + // enough space. + double free_percent = 1 / CodeCache::reverse_free_ratio(CodeBlobType::MethodNonProfiled) * 100; +- if (free_percent <= StartAggressiveSweepingAt || (UseJBolt && JBoltManager::force_sweep())) { ++ if (free_percent <= StartAggressiveSweepingAt JBOLT_ONLY( || (UseJBolt && JBoltManager::force_sweep()) )) { + do_stack_scanning(); + } + +diff --git a/src/hotspot/share/utilities/macros.hpp b/src/hotspot/share/utilities/macros.hpp +index 6dd187652..638d73e6b 100644 +--- a/src/hotspot/share/utilities/macros.hpp ++++ b/src/hotspot/share/utilities/macros.hpp +@@ -297,6 +297,9 @@ + #define JFR_ONLY(code) + #define NOT_JFR_RETURN() {} + #define NOT_JFR_RETURN_(code) { return code; } ++#if INCLUDE_JBOLT ++#define INCLUDE_JBOLT 0 // INCLUDE_JBOLT depends on INCLUDE_JFR ++#endif + #endif + + #ifndef INCLUDE_JVMCI +-- +2.23.0 + diff --git a/jdk-updates-jdk11u-jdk-11.0.27-ga.tar.xz b/jdk-updates-jdk11u-jdk-11.0.27-ga.tar.xz new file mode 100644 index 0000000000000000000000000000000000000000..79279c7beddc9d5d04bbb6c59bc7ff4da22d4a60 Binary files /dev/null and b/jdk-updates-jdk11u-jdk-11.0.27-ga.tar.xz differ diff --git a/openjdk-11.spec b/openjdk-11.spec index 58a2fc1c7a764078bdb4a3c29a6ea31391633dc8..28b0ddc7bc8928d6d1b99d2446daceee139f1570 100644 --- a/openjdk-11.spec +++ b/openjdk-11.spec @@ -123,12 +123,15 @@ %ifarch %{ppc64le} %global archinstall ppc64le %endif +%ifarch sw_64 +%global archinstall sw64 +%endif %global with_systemtap 1 # New Version-String scheme-style defines %global majorver 11 -%global securityver 26 +%global securityver 27 # buildjdkver is usually same as %%{majorver}, # but in time of bootstrap of next jdk, it is majorver-1, # and this it is better to change it here, on single place @@ -149,12 +152,12 @@ %global origin_nice OpenJDK %global top_level_dir_name %{origin} %global minorver 0 -%global buildver 4 +%global buildver 6 %global patchver 0 %global project jdk-updates %global repo jdk11u -%global revision jdk-11.0.26-ga +%global revision jdk-11.0.27-ga %global full_revision %{project}-%{repo}-%{revision} # priority must be 7 digits in total # setting to 1, so debug ones can have 0 @@ -459,7 +462,9 @@ exit 0 %{_jvmdir}/%{sdkdir -- 
%{?1}}/bin/rmiregistry %{_jvmdir}/%{sdkdir -- %{?1}}/bin/unpack200 %dir %{_jvmdir}/%{sdkdir -- %{?1}}/lib +%ifnarch sw_64 %{_jvmdir}/%{sdkdir -- %{?1}}/lib/classlist +%endif %{_jvmdir}/%{sdkdir -- %{?1}}/lib/jexec %{_jvmdir}/%{sdkdir -- %{?1}}/lib/jspawnhelper %{_jvmdir}/%{sdkdir -- %{?1}}/lib/jrt-fs.jar @@ -576,7 +581,7 @@ exit 0 %{_jvmdir}/%{sdkdir -- %{?1}}/bin/jstatd %{_jvmdir}/%{sdkdir -- %{?1}}/bin/rmic %{_jvmdir}/%{sdkdir -- %{?1}}/bin/serialver -%ifnarch loongarch64 +%ifnarch loongarch64 sw_64 %ifarch %{aarch64} x86_64 %{_jvmdir}/%{sdkdir -- %{?1}}/bin/jaotc %endif @@ -902,6 +907,12 @@ Patch93: Cache-byte-when-constructing-String-with-duplicate-c.patch # 11.0.25 Patch94: change-ActivePrcoessorCount-only-for-HBase.patch + +# 11.0.26 +Patch96: add-jbolt-feature.patch +Patch97: fix-potential-build-fail-in-jbolt.patch +Patch98: 8352716-tz-Update-Timezone-Data-to-2025b.patch + ############################################ # # riscv64 specific patches @@ -909,6 +920,13 @@ Patch94: change-ActivePrcoessorCount-only-for-HBase.patch ############################################ Patch2000: Add-riscv64-support.patch +############################################ +# +# sw64 specific patches +# +############################################ +Patch6000: add-sw_64-support.patch + BuildRequires: elfutils-extra BuildRequires: autoconf BuildRequires: alsa-lib-devel @@ -1140,7 +1158,7 @@ fi pushd %{top_level_dir_name} # OpenJDK patches -%ifnarch loongarch64 ppc64le +%ifnarch loongarch64 ppc64le sw_64 %ifarch riscv64 %patch2000 -p1 %else @@ -1195,11 +1213,17 @@ pushd %{top_level_dir_name} %patch92 -p1 %patch93 -p1 %patch94 -p1 +%patch96 -p1 +%patch97 -p1 +%patch98 -p1 %endif %endif %ifarch loongarch64 %patch2001 -p1 %endif +%ifarch sw_64 +%patch6000 -p1 +%endif popd # openjdk # Extract systemtap tapsets @@ -1258,7 +1282,7 @@ export NUM_PROC=${NUM_PROC:-1} [ ${NUM_PROC} -gt %{?_smp_ncpus_max} ] && export NUM_PROC=%{?_smp_ncpus_max} %endif -%ifarch %{aarch64} riscv64 +%ifarch %{aarch64} riscv64 sw_64 export ARCH_DATA_MODEL=64 %endif @@ -1290,7 +1314,9 @@ bash ../configure \ --with-version-opt="" \ --with-vendor-version-string="%{vendor_version_string}" \ %ifnarch loongarch64 ppc64le - --with-vendor-name="BiSheng" \ + %ifnarch sw_64 + --with-vendor-name="BiSheng" \ + %endif %endif %ifarch loongarch64 --with-vendor-name="Loongson" \ @@ -1301,17 +1327,29 @@ bash ../configure \ --with-boot-jdk=/usr/lib/jvm/java-%{buildjdkver}-openjdk \ --with-debug-level=$debugbuild \ --with-native-debug-symbols=internal \ +%ifarch sw_64 + --with-jvm-variants=custom \ + --with-jvm-features=serialgc,vm-structs,parallelgc,compiler2,management,nmt,g1gc,cmsgc,jvmti,services,jni-check,jfr \ + --with-zlib=bundled \ +%else --enable-unlimited-crypto \ --with-zlib=system \ +%endif --with-libjpeg=system \ --with-giflib=system \ --with-libpng=system \ --with-lcms=system \ --with-harfbuzz=system \ --with-stdc++lib=dynamic \ +%ifarch sw_64 + --with-extra-cflags=" -mieee -Wno-error=maybe-uninitialized -Wno-error=deprecated-declarations -Wno-error=type-limits -Wno-error=format-security -Wno-error=conversion-null -Wno-error=sign-compare -Wno-error=int-to-pointer-cast -mgprel-size=32" \ + --with-extra-cxxflags="-mieee -Wno-error=maybe-uninitialized -Wno-error=deprecated-declarations -Wno-error=type-limits -Wno-error=format-security -Wno-error=conversion-null -Wno-error=sign-compare -Wno-error=int-to-pointer-cast -mgprel-size=32" \ + --with-extra-ldflags=" -mieee -Wl,-no-relax" \ +%else --with-extra-cxxflags="$EXTRA_CPP_FLAGS" \ 
--with-extra-cflags="$EXTRA_CFLAGS" \ --with-extra-ldflags="%{ourldflags}" \ +%endif --with-num-cores="$NUM_PROC" \ --disable-javac-server \ --disable-warnings-as-errors \ @@ -1715,6 +1753,21 @@ cjc.mainProgram(args) -- the returns from copy_jdk_configs.lua should not affect %changelog +* Tue Apr 29 2025 wulongyao - 1:11.0.27.6-1 +- add 8352716-tz-Update-Timezone-Data-to-2025b.patch + +* Thu Apr 17 2025 yangyingqing - 1:11.0.27.6-0 +- update to 11.0.27+6(GA) +- delete 8347965-tz-Update-Timezone-Data-to-2025a.patch +- add fix-potential-build-fail-in-jbolt.patch + +* Tue Apr 1 2025 2025 swcompiler - 1:11.0.26.4-3 +- add sw64 port to 11.0.26 + +* Tue Mar 04 2025 eapen - 1:11.0.26.4-2 +- (tz) update Timezone Data to 2025a +- add jbolt feature + * Wed Feb 12 2025 Dingli Zhang - 1:11.0.26.4-1 - update riscv64 port to 11.0.26 @@ -1746,7 +1799,7 @@ cjc.mainProgram(args) -- the returns from copy_jdk_configs.lua should not affect * Thu Aug 1 2024 aoqi - 1.11.0.24.8-5 - update LoongArch64 port to 11.0.24 -* Thu Jul 29 2024 DXwangg - 1.11.0.24.8-4 +* Mon Jul 29 2024 DXwangg - 1.11.0.24.8-4 - modified delete_expired_certificates.patch * Thu Jul 25 2024 songliyang - 1.11.0.24.8-3