diff --git a/8046294-Generate-the-4-byte-timestamp-randomly.patch b/8046294-Generate-the-4-byte-timestamp-randomly.patch new file mode 100644 index 0000000000000000000000000000000000000000..c0477ed090c392f6198df05cbff97293d89b704c --- /dev/null +++ b/8046294-Generate-the-4-byte-timestamp-randomly.patch @@ -0,0 +1,87 @@ +diff --git a/jdk/src/share/classes/sun/security/ssl/RandomCookie.java b/jdk/src/share/classes/sun/security/ssl/RandomCookie.java +index 5f414c408..ce27f0df4 100644 +--- a/jdk/src/share/classes/sun/security/ssl/RandomCookie.java ++++ b/jdk/src/share/classes/sun/security/ssl/RandomCookie.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1996, 2007, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -41,21 +41,8 @@ final class RandomCookie { + byte random_bytes[]; // exactly 32 bytes + + RandomCookie(SecureRandom generator) { +- long temp = System.currentTimeMillis() / 1000; +- int gmt_unix_time; +- if (temp < Integer.MAX_VALUE) { +- gmt_unix_time = (int) temp; +- } else { +- gmt_unix_time = Integer.MAX_VALUE; // Whoops! 
+- } +- + random_bytes = new byte[32]; + generator.nextBytes(random_bytes); +- +- random_bytes[0] = (byte)(gmt_unix_time >> 24); +- random_bytes[1] = (byte)(gmt_unix_time >> 16); +- random_bytes[2] = (byte)(gmt_unix_time >> 8); +- random_bytes[3] = (byte)gmt_unix_time; + } + + RandomCookie(HandshakeInStream m) throws IOException { +@@ -68,22 +55,15 @@ final class RandomCookie { + } + + void print(PrintStream s) { +- int i, gmt_unix_time; +- +- gmt_unix_time = random_bytes[0] << 24; +- gmt_unix_time += random_bytes[1] << 16; +- gmt_unix_time += random_bytes[2] << 8; +- gmt_unix_time += random_bytes[3]; +- +- s.print("GMT: " + gmt_unix_time + " "); +- s.print("bytes = { "); +- +- for (i = 4; i < 32; i++) { +- if (i != 4) { +- s.print(", "); ++ s.print("random_bytes = {"); ++ for (int i = 0; i < 32; i++) { ++ int k = random_bytes[i] & 0xFF; ++ if (i != 0) { ++ s.print(' '); + } +- s.print(random_bytes[i] & 0x0ff); ++ s.print(Utilities.hexDigits[k >>> 4]); ++ s.print(Utilities.hexDigits[k & 0xf]); + } +- s.println(" }"); ++ s.println("}"); + } + } +diff --git a/jdk/src/share/classes/sun/security/ssl/Utilities.java b/jdk/src/share/classes/sun/security/ssl/Utilities.java +index aefb02c9a..9b267f6e1 100644 +--- a/jdk/src/share/classes/sun/security/ssl/Utilities.java ++++ b/jdk/src/share/classes/sun/security/ssl/Utilities.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -33,6 +33,11 @@ import sun.net.util.IPAddressUtil; + * A utility class to share the static methods. + */ + final class Utilities { ++ /** ++ * hex digits ++ */ ++ static final char[] hexDigits = "0123456789ABCDEF".toCharArray(); ++ + /** + * Puts {@code hostname} into the {@code serverNames} list. + *

diff --git a/8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch b/8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch new file mode 100644 index 0000000000000000000000000000000000000000..2c65f8243d8406b2c03da315d0ba72189bd4aa9e --- /dev/null +++ b/8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch @@ -0,0 +1,28 @@ +diff --git a/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp +index 65a441240..1e534d3da 100644 +--- a/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp ++++ b/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp +@@ -71,10 +71,20 @@ bool frame::safe_for_sender(JavaThread *thread) { + return false; + } + +- // unextended sp must be within the stack and above or equal sp +- bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && +- (unextended_sp >= sp); ++ // When we are running interpreted code the machine stack pointer, SP, is ++ // set low enough so that the Java expression stack can grow and shrink ++ // without ever exceeding the machine stack bounds. So, ESP >= SP. + ++ // When we call out of an interpreted method, SP is incremented so that ++ // the space between SP and ESP is removed. The SP saved in the callee's ++ // frame is the SP *before* this increment. So, when we walk a stack of ++ // interpreter frames the sender's SP saved in a frame might be less than ++ // the SP at the point of call. 
++ ++ // So unextended sp must be within the stack but we need not to check ++ // that unextended sp >= sp ++ ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()); + if (!unextended_sp_safe) { + return false; + } diff --git a/8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch b/8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch new file mode 100644 index 0000000000000000000000000000000000000000..175682216553c831c1d0595ffc5ed073911d7c5f --- /dev/null +++ b/8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch @@ -0,0 +1,558 @@ +diff --git a/hotspot/src/share/vm/opto/chaitin.hpp b/hotspot/src/share/vm/opto/chaitin.hpp +index de6d443cd..abbd4449f 100644 +--- a/hotspot/src/share/vm/opto/chaitin.hpp ++++ b/hotspot/src/share/vm/opto/chaitin.hpp +@@ -111,9 +111,9 @@ public: + _msize_valid=1; + if (_is_vector) { + assert(!_fat_proj, "sanity"); +- _mask.verify_sets(_num_regs); ++ assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets"); + } else if (_num_regs == 2 && !_fat_proj) { +- _mask.verify_pairs(); ++ assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs"); + } + #endif + } +diff --git a/hotspot/src/share/vm/opto/regmask.cpp b/hotspot/src/share/vm/opto/regmask.cpp +index 352ccfb9d..d92f09eb6 100644 +--- a/hotspot/src/share/vm/opto/regmask.cpp ++++ b/hotspot/src/share/vm/opto/regmask.cpp +@@ -74,7 +74,8 @@ int find_lowest_bit( uint32 mask ) { + } + + // Find highest 1, or return 32 if empty +-int find_hihghest_bit( uint32 mask ) { ++int find_highest_bit( uint32 mask ) { ++ assert(mask != 0, "precondition"); + int n = 0; + if( mask > 0xffff ) { + mask >>= 16; +@@ -167,13 +168,14 @@ OptoReg::Name RegMask::find_first_pair() const { + //------------------------------ClearToPairs----------------------------------- + // Clear out partial bits; leave only bit pairs + void RegMask::clear_to_pairs() { +- for( int i = 0; i < RM_SIZE; i++ ) { ++ 
assert(valid_watermarks(), "sanity"); ++ for( int i = _lwm; i <= _hwm; i++ ) { + int bits = _A[i]; + bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair + bits |= (bits>>1); // Smear 1 hi-bit into a pair + _A[i] = bits; + } +- verify_pairs(); ++ assert(is_aligned_pairs(), "mask is not aligned, adjacent pairs"); + } + + //------------------------------SmearToPairs----------------------------------- +@@ -188,10 +190,14 @@ void RegMask::smear_to_pairs() { + verify_pairs(); + } + +-//------------------------------is_aligned_pairs------------------------------- ++bool RegMask::is_misaligned_pair() const { ++ return Size() == 2 && !is_aligned_pairs(); ++} ++ + bool RegMask::is_aligned_pairs() const { + // Assert that the register mask contains only bit pairs. +- for( int i = 0; i < RM_SIZE; i++ ) { ++ assert(valid_watermarks(), "sanity"); ++ for( int i = _lwm; i <= _hwm; i++ ) { + int bits = _A[i]; + while( bits ) { // Check bits for pairing + int bit = bits & -bits; // Extract low bit +@@ -206,39 +212,28 @@ bool RegMask::is_aligned_pairs() const { + return true; + } + +-//------------------------------is_bound1-------------------------------------- +-// Return TRUE if the mask contains a single bit +-int RegMask::is_bound1() const { +- if( is_AllStack() ) return false; +- int bit = -1; // Set to hold the one bit allowed +- for( int i = 0; i < RM_SIZE; i++ ) { +- if( _A[i] ) { // Found some bits +- if( bit != -1 ) return false; // Already had bits, so fail +- bit = _A[i] & -_A[i]; // Extract 1 bit from mask +- if( bit != _A[i] ) return false; // Found many bits, so fail +- } +- } +- // True for both the empty mask and for a single bit +- return true; ++bool RegMask::is_bound1() const { ++ if (is_AllStack()) return false; ++ return Size() == 1; + } + + //------------------------------is_bound2-------------------------------------- + // Return TRUE if the mask contains an adjacent pair of bits and no other bits. 
+-int RegMask::is_bound_pair() const { ++bool RegMask::is_bound_pair() const { + if( is_AllStack() ) return false; +- ++ assert(valid_watermarks(), "sanity"); + int bit = -1; // Set to hold the one bit allowed +- for( int i = 0; i < RM_SIZE; i++ ) { +- if( _A[i] ) { // Found some bits +- if( bit != -1 ) return false; // Already had bits, so fail +- bit = _A[i] & -(_A[i]); // Extract 1 bit from mask +- if( (bit << 1) != 0 ) { // Bit pair stays in same word? ++ for( int i = _lwm; i <= _hwm; i++ ) { ++ if( _A[i] ) { // Found some bits ++ if( bit != -1) return false; // Already had bits, so fail ++ bit = _A[i] & -(_A[i]); // Extract 1 bit from mask ++ if( (bit << 1) != 0 ) { // Bit pair stays in same word? + if( (bit | (bit<<1)) != _A[i] ) +- return false; // Require adjacent bit pair and no more bits +- } else { // Else its a split-pair case ++ return false; // Require adjacent bit pair and no more bits ++ } else { // Else its a split-pair case + if( bit != _A[i] ) return false; // Found many bits, so fail +- i++; // Skip iteration forward +- if( i >= RM_SIZE || _A[i] != 1 ) ++ i++; // Skip iteration forward ++ if( i > _hwm || _A[i] != 1 ) + return false; // Require 1 lo bit in next word + } + } +@@ -247,31 +242,44 @@ int RegMask::is_bound_pair() const { + return true; + } + ++// Test for a single adjacent set of ideal register's size. ++bool RegMask::is_bound(uint ireg) const { ++ if (is_vector(ireg)) { ++ if (is_bound_set(num_registers(ireg))) ++ return true; ++ } else if (is_bound1() || is_bound_pair()) { ++ return true; ++ } ++ return false; ++} ++ ++ ++ + static int low_bits[3] = { 0x55555555, 0x11111111, 0x01010101 }; +-//------------------------------find_first_set--------------------------------- ++ + // Find the lowest-numbered register set in the mask. Return the + // HIGHEST register number in the set, or BAD if no sets. + // Works also for size 1. 
+ OptoReg::Name RegMask::find_first_set(const int size) const { +- verify_sets(size); +- for (int i = 0; i < RM_SIZE; i++) { ++ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); ++ assert(valid_watermarks(), "sanity"); ++ for (int i = _lwm; i <= _hwm; i++) { + if (_A[i]) { // Found some bits +- int bit = _A[i] & -_A[i]; // Extract low bit + // Convert to bit number, return hi bit in pair +- return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+(size-1)); ++ return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(_A[i])+(size-1)); + } + } + return OptoReg::Bad; + } + +-//------------------------------clear_to_sets---------------------------------- + // Clear out partial bits; leave only aligned adjacent bit pairs + void RegMask::clear_to_sets(const int size) { + if (size == 1) return; + assert(2 <= size && size <= 8, "update low bits table"); + assert(is_power_of_2(size), "sanity"); ++ assert(valid_watermarks(), "sanity"); + int low_bits_mask = low_bits[size>>2]; +- for (int i = 0; i < RM_SIZE; i++) { ++ for (int i = _lwm; i <= _hwm; i++) { + int bits = _A[i]; + int sets = (bits & low_bits_mask); + for (int j = 1; j < size; j++) { +@@ -286,17 +294,17 @@ void RegMask::clear_to_sets(const int size) { + } + _A[i] = sets; + } +- verify_sets(size); ++ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); + } + +-//------------------------------smear_to_sets---------------------------------- + // Smear out partial bits to aligned adjacent bit sets + void RegMask::smear_to_sets(const int size) { + if (size == 1) return; + assert(2 <= size && size <= 8, "update low bits table"); + assert(is_power_of_2(size), "sanity"); ++ assert(valid_watermarks(), "sanity"); + int low_bits_mask = low_bits[size>>2]; +- for (int i = 0; i < RM_SIZE; i++) { ++ for (int i = _lwm; i <= _hwm; i++) { + int bits = _A[i]; + int sets = 0; + for (int j = 0; j < size; j++) { +@@ -312,17 +320,17 @@ void RegMask::smear_to_sets(const int size) { + } + _A[i] = sets; + 
} +- verify_sets(size); ++ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); + } + +-//------------------------------is_aligned_set-------------------------------- ++// Assert that the register mask contains only bit sets. + bool RegMask::is_aligned_sets(const int size) const { + if (size == 1) return true; + assert(2 <= size && size <= 8, "update low bits table"); + assert(is_power_of_2(size), "sanity"); + int low_bits_mask = low_bits[size>>2]; +- // Assert that the register mask contains only bit sets. +- for (int i = 0; i < RM_SIZE; i++) { ++ assert(valid_watermarks(), "sanity"); ++ for (int i = _lwm; i <= _hwm; i++) { + int bits = _A[i]; + while (bits) { // Check bits for pairing + int bit = bits & -bits; // Extract low bit +@@ -339,14 +347,14 @@ bool RegMask::is_aligned_sets(const int size) const { + return true; + } + +-//------------------------------is_bound_set----------------------------------- + // Return TRUE if the mask contains one adjacent set of bits and no other bits. + // Works also for size 1. + int RegMask::is_bound_set(const int size) const { + if( is_AllStack() ) return false; + assert(1 <= size && size <= 8, "update low bits table"); ++ assert(valid_watermarks(), "sanity"); + int bit = -1; // Set to hold the one bit allowed +- for (int i = 0; i < RM_SIZE; i++) { ++ for (int i = _lwm; i <= _hwm; i++) { + if (_A[i] ) { // Found some bits + if (bit != -1) + return false; // Already had bits, so fail +@@ -364,7 +372,7 @@ int RegMask::is_bound_set(const int size) const { + int set = bit>>24; + set = set & -set; // Remove sign extension. 
+ set = (((set << size) - 1) >> 8); +- if (i >= RM_SIZE || _A[i] != set) ++ if (i > _hwm || _A[i] != set) + return false; // Require expected low bits in next word + } + } +@@ -373,7 +381,6 @@ int RegMask::is_bound_set(const int size) const { + return true; + } + +-//------------------------------is_UP------------------------------------------ + // UP means register only, Register plus stack, or stack only is DOWN + bool RegMask::is_UP() const { + // Quick common case check for DOWN (any stack slot is legal) +@@ -386,22 +393,22 @@ bool RegMask::is_UP() const { + return true; + } + +-//------------------------------Size------------------------------------------- + // Compute size of register mask in bits + uint RegMask::Size() const { + extern uint8 bitsInByte[256]; + uint sum = 0; +- for( int i = 0; i < RM_SIZE; i++ ) ++ assert(valid_watermarks(), "sanity"); ++ for( int i = _lwm; i <= _hwm; i++ ) { + sum += + bitsInByte[(_A[i]>>24) & 0xff] + + bitsInByte[(_A[i]>>16) & 0xff] + + bitsInByte[(_A[i]>> 8) & 0xff] + + bitsInByte[ _A[i] & 0xff]; ++ } + return sum; + } + + #ifndef PRODUCT +-//------------------------------print------------------------------------------ + void RegMask::dump(outputStream *st) const { + st->print("["); + RegMask rm = *this; // Structure copy into local temp +diff --git a/hotspot/src/share/vm/opto/regmask.hpp b/hotspot/src/share/vm/opto/regmask.hpp +index 5ceebb3fb..6cef16ad7 100644 +--- a/hotspot/src/share/vm/opto/regmask.hpp ++++ b/hotspot/src/share/vm/opto/regmask.hpp +@@ -44,27 +44,12 @@ + # include "adfiles/adGlobals_ppc_64.hpp" + #endif + +-// Some fun naming (textual) substitutions: +-// +-// RegMask::get_low_elem() ==> RegMask::find_first_elem() +-// RegMask::Special ==> RegMask::Empty +-// RegMask::_flags ==> RegMask::is_AllStack() +-// RegMask::operator<<=() ==> RegMask::Insert() +-// RegMask::operator>>=() ==> RegMask::Remove() +-// RegMask::Union() ==> RegMask::OR +-// RegMask::Inter() ==> RegMask::AND +-// +-// 
OptoRegister::RegName ==> OptoReg::Name +-// +-// OptoReg::stack0() ==> _last_Mach_Reg or ZERO in core version +-// +-// numregs in chaitin ==> proper degree in chaitin + + //-------------Non-zero bit search methods used by RegMask--------------------- + // Find lowest 1, or return 32 if empty + int find_lowest_bit( uint32 mask ); + // Find highest 1, or return 32 if empty +-int find_hihghest_bit( uint32 mask ); ++int find_highest_bit( uint32 mask ); + + //------------------------------RegMask---------------------------------------- + // The ADL file describes how to print the machine-specific registers, as well +@@ -97,6 +82,12 @@ class RegMask VALUE_OBJ_CLASS_SPEC { + + public: + enum { CHUNK_SIZE = RM_SIZE*_WordBits }; ++ // The low and high water marks represents the lowest and highest word ++ // that might contain set register mask bits, respectively. We guarantee ++ // that there are no bits in words outside this range, but any word at ++ // and between the two marks can still be 0. ++ int _lwm; ++ int _hwm; + + // SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits. + // Also, consider the maximum alignment size for a normally allocated +@@ -126,13 +117,21 @@ public: + # define BODY(I) _A[I] = a##I; + FORALL_BODY + # undef BODY ++ _lwm = 0; ++ _hwm = RM_SIZE - 1; ++ while (_hwm > 0 && _A[_hwm] == 0) _hwm--; ++ while ((_lwm < _hwm) && _A[_lwm] == 0) _lwm++; ++ assert(valid_watermarks(), "post-condition"); + } + + // Handy copying constructor + RegMask( RegMask *rm ) { +-# define BODY(I) _A[I] = rm->_A[I]; +- FORALL_BODY +-# undef BODY ++ _hwm = rm->_hwm; ++ _lwm = rm->_lwm; ++ for (int i = 0; i < RM_SIZE; i++) { ++ _A[i] = rm->_A[i]; ++ } ++ assert(valid_watermarks(), "post-condition"); + } + + // Construct an empty mask +@@ -162,30 +161,36 @@ public: + + // Test for being a not-empty mask. 
+ int is_NotEmpty( ) const { ++ assert(valid_watermarks(), "sanity"); + int tmp = 0; +-# define BODY(I) tmp |= _A[I]; +- FORALL_BODY +-# undef BODY ++ for (int i = _lwm; i <= _hwm; i++) { ++ tmp |= _A[i]; ++ } + return tmp; + } + + // Find lowest-numbered register from mask, or BAD if mask is empty. + OptoReg::Name find_first_elem() const { +- int base, bits; +-# define BODY(I) if( (bits = _A[I]) != 0 ) base = I<<_LogWordBits; else +- FORALL_BODY +-# undef BODY +- { base = OptoReg::Bad; bits = 1<<0; } +- return OptoReg::Name(base + find_lowest_bit(bits)); ++ assert(valid_watermarks(), "sanity"); ++ for (int i = _lwm; i <= _hwm; i++) { ++ int bits = _A[i]; ++ if (bits) { ++ return OptoReg::Name((i<<_LogWordBits) + find_lowest_bit(bits)); ++ } ++ } ++ return OptoReg::Name(OptoReg::Bad); + } ++ + // Get highest-numbered register from mask, or BAD if mask is empty. + OptoReg::Name find_last_elem() const { +- int base, bits; +-# define BODY(I) if( (bits = _A[RM_SIZE-1-I]) != 0 ) base = (RM_SIZE-1-I)<<_LogWordBits; else +- FORALL_BODY +-# undef BODY +- { base = OptoReg::Bad; bits = 1<<0; } +- return OptoReg::Name(base + find_hihghest_bit(bits)); ++ assert(valid_watermarks(), "sanity"); ++ for (int i = _hwm; i >= _lwm; i--) { ++ int bits = _A[i]; ++ if (bits) { ++ return OptoReg::Name((i<<_LogWordBits) + find_highest_bit(bits)); ++ } ++ } ++ return OptoReg::Name(OptoReg::Bad); + } + + // Find the lowest-numbered register pair in the mask. Return the +@@ -199,25 +204,34 @@ public: + void smear_to_pairs(); + // Verify that the mask contains only aligned adjacent bit pairs + void verify_pairs() const { assert( is_aligned_pairs(), "mask is not aligned, adjacent pairs" ); } ++ ++#ifdef ASSERT ++ // Verify watermarks are sane, i.e., within bounds and that no ++ // register words below or above the watermarks have bits set. 
++ bool valid_watermarks() const { ++ assert(_hwm >= 0 && _hwm < RM_SIZE, err_msg("_hwm out of range: %d", _hwm)); ++ assert(_lwm >= 0 && _lwm < RM_SIZE, err_msg("_lwm out of range: %d", _lwm)); ++ for (int i = 0; i < _lwm; i++) { ++ assert(_A[i] == 0, err_msg("_lwm too high: %d regs at: %d", _lwm, i)); ++ } ++ for (int i = _hwm + 1; i < RM_SIZE; i++) { ++ assert(_A[i] == 0, err_msg("_hwm too low: %d regs at: %d", _hwm, i)); ++ } ++ return true; ++ } ++#endif // !ASSERT ++ + // Test that the mask contains only aligned adjacent bit pairs + bool is_aligned_pairs() const; + + // mask is a pair of misaligned registers +- bool is_misaligned_pair() const { return Size()==2 && !is_aligned_pairs(); } ++ bool is_misaligned_pair() const; + // Test for single register +- int is_bound1() const; ++ bool is_bound1() const; + // Test for a single adjacent pair +- int is_bound_pair() const; ++ bool is_bound_pair() const; + // Test for a single adjacent set of ideal register's size. +- int is_bound(uint ireg) const { +- if (is_vector(ireg)) { +- if (is_bound_set(num_registers(ireg))) +- return true; +- } else if (is_bound1() || is_bound_pair()) { +- return true; +- } +- return false; +- } ++ bool is_bound(uint ireg) const; + + // Find the lowest-numbered register set in the mask. Return the + // HIGHEST register number in the set, or BAD if no sets. +@@ -228,8 +242,6 @@ public: + void clear_to_sets(const int size); + // Smear out partial bits to aligned adjacent bit sets. + void smear_to_sets(const int size); +- // Verify that the mask contains only aligned adjacent bit sets +- void verify_sets(int size) const { assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); } + // Test that the mask contains only aligned adjacent bit sets + bool is_aligned_sets(const int size) const; + +@@ -244,11 +256,14 @@ public: + + // Fast overlap test. Non-zero if any registers in common. 
+ int overlap( const RegMask &rm ) const { +- return +-# define BODY(I) (_A[I] & rm._A[I]) | +- FORALL_BODY +-# undef BODY +- 0 ; ++ assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); ++ int hwm = MIN2(_hwm, rm._hwm); ++ int lwm = MAX2(_lwm, rm._lwm); ++ int result = 0; ++ for (int i = lwm; i <= hwm; i++) { ++ result |= _A[i] & rm._A[i]; ++ } ++ return result; + } + + // Special test for register pressure based splitting +@@ -257,22 +272,29 @@ public: + + // Clear a register mask + void Clear( ) { +-# define BODY(I) _A[I] = 0; +- FORALL_BODY +-# undef BODY ++ _lwm = RM_SIZE - 1; ++ _hwm = 0; ++ memset(_A, 0, sizeof(int)*RM_SIZE); ++ assert(valid_watermarks(), "sanity"); + } + + // Fill a register mask with 1's + void Set_All( ) { +-# define BODY(I) _A[I] = -1; +- FORALL_BODY +-# undef BODY ++ _lwm = 0; ++ _hwm = RM_SIZE - 1; ++ memset(_A, 0xFF, sizeof(int)*RM_SIZE); ++ assert(valid_watermarks(), "sanity"); + } + + // Insert register into mask + void Insert( OptoReg::Name reg ) { +- assert( reg < CHUNK_SIZE, "" ); +- _A[reg>>_LogWordBits] |= (1<<(reg&(_WordBits-1))); ++ assert(reg < CHUNK_SIZE, "sanity"); ++ assert(valid_watermarks(), "pre-condition"); ++ int index = reg>>_LogWordBits; ++ if (index > _hwm) _hwm = index; ++ if (index < _lwm) _lwm = index; ++ _A[index] |= (1<<(reg&(_WordBits-1))); ++ assert(valid_watermarks(), "post-condition"); + } + + // Remove register from mask +@@ -283,23 +305,38 @@ public: + + // OR 'rm' into 'this' + void OR( const RegMask &rm ) { +-# define BODY(I) this->_A[I] |= rm._A[I]; +- FORALL_BODY +-# undef BODY ++ assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); ++ // OR widens the live range ++ if (_lwm > rm._lwm) _lwm = rm._lwm; ++ if (_hwm < rm._hwm) _hwm = rm._hwm; ++ for (int i = _lwm; i <= _hwm; i++) { ++ _A[i] |= rm._A[i]; ++ } ++ assert(valid_watermarks(), "sanity"); + } + + // AND 'rm' into 'this' + void AND( const RegMask &rm ) { +-# define BODY(I) this->_A[I] &= rm._A[I]; +- FORALL_BODY +-# undef 
BODY ++ assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); ++ // Do not evaluate words outside the current watermark range, as they are ++ // already zero and an &= would not change that ++ for (int i = _lwm; i <= _hwm; i++) { ++ _A[i] &= rm._A[i]; ++ } ++ // Narrow the watermarks if &rm spans a narrower range. ++ // Update after to ensure non-overlapping words are zeroed out. ++ if (_lwm < rm._lwm) _lwm = rm._lwm; ++ if (_hwm > rm._hwm) _hwm = rm._hwm; + } + + // Subtract 'rm' from 'this' + void SUBTRACT( const RegMask &rm ) { +-# define BODY(I) _A[I] &= ~rm._A[I]; +- FORALL_BODY +-# undef BODY ++ assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); ++ int hwm = MIN2(_hwm, rm._hwm); ++ int lwm = MAX2(_lwm, rm._lwm); ++ for (int i = lwm; i <= hwm; i++) { ++ _A[i] &= ~rm._A[i]; ++ } + } + + // Compute size of register mask: number of bits diff --git a/8234003-Improve-IndexSet-iteration.patch b/8234003-Improve-IndexSet-iteration.patch new file mode 100644 index 0000000000000000000000000000000000000000..aaf31c7473d0bb247acbc2a2e1d49c04995b2a81 --- /dev/null +++ b/8234003-Improve-IndexSet-iteration.patch @@ -0,0 +1,1186 @@ +diff --git a/hotspot/src/share/vm/opto/chaitin.cpp b/hotspot/src/share/vm/opto/chaitin.cpp +index ec318c515..b3b9eb39a 100644 +--- a/hotspot/src/share/vm/opto/chaitin.cpp ++++ b/hotspot/src/share/vm/opto/chaitin.cpp +@@ -1038,11 +1038,13 @@ void PhaseChaitin::set_was_low() { + // low-degree neighbors when determining if this guy colors. 
+ int briggs_degree = 0; + IndexSet *s = _ifg->neighbors(i); +- IndexSetIterator elements(s); +- uint lidx; +- while((lidx = elements.next()) != 0) { +- if( !lrgs(lidx).lo_degree() ) +- briggs_degree += MAX2(size,lrgs(lidx).num_regs()); ++ if (!s->is_empty()) { ++ IndexSetIterator elements(s); ++ uint lidx; ++ while((lidx = elements.next()) != 0) { ++ if( !lrgs(lidx).lo_degree() ) ++ briggs_degree += MAX2(size,lrgs(lidx).num_regs()); ++ } + } + if( briggs_degree < lrgs(i).degrees_of_freedom() ) + lrgs(i)._was_lo = 1; // Low degree via the briggs assertion +@@ -1118,18 +1120,20 @@ void PhaseChaitin::Pre_Simplify( ) { + // list. Note that 'degree' can only fall and 'numregs' is + // unchanged by this action. Thus the two are equal at most once, + // so LRGs hit the lo-degree worklists at most once. +- IndexSetIterator elements(adj); +- uint neighbor; +- while ((neighbor = elements.next()) != 0) { +- LRG *n = &lrgs(neighbor); +- assert( _ifg->effective_degree(neighbor) == n->degree(), "" ); +- +- // Check for just becoming of-low-degree +- if( n->just_lo_degree() && !n->_has_copy ) { +- assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice"); +- // Put on lo-degree list +- n->_next = lo_no_copy; +- lo_no_copy = neighbor; ++ if (!adj->is_empty()) { ++ IndexSetIterator elements(adj); ++ uint neighbor; ++ while ((neighbor = elements.next()) != 0) { ++ LRG *n = &lrgs(neighbor); ++ assert(_ifg->effective_degree(neighbor) == n->degree(), ""); ++ ++ // Check for just becoming of-low-degree ++ if (n->just_lo_degree() && !n->_has_copy) { ++ assert(!(*_ifg->_yanked)[neighbor], "Cannot move to lo degree twice"); ++ // Put on lo-degree list ++ n->_next = lo_no_copy; ++ lo_no_copy = neighbor; ++ } + } + } + } // End of while lo-degree no_copy worklist not empty +@@ -1159,7 +1163,7 @@ void PhaseChaitin::Simplify( ) { + lrgs(lo)._next = _simplified; + _simplified = lo; + // If this guy is "at risk" then mark his current neighbors +- if( lrgs(lo)._at_risk ) { ++ if 
(lrgs(lo)._at_risk && !_ifg->neighbors(lo)->is_empty()) { + IndexSetIterator elements(_ifg->neighbors(lo)); + uint datum; + while ((datum = elements.next()) != 0) { +@@ -1168,7 +1172,10 @@ void PhaseChaitin::Simplify( ) { + } + + // Yank this guy from the IFG. +- IndexSet *adj = _ifg->remove_node( lo ); ++ IndexSet *adj = _ifg->remove_node(lo); ++ if (adj->is_empty()) { ++ continue; ++ } + + // If any neighbors' degrees fall below their number of + // allowed registers, then put that neighbor on the low degree +@@ -1187,13 +1194,16 @@ void PhaseChaitin::Simplify( ) { + + // Check for just becoming of-low-degree just counting registers. + // _must_spill live ranges are already on the low degree list. +- if( n->just_lo_degree() && !n->_must_spill ) { +- assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice"); ++ if (n->just_lo_degree() && !n->_must_spill) { ++ assert(!(*_ifg->_yanked)[neighbor], "Cannot move to lo degree twice"); + // Pull from hi-degree list + uint prev = n->_prev; + uint next = n->_next; +- if( prev ) lrgs(prev)._next = next; +- else _hi_degree = next; ++ if (prev) { ++ lrgs(prev)._next = next; ++ } else { ++ _hi_degree = next; ++ } + lrgs(next)._prev = prev; + n->_next = _lo_degree; + _lo_degree = neighbor; +@@ -1304,7 +1314,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { + + // Check for "at_risk" LRG's + uint risk_lrg = _lrg_map.find(lrg._risk_bias); +- if( risk_lrg != 0 ) { ++ if( risk_lrg != 0 && !_ifg->neighbors(risk_lrg)->is_empty()) { + // Walk the colored neighbors of the "at_risk" candidate + // Choose a color which is both legal and already taken by a neighbor + // of the "at_risk" candidate in order to improve the chances of the +@@ -1320,9 +1330,9 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { + } + + uint copy_lrg = _lrg_map.find(lrg._copy_bias); +- if( copy_lrg != 0 ) { ++ if (copy_lrg != 0) { + // If he has a color, +- if( !(*(_ifg->_yanked))[copy_lrg] ) { ++ if 
(!(*(_ifg->_yanked))[copy_lrg]) { + OptoReg::Name reg = lrgs(copy_lrg).reg(); + // And it is legal for you, + if (is_legal_reg(lrg, reg, chunk)) +@@ -1420,41 +1430,43 @@ uint PhaseChaitin::Select( ) { + + // Remove neighbor colors + IndexSet *s = _ifg->neighbors(lidx); +- + debug_only(RegMask orig_mask = lrg->mask();) +- IndexSetIterator elements(s); +- uint neighbor; +- while ((neighbor = elements.next()) != 0) { +- // Note that neighbor might be a spill_reg. In this case, exclusion +- // of its color will be a no-op, since the spill_reg chunk is in outer +- // space. Also, if neighbor is in a different chunk, this exclusion +- // will be a no-op. (Later on, if lrg runs out of possible colors in +- // its chunk, a new chunk of color may be tried, in which case +- // examination of neighbors is started again, at retry_next_chunk.) +- LRG &nlrg = lrgs(neighbor); +- OptoReg::Name nreg = nlrg.reg(); +- // Only subtract masks in the same chunk +- if( nreg >= chunk && nreg < chunk + RegMask::CHUNK_SIZE ) { ++ ++ if (!s->is_empty()) { ++ IndexSetIterator elements(s); ++ uint neighbor; ++ while ((neighbor = elements.next()) != 0) { ++ // Note that neighbor might be a spill_reg. In this case, exclusion ++ // of its color will be a no-op, since the spill_reg chunk is in outer ++ // space. Also, if neighbor is in a different chunk, this exclusion ++ // will be a no-op. (Later on, if lrg runs out of possible colors in ++ // its chunk, a new chunk of color may be tried, in which case ++ // examination of neighbors is started again, at retry_next_chunk.) 
++ LRG &nlrg = lrgs(neighbor); ++ OptoReg::Name nreg = nlrg.reg(); ++ // Only subtract masks in the same chunk ++ if( nreg >= chunk && nreg < chunk + RegMask::CHUNK_SIZE ) { + #ifndef PRODUCT +- uint size = lrg->mask().Size(); +- RegMask rm = lrg->mask(); ++ uint size = lrg->mask().Size(); ++ RegMask rm = lrg->mask(); + #endif +- lrg->SUBTRACT(nlrg.mask()); ++ lrg->SUBTRACT(nlrg.mask()); + #ifndef PRODUCT +- if (trace_spilling() && lrg->mask().Size() != size) { +- ttyLocker ttyl; +- tty->print("L%d ", lidx); +- rm.dump(); +- tty->print(" intersected L%d ", neighbor); +- nlrg.mask().dump(); +- tty->print(" removed "); +- rm.SUBTRACT(lrg->mask()); +- rm.dump(); +- tty->print(" leaving "); +- lrg->mask().dump(); +- tty->cr(); +- } ++ if (trace_spilling() && lrg->mask().Size() != size) { ++ ttyLocker ttyl; ++ tty->print("L%d ", lidx); ++ rm.dump(); ++ tty->print(" intersected L%d ", neighbor); ++ nlrg.mask().dump(); ++ tty->print(" removed "); ++ rm.SUBTRACT(lrg->mask()); ++ rm.dump(); ++ tty->print(" leaving "); ++ lrg->mask().dump(); ++ tty->cr(); ++ } + #endif ++ } + } + } + //assert(is_allstack == lrg->mask().is_AllStack(), "nbrs must not change AllStackedness"); +@@ -1827,7 +1839,7 @@ bool PhaseChaitin::stretch_base_pointer_live_ranges(ResourceArea *a) { + + // Found a safepoint? 
+ JVMState *jvms = n->jvms(); +- if( jvms ) { ++ if (jvms && !liveout.is_empty()) { + // Now scan for a live derived pointer + IndexSetIterator elements(&liveout); + uint neighbor; +@@ -1983,12 +1995,14 @@ void PhaseChaitin::dump(const Block *b) const { + // Print live-out info at end of block + if( _live ) { + tty->print("Liveout: "); +- IndexSet *live = _live->live(b); +- IndexSetIterator elements(live); + tty->print("{"); +- uint i; +- while ((i = elements.next()) != 0) { +- tty->print("L%d ", _lrg_map.find_const(i)); ++ IndexSet *live = _live->live(b); ++ if (!live->is_empty()) { ++ IndexSetIterator elements(live); ++ uint i; ++ while ((i = elements.next()) != 0) { ++ tty->print("L%d ", _lrg_map.find_const(i)); ++ } + } + tty->print_cr("}"); + } +diff --git a/hotspot/src/share/vm/opto/coalesce.cpp b/hotspot/src/share/vm/opto/coalesce.cpp +index c675445bf..988a45ec2 100644 +--- a/hotspot/src/share/vm/opto/coalesce.cpp ++++ b/hotspot/src/share/vm/opto/coalesce.cpp +@@ -602,29 +602,40 @@ void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, + // Some original neighbors of lr1 might have gone away + // because the constrained register mask prevented them. + // Remove lr1 from such neighbors. +- IndexSetIterator one(n_lr1); +- uint neighbor; ++ uint neighbor = 0; + LRG &lrg1 = lrgs(lr1); +- while ((neighbor = one.next()) != 0) +- if( !_ulr.member(neighbor) ) +- if( _phc._ifg->neighbors(neighbor)->remove(lr1) ) +- lrgs(neighbor).inc_degree( -lrg1.compute_degree(lrgs(neighbor)) ); ++ ++ if (!n_lr1->is_empty()) { ++ IndexSetIterator one(n_lr1); ++ while ((neighbor = one.next()) != 0) ++ if (!_ulr.member(neighbor)) ++ if (_phc._ifg->neighbors(neighbor)->remove(lr1)) ++ lrgs(neighbor).inc_degree(-lrg1.compute_degree(lrgs(neighbor))); ++ } + + + // lr2 is now called (coalesced into) lr1. + // Remove lr2 from the IFG. 
+- IndexSetIterator two(n_lr2); + LRG &lrg2 = lrgs(lr2); +- while ((neighbor = two.next()) != 0) +- if( _phc._ifg->neighbors(neighbor)->remove(lr2) ) +- lrgs(neighbor).inc_degree( -lrg2.compute_degree(lrgs(neighbor)) ); ++ if (!n_lr2->is_empty()) { ++ IndexSetIterator two(n_lr2); ++ while ((neighbor = two.next()) != 0) { ++ if (_phc._ifg->neighbors(neighbor)->remove(lr2)) { ++ lrgs(neighbor).inc_degree(-lrg2.compute_degree(lrgs(neighbor))); ++ } ++ } ++ } + + // Some neighbors of intermediate copies now interfere with the + // combined live range. +- IndexSetIterator three(&_ulr); +- while ((neighbor = three.next()) != 0) +- if( _phc._ifg->neighbors(neighbor)->insert(lr1) ) +- lrgs(neighbor).inc_degree( lrg1.compute_degree(lrgs(neighbor)) ); ++ if (!_ulr.is_empty()) { ++ IndexSetIterator three(&_ulr); ++ while ((neighbor = three.next()) != 0) { ++ if (_phc._ifg->neighbors(neighbor)->insert(lr1)) { ++ lrgs(neighbor).inc_degree(lrg1.compute_degree(lrgs(neighbor))); ++ } ++ } ++ } + } + + static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) { +diff --git a/hotspot/src/share/vm/opto/ifg.cpp b/hotspot/src/share/vm/opto/ifg.cpp +index 3b33aa7a9..39c0e0155 100644 +--- a/hotspot/src/share/vm/opto/ifg.cpp ++++ b/hotspot/src/share/vm/opto/ifg.cpp +@@ -94,11 +94,13 @@ void PhaseIFG::SquareUp() { + assert( !_is_square, "only on triangular" ); + + // Simple transpose +- for( uint i = 0; i < _maxlrg; i++ ) { +- IndexSetIterator elements(&_adjs[i]); +- uint datum; +- while ((datum = elements.next()) != 0) { +- _adjs[datum].insert( i ); ++ for (uint i = 0; i < _maxlrg; i++) { ++ if (!_adjs[i].is_empty()) { ++ IndexSetIterator elements(&_adjs[i]); ++ uint datum; ++ while ((datum = elements.next()) != 0) { ++ _adjs[datum].insert(i); ++ } + } + } + _is_square = true; +@@ -122,44 +124,52 @@ int PhaseIFG::test_edge_sq( uint a, uint b ) const { + } + + // Union edges of B into A +-void PhaseIFG::Union( uint a, uint b ) { ++void PhaseIFG::Union(uint a, uint b) { + assert( 
_is_square, "only on square" ); + IndexSet *A = &_adjs[a]; +- IndexSetIterator b_elements(&_adjs[b]); +- uint datum; +- while ((datum = b_elements.next()) != 0) { +- if(A->insert(datum)) { +- _adjs[datum].insert(a); +- lrgs(a).invalid_degree(); +- lrgs(datum).invalid_degree(); ++ if (!_adjs[b].is_empty()) { ++ IndexSetIterator b_elements(&_adjs[b]); ++ uint datum; ++ while ((datum = b_elements.next()) != 0) { ++ if (A->insert(datum)) { ++ _adjs[datum].insert(a); ++ lrgs(a).invalid_degree(); ++ lrgs(datum).invalid_degree(); ++ } + } + } + } + + // Yank a Node and all connected edges from the IFG. Return a + // list of neighbors (edges) yanked. +-IndexSet *PhaseIFG::remove_node( uint a ) { ++IndexSet *PhaseIFG::remove_node(uint a) { + assert( _is_square, "only on square" ); + assert( !_yanked->test(a), "" ); + _yanked->set(a); + + // I remove the LRG from all neighbors. +- IndexSetIterator elements(&_adjs[a]); + LRG &lrg_a = lrgs(a); +- uint datum; +- while ((datum = elements.next()) != 0) { +- _adjs[datum].remove(a); +- lrgs(datum).inc_degree( -lrg_a.compute_degree(lrgs(datum)) ); ++ if (!_adjs[a].is_empty()) { ++ IndexSetIterator elements(&_adjs[a]); ++ uint datum; ++ while ((datum = elements.next()) != 0) { ++ _adjs[datum].remove(a); ++ lrgs(datum).inc_degree(-lrg_a.compute_degree(lrgs(datum))); ++ } + } + return neighbors(a); + } + + // Re-insert a yanked Node. +-void PhaseIFG::re_insert( uint a ) { ++void PhaseIFG::re_insert(uint a) { + assert( _is_square, "only on square" ); + assert( _yanked->test(a), "" ); + (*_yanked) >>= a; + ++ if (_adjs[a].is_empty()) { ++ return; ++ } ++ + IndexSetIterator elements(&_adjs[a]); + uint datum; + while ((datum = elements.next()) != 0) { +@@ -173,7 +183,7 @@ void PhaseIFG::re_insert( uint a ) { + // mis-aligned (or for Fat-Projections, not-adjacent) then we have to + // MULTIPLY the sizes. Inspect Brigg's thesis on register pairs to see why + // this is so. 
+-int LRG::compute_degree( LRG &l ) const { ++int LRG::compute_degree(LRG &l) const { + int tmp; + int num_regs = _num_regs; + int nregs = l.num_regs(); +@@ -188,14 +198,18 @@ int LRG::compute_degree( LRG &l ) const { + // mis-aligned (or for Fat-Projections, not-adjacent) then we have to + // MULTIPLY the sizes. Inspect Brigg's thesis on register pairs to see why + // this is so. +-int PhaseIFG::effective_degree( uint lidx ) const { ++int PhaseIFG::effective_degree(uint lidx) const { ++ IndexSet *s = neighbors(lidx); ++ if (s->is_empty()) { ++ return 0; ++ } ++ + int eff = 0; + int num_regs = lrgs(lidx).num_regs(); + int fat_proj = lrgs(lidx)._fat_proj; +- IndexSet *s = neighbors(lidx); + IndexSetIterator elements(s); + uint nidx; +- while((nidx = elements.next()) != 0) { ++ while ((nidx = elements.next()) != 0) { + LRG &lrgn = lrgs(nidx); + int nregs = lrgn.num_regs(); + eff += (fat_proj || lrgn._fat_proj) // either is a fat-proj? +@@ -210,14 +224,16 @@ int PhaseIFG::effective_degree( uint lidx ) const { + void PhaseIFG::dump() const { + tty->print_cr("-- Interference Graph --%s--", + _is_square ? "square" : "triangular" ); +- if( _is_square ) { +- for( uint i = 0; i < _maxlrg; i++ ) { ++ if (_is_square) { ++ for (uint i = 0; i < _maxlrg; i++) { + tty->print( (*_yanked)[i] ? 
"XX " : " "); + tty->print("L%d: { ",i); +- IndexSetIterator elements(&_adjs[i]); +- uint datum; +- while ((datum = elements.next()) != 0) { +- tty->print("L%d ", datum); ++ if (!_adjs[i].is_empty()) { ++ IndexSetIterator elements(&_adjs[i]); ++ uint datum; ++ while ((datum = elements.next()) != 0) { ++ tty->print("L%d ", datum); ++ } + } + tty->print_cr("}"); + +@@ -235,10 +251,12 @@ void PhaseIFG::dump() const { + tty->print("L%d ",j - 1); + } + tty->print("| "); +- IndexSetIterator elements(&_adjs[i]); +- uint datum; +- while ((datum = elements.next()) != 0) { +- tty->print("L%d ", datum); ++ if (!_adjs[i].is_empty()) { ++ IndexSetIterator elements(&_adjs[i]); ++ uint datum; ++ while ((datum = elements.next()) != 0) { ++ tty->print("L%d ", datum); ++ } + } + tty->print("}\n"); + } +@@ -265,16 +283,18 @@ void PhaseIFG::verify( const PhaseChaitin *pc ) const { + for( uint i = 0; i < _maxlrg; i++ ) { + assert(!((*_yanked)[i]) || !neighbor_cnt(i), "Is removed completely" ); + IndexSet *set = &_adjs[i]; +- IndexSetIterator elements(set); +- uint idx; +- uint last = 0; +- while ((idx = elements.next()) != 0) { +- assert(idx != i, "Must have empty diagonal"); +- assert(pc->_lrg_map.find_const(idx) == idx, "Must not need Find"); +- assert(_adjs[idx].member(i), "IFG not square"); +- assert(!(*_yanked)[idx], "No yanked neighbors"); +- assert(last < idx, "not sorted increasing"); +- last = idx; ++ if (!set->is_empty()) { ++ IndexSetIterator elements(set); ++ uint idx; ++ uint last = 0; ++ while ((idx = elements.next()) != 0) { ++ assert(idx != i, "Must have empty diagonal"); ++ assert(pc->_lrg_map.find_const(idx) == idx, "Must not need Find"); ++ assert(_adjs[idx].member(i), "IFG not square"); ++ assert(!(*_yanked)[idx], "No yanked neighbors"); ++ assert(last < idx, "not sorted increasing"); ++ last = idx; ++ } + } + assert(!lrgs(i)._degree_valid || effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong"); + } +@@ -284,17 +304,21 @@ void PhaseIFG::verify( 
const PhaseChaitin *pc ) const { + // Interfere this register with everything currently live. Use the RegMasks + // to trim the set of possible interferences. Return a count of register-only + // interferences as an estimate of register pressure. +-void PhaseChaitin::interfere_with_live( uint r, IndexSet *liveout ) { +- uint retval = 0; +- // Interfere with everything live. +- const RegMask &rm = lrgs(r).mask(); +- // Check for interference by checking overlap of regmasks. +- // Only interfere if acceptable register masks overlap. +- IndexSetIterator elements(liveout); +- uint l; +- while( (l = elements.next()) != 0 ) +- if( rm.overlap( lrgs(l).mask() ) ) +- _ifg->add_edge( r, l ); ++void PhaseChaitin::interfere_with_live(uint r, IndexSet *liveout) { ++ if (!liveout->is_empty()) { ++ uint retval = 0; ++ // Interfere with everything live. ++ const RegMask &rm = lrgs(r).mask(); ++ // Check for interference by checking overlap of regmasks. ++ // Only interfere if acceptable register masks overlap. ++ IndexSetIterator elements(liveout); ++ uint l; ++ while ((l = elements.next()) != 0) { ++ if (rm.overlap(lrgs(l).mask())) { ++ _ifg->add_edge(r, l); ++ } ++ } ++ } + } + + // Actually build the interference graph. Uses virtual registers only, no +@@ -390,6 +414,9 @@ void PhaseChaitin::build_ifg_virtual( ) { + } + + uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) { ++ if (liveout->is_empty()) { ++ return 0; ++ } + IndexSetIterator elements(liveout); + uint lidx; + uint cnt = 0; +@@ -405,6 +432,9 @@ uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) { + } + + uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) { ++ if (liveout->is_empty()) { ++ return 0; ++ } + IndexSetIterator elements(liveout); + uint lidx; + uint cnt = 0; +@@ -489,23 +519,25 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) { + int inst_count = last_inst - first_inst; + double cost = (inst_count <= 0) ? 
0.0 : block->_freq * double(inst_count); + assert(!(cost < 0.0), "negative spill cost" ); +- IndexSetIterator elements(&liveout); +- uint lidx; +- while ((lidx = elements.next()) != 0) { +- LRG &lrg = lrgs(lidx); +- lrg._area += cost; +- // Compute initial register pressure +- if (lrg.mask().is_UP() && lrg.mask_size()) { +- if (lrg._is_float || lrg._is_vector) { // Count float pressure +- pressure[1] += lrg.reg_pressure(); +- if (pressure[1] > block->_freg_pressure) { +- block->_freg_pressure = pressure[1]; +- } +- // Count int pressure, but do not count the SP, flags +- } else if(lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) { +- pressure[0] += lrg.reg_pressure(); +- if (pressure[0] > block->_reg_pressure) { +- block->_reg_pressure = pressure[0]; ++ if (!liveout.is_empty()) { ++ IndexSetIterator elements(&liveout); ++ uint lidx; ++ while ((lidx = elements.next()) != 0) { ++ LRG &lrg = lrgs(lidx); ++ lrg._area += cost; ++ // Compute initial register pressure ++ if (lrg.mask().is_UP() && lrg.mask_size()) { ++ if (lrg._is_float || lrg._is_vector) { // Count float pressure ++ pressure[1] += lrg.reg_pressure(); ++ if (pressure[1] > block->_freg_pressure) { ++ block->_freg_pressure = pressure[1]; ++ } ++ // Count int pressure, but do not count the SP, flags ++ } else if (lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) { ++ pressure[0] += lrg.reg_pressure(); ++ if (pressure[0] > block->_reg_pressure) { ++ block->_reg_pressure = pressure[0]; ++ } + } + } + } +diff --git a/hotspot/src/share/vm/opto/indexSet.cpp b/hotspot/src/share/vm/opto/indexSet.cpp +index 4ba99e727..958901007 100644 +--- a/hotspot/src/share/vm/opto/indexSet.cpp ++++ b/hotspot/src/share/vm/opto/indexSet.cpp +@@ -177,6 +177,9 @@ IndexSet::BitBlock *IndexSet::alloc_block() { + IndexSet::BitBlock *IndexSet::alloc_block_containing(uint element) { + BitBlock *block = alloc_block(); + uint bi = get_block_index(element); ++ if (bi >= _current_block_limit) { ++ 
_current_block_limit = bi + 1; ++ } + _blocks[bi] = block; + return block; + } +@@ -191,7 +194,7 @@ void IndexSet::free_block(uint i) { + assert(block != &_empty_block, "cannot free the empty block"); + block->set_next((IndexSet::BitBlock*)Compile::current()->indexSet_free_block_list()); + Compile::current()->set_indexSet_free_block_list(block); +- set_block(i,&_empty_block); ++ set_block(i, &_empty_block); + } + + //------------------------------lrg_union-------------------------------------- +@@ -234,38 +237,42 @@ uint IndexSet::lrg_union(uint lr1, uint lr2, + // other color. (A variant of the Briggs assertion) + uint reg_degree = 0; + +- uint element; ++ uint element = 0; + // Load up the combined interference set with the neighbors of one +- IndexSetIterator elements(one); +- while ((element = elements.next()) != 0) { +- LRG &lrg = ifg->lrgs(element); +- if (mask.overlap(lrg.mask())) { +- insert(element); +- if( !lrg.mask().is_AllStack() ) { +- reg_degree += lrg1.compute_degree(lrg); +- if( reg_degree >= fail_degree ) return reg_degree; +- } else { +- // !!!!! Danger! No update to reg_degree despite having a neighbor. +- // A variant of the Briggs assertion. +- // Not needed if I simplify during coalesce, ala George/Appel. +- assert( lrg.lo_degree(), "" ); +- } +- } +- } +- // Add neighbors of two as well +- IndexSetIterator elements2(two); +- while ((element = elements2.next()) != 0) { +- LRG &lrg = ifg->lrgs(element); +- if (mask.overlap(lrg.mask())) { +- if (insert(element)) { +- if( !lrg.mask().is_AllStack() ) { +- reg_degree += lrg2.compute_degree(lrg); +- if( reg_degree >= fail_degree ) return reg_degree; ++ if (!one->is_empty()) { ++ IndexSetIterator elements(one); ++ while ((element = elements.next()) != 0) { ++ LRG &lrg = ifg->lrgs(element); ++ if (mask.overlap(lrg.mask())) { ++ insert(element); ++ if (!lrg.mask().is_AllStack()) { ++ reg_degree += lrg1.compute_degree(lrg); ++ if (reg_degree >= fail_degree) return reg_degree; + } else { + // !!!!! 
Danger! No update to reg_degree despite having a neighbor. + // A variant of the Briggs assertion. + // Not needed if I simplify during coalesce, ala George/Appel. +- assert( lrg.lo_degree(), "" ); ++ assert(lrg.lo_degree(), ""); ++ } ++ } ++ } ++ } ++ // Add neighbors of two as well ++ if (!two->is_empty()) { ++ IndexSetIterator elements2(two); ++ while ((element = elements2.next()) != 0) { ++ LRG &lrg = ifg->lrgs(element); ++ if (mask.overlap(lrg.mask())) { ++ if (insert(element)) { ++ if (!lrg.mask().is_AllStack()) { ++ reg_degree += lrg2.compute_degree(lrg); ++ if (reg_degree >= fail_degree) return reg_degree; ++ } else { ++ // !!!!! Danger! No update to reg_degree despite having a neighbor. ++ // A variant of the Briggs assertion. ++ // Not needed if I simplify during coalesce, ala George/Appel. ++ assert(lrg.lo_degree(), ""); ++ } + } + } + } +@@ -285,6 +292,7 @@ IndexSet::IndexSet (IndexSet *set) { + _max_elements = set->_max_elements; + #endif + _count = set->_count; ++ _current_block_limit = set->_current_block_limit; + _max_blocks = set->_max_blocks; + if (_max_blocks <= preallocated_block_list_size) { + _blocks = _preallocated_block_list; +@@ -314,6 +322,7 @@ void IndexSet::initialize(uint max_elements) { + _max_elements = max_elements; + #endif + _count = 0; ++ _current_block_limit = 0; + _max_blocks = (max_elements + bits_per_block - 1) / bits_per_block; + + if (_max_blocks <= preallocated_block_list_size) { +@@ -338,6 +347,7 @@ void IndexSet::initialize(uint max_elements, Arena *arena) { + _max_elements = max_elements; + #endif // ASSERT + _count = 0; ++ _current_block_limit = 0; + _max_blocks = (max_elements + bits_per_block - 1) / bits_per_block; + + if (_max_blocks <= preallocated_block_list_size) { +@@ -360,7 +370,8 @@ void IndexSet::swap(IndexSet *set) { + set->check_watch("swap", _serial_number); + #endif + +- for (uint i = 0; i < _max_blocks; i++) { ++ uint max = MAX2(_current_block_limit, set->_current_block_limit); ++ for (uint i = 0; i < 
max; i++) { + BitBlock *temp = _blocks[i]; + set_block(i, set->_blocks[i]); + set->set_block(i, temp); +@@ -368,6 +379,10 @@ void IndexSet::swap(IndexSet *set) { + uint temp = _count; + _count = set->_count; + set->_count = temp; ++ ++ temp = _current_block_limit; ++ _current_block_limit = set->_current_block_limit; ++ set->_current_block_limit = temp; + } + + //---------------------------- IndexSet::dump() ----------------------------- +@@ -375,12 +390,13 @@ void IndexSet::swap(IndexSet *set) { + + #ifndef PRODUCT + void IndexSet::dump() const { +- IndexSetIterator elements(this); +- + tty->print("{"); +- uint i; +- while ((i = elements.next()) != 0) { +- tty->print("L%d ", i); ++ if (!this->is_empty()) { ++ IndexSetIterator elements(this); ++ uint i; ++ while ((i = elements.next()) != 0) { ++ tty->print("L%d ", i); ++ } + } + tty->print_cr("}"); + } +@@ -435,12 +451,14 @@ void IndexSet::verify() const { + } + } + +- IndexSetIterator elements(this); +- count = 0; +- while ((i = elements.next()) != 0) { +- count++; +- assert(member(i), "returned a non member"); +- assert(count <= _count, "iterator returned wrong number of elements"); ++ if (!this->is_empty()) { ++ IndexSetIterator elements(this); ++ count = 0; ++ while ((i = elements.next()) != 0) { ++ count++; ++ assert(member(i), "returned a non member"); ++ assert(count <= _count, "iterator returned wrong number of elements"); ++ } + } + } + #endif +@@ -449,44 +467,35 @@ void IndexSet::verify() const { + // Create an iterator for a set. If empty blocks are detected when iterating + // over the set, these blocks are replaced. + +-IndexSetIterator::IndexSetIterator(IndexSet *set) { ++IndexSetIterator::IndexSetIterator(IndexSet *set) : ++ _current(0), ++ _value(0), ++ _next_word(IndexSet::words_per_block), ++ _next_block(set->is_empty() ? 1 : 0), ++ _max_blocks(set->is_empty() ? 
1 : set->_current_block_limit), ++ _words(NULL), ++ _blocks(set->_blocks), ++ _set(set) { + #ifdef ASSERT + if (CollectIndexSetStatistics) { + set->tally_iteration_statistics(); + } + set->check_watch("traversed", set->count()); + #endif +- if (set->is_empty()) { +- _current = 0; +- _next_word = IndexSet::words_per_block; +- _next_block = 1; +- _max_blocks = 1; +- +- // We don't need the following values when we iterate over an empty set. +- // The commented out code is left here to document that the omission +- // is intentional. +- // +- //_value = 0; +- //_words = NULL; +- //_blocks = NULL; +- //_set = NULL; +- } else { +- _current = 0; +- _value = 0; +- _next_block = 0; +- _next_word = IndexSet::words_per_block; +- +- _max_blocks = set->_max_blocks; +- _words = NULL; +- _blocks = set->_blocks; +- _set = set; +- } + } + + //---------------------------- IndexSetIterator(const) ----------------------------- + // Iterate over a constant IndexSet. + +-IndexSetIterator::IndexSetIterator(const IndexSet *set) { ++IndexSetIterator::IndexSetIterator(const IndexSet *set) : ++ _current(0), ++ _value(0), ++ _next_word(IndexSet::words_per_block), ++ _next_block(set->is_empty() ? 1 : 0), ++ _max_blocks(set->is_empty() ? 1 : set->_current_block_limit), ++ _words(NULL), ++ _blocks(set->_blocks), ++ _set(NULL) { + #ifdef ASSERT + if (CollectIndexSetStatistics) { + set->tally_iteration_statistics(); +@@ -494,31 +503,6 @@ IndexSetIterator::IndexSetIterator(const IndexSet *set) { + // We don't call check_watch from here to avoid bad recursion. + // set->check_watch("traversed const", set->count()); + #endif +- if (set->is_empty()) { +- _current = 0; +- _next_word = IndexSet::words_per_block; +- _next_block = 1; +- _max_blocks = 1; +- +- // We don't need the following values when we iterate over an empty set. +- // The commented out code is left here to document that the omission +- // is intentional. 
+- // +- //_value = 0; +- //_words = NULL; +- //_blocks = NULL; +- //_set = NULL; +- } else { +- _current = 0; +- _value = 0; +- _next_block = 0; +- _next_word = IndexSet::words_per_block; +- +- _max_blocks = set->_max_blocks; +- _words = NULL; +- _blocks = set->_blocks; +- _set = NULL; +- } + } + + //---------------------------- List16Iterator::advance_and_next() ----------------------------- +@@ -536,7 +520,7 @@ uint IndexSetIterator::advance_and_next() { + + _next_word = wi+1; + +- return next(); ++ return next_value(); + } + } + +@@ -555,7 +539,7 @@ uint IndexSetIterator::advance_and_next() { + _next_block = bi+1; + _next_word = wi+1; + +- return next(); ++ return next_value(); + } + } + +diff --git a/hotspot/src/share/vm/opto/indexSet.hpp b/hotspot/src/share/vm/opto/indexSet.hpp +index ef5aed18b..6a15fa02d 100644 +--- a/hotspot/src/share/vm/opto/indexSet.hpp ++++ b/hotspot/src/share/vm/opto/indexSet.hpp +@@ -189,14 +189,17 @@ class IndexSet : public ResourceObj { + // The number of elements in the set + uint _count; + ++ // The current upper limit of blocks that has been allocated and might be in use ++ uint _current_block_limit; ++ ++ // The number of top level array entries in use ++ uint _max_blocks; ++ + // Our top level array of bitvector segments + BitBlock **_blocks; + + BitBlock *_preallocated_block_list[preallocated_block_list_size]; + +- // The number of top level array entries in use +- uint _max_blocks; +- + // Our assertions need to know the maximum number allowed in the set + #ifdef ASSERT + uint _max_elements; +@@ -263,12 +266,13 @@ class IndexSet : public ResourceObj { + check_watch("clear"); + #endif + _count = 0; +- for (uint i = 0; i < _max_blocks; i++) { ++ for (uint i = 0; i < _current_block_limit; i++) { + BitBlock *block = _blocks[i]; + if (block != &_empty_block) { + free_block(i); + } + } ++ _current_block_limit = 0; + } + + uint count() const { return _count; } +@@ -419,18 +423,18 @@ class IndexSetIterator VALUE_OBJ_CLASS_SPEC { + // 
The index of the next word we will inspect + uint _next_word; + ++ // The index of the next block we will inspect ++ uint _next_block; ++ ++ // The number of blocks in the set ++ uint _max_blocks; ++ + // A pointer to the contents of the current block + uint32 *_words; + +- // The index of the next block we will inspect +- uint _next_block; +- + // A pointer to the blocks in our set + IndexSet::BitBlock **_blocks; + +- // The number of blocks in the set +- uint _max_blocks; +- + // If the iterator was created from a non-const set, we replace + // non-canonical empty blocks with the _empty_block pointer. If + // _set is NULL, we do no replacement. +@@ -448,20 +452,26 @@ class IndexSetIterator VALUE_OBJ_CLASS_SPEC { + IndexSetIterator(IndexSet *set); + IndexSetIterator(const IndexSet *set); + ++ // Return the next element of the set. ++ uint next_value() { ++ uint current = _current; ++ uint value = _value; ++ while (mask_bits(current,window_mask) == 0) { ++ current >>= window_size; ++ value += window_size; ++ } ++ ++ uint advance = _second_bit[mask_bits(current,window_mask)]; ++ _current = current >> advance; ++ _value = value + advance; ++ return value + _first_bit[mask_bits(current,window_mask)]; ++ } ++ + // Return the next element of the set. Return 0 when done. 
+ uint next() { + uint current = _current; + if (current != 0) { +- uint value = _value; +- while (mask_bits(current,window_mask) == 0) { +- current >>= window_size; +- value += window_size; +- } +- +- uint advance = _second_bit[mask_bits(current,window_mask)]; +- _current = current >> advance; +- _value = value + advance; +- return value + _first_bit[mask_bits(current,window_mask)]; ++ return next_value(); + } else { + return advance_and_next(); + } +diff --git a/hotspot/src/share/vm/opto/live.cpp b/hotspot/src/share/vm/opto/live.cpp +index 787f5ab88..53599162e 100644 +--- a/hotspot/src/share/vm/opto/live.cpp ++++ b/hotspot/src/share/vm/opto/live.cpp +@@ -69,7 +69,7 @@ void PhaseLive::compute(uint maxlrg) { + + // Array of delta-set pointers, indexed by block pre_order-1. + _deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg.number_of_blocks()); +- memset( _deltas, 0, sizeof(IndexSet*)* _cfg.number_of_blocks()); ++ memset(_deltas, 0, sizeof(IndexSet*)* _cfg.number_of_blocks()); + + _free_IndexSet = NULL; + +@@ -93,8 +93,8 @@ void PhaseLive::compute(uint maxlrg) { + + uint r = _names.at(n->_idx); + assert(!def_outside->member(r), "Use of external LRG overlaps the same LRG defined in this block"); +- def->insert( r ); +- use->remove( r ); ++ def->insert(r); ++ use->remove(r); + uint cnt = n->req(); + for (uint k = 1; k < cnt; k++) { + Node *nk = n->in(k); +@@ -134,7 +134,7 @@ void PhaseLive::compute(uint maxlrg) { + while (_worklist->size()) { + Block* block = _worklist->pop(); + IndexSet *delta = getset(block); +- assert( delta->count(), "missing delta set" ); ++ assert(delta->count(), "missing delta set"); + + // Add new-live-in to predecessors live-out sets + for (uint l = 1; l < block->num_preds(); l++) { +@@ -173,34 +173,32 @@ void PhaseLive::stats(uint iters) const { + + // Get an IndexSet for a block. Return existing one, if any. Make a new + // empty one if a prior one does not exist. 
+-IndexSet *PhaseLive::getset( Block *p ) { ++IndexSet *PhaseLive::getset(Block *p) { + IndexSet *delta = _deltas[p->_pre_order-1]; +- if( !delta ) // Not on worklist? ++ if( !delta ) { // Not on worklist? + // Get a free set; flag as being on worklist +- delta = _deltas[p->_pre_order-1] = getfreeset(); ++ delta = _deltas[p->_pre_order - 1] = getfreeset(); ++ } + return delta; // Return set of new live-out items + } + + // Pull from free list, or allocate. Internal allocation on the returned set + // is always from thread local storage. +-IndexSet *PhaseLive::getfreeset( ) { ++IndexSet *PhaseLive::getfreeset() { + IndexSet *f = _free_IndexSet; +- if( !f ) { ++ if (!f) { + f = new IndexSet; +-// f->set_arena(Thread::current()->resource_area()); + f->initialize(_maxlrg, Thread::current()->resource_area()); + } else { + // Pull from free list + _free_IndexSet = f->next(); +- //f->_cnt = 0; // Reset to empty +-// f->set_arena(Thread::current()->resource_area()); + f->initialize(_maxlrg, Thread::current()->resource_area()); + } + return f; + } + + // Free an IndexSet from a block. +-void PhaseLive::freeset( const Block *p ) { ++void PhaseLive::freeset(const Block *p) { + IndexSet *f = _deltas[p->_pre_order-1]; + f->set_next(_free_IndexSet); + _free_IndexSet = f; // Drop onto free list +@@ -209,53 +207,58 @@ void PhaseLive::freeset( const Block *p ) { + + // Add a live-out value to a given blocks live-out set. If it is new, then + // also add it to the delta set and stick the block on the worklist. +-void PhaseLive::add_liveout( Block *p, uint r, VectorSet &first_pass ) { ++void PhaseLive::add_liveout(Block *p, uint r, VectorSet &first_pass) { + IndexSet *live = &_live[p->_pre_order-1]; + if( live->insert(r) ) { // If actually inserted... + // We extended the live-out set. See if the value is generated locally. + // If it is not, then we must extend the live-in set. + if( !_defs[p->_pre_order-1].member( r ) ) { + if( !_deltas[p->_pre_order-1] && // Not on worklist? 
+- first_pass.test(p->_pre_order) ) ++ first_pass.test(p->_pre_order)) { + _worklist->push(p); // Actually go on worklist if already 1st pass ++ } + getset(p)->insert(r); + } + } + } + + // Add a vector of live-out values to a given blocks live-out set. +-void PhaseLive::add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass ) { ++void PhaseLive::add_liveout(Block *p, IndexSet *lo, VectorSet &first_pass) { + IndexSet *live = &_live[p->_pre_order-1]; + IndexSet *defs = &_defs[p->_pre_order-1]; + IndexSet *on_worklist = _deltas[p->_pre_order-1]; + IndexSet *delta = on_worklist ? on_worklist : getfreeset(); + +- IndexSetIterator elements(lo); +- uint r; +- while ((r = elements.next()) != 0) { +- if( live->insert(r) && // If actually inserted... +- !defs->member( r ) ) // and not defined locally +- delta->insert(r); // Then add to live-in set ++ if (!lo->is_empty()) { ++ IndexSetIterator elements(lo); ++ uint r; ++ while ((r = elements.next()) != 0) { ++ if (live->insert(r) && // If actually inserted... ++ !defs->member(r)) { // and not defined locally ++ delta->insert(r); // Then add to live-in set ++ } ++ } + } + +- if( delta->count() ) { // If actually added things ++ if (delta->count()) { // If actually added things + _deltas[p->_pre_order-1] = delta; // Flag as on worklist now +- if( !on_worklist && // Not on worklist? +- first_pass.test(p->_pre_order) ) +- _worklist->push(p); // Actually go on worklist if already 1st pass +- } else { // Nothing there; just free it ++ if (!on_worklist && // Not on worklist? 
++ first_pass.test(p->_pre_order)) { ++ _worklist->push(p); // Actually go on worklist if already 1st pass ++ } ++ } else { // Nothing there; just free it + delta->set_next(_free_IndexSet); +- _free_IndexSet = delta; // Drop onto free list ++ _free_IndexSet = delta; // Drop onto free list + } + } + + #ifndef PRODUCT + // Dump the live-out set for a block +-void PhaseLive::dump( const Block *b ) const { ++void PhaseLive::dump(const Block *b) const { + tty->print("Block %d: ",b->_pre_order); + tty->print("LiveOut: "); _live[b->_pre_order-1].dump(); + uint cnt = b->number_of_nodes(); +- for( uint i=0; iprint("L%d/", _names.at(b->get_node(i)->_idx)); + b->get_node(i)->dump(); + } +@@ -263,7 +266,7 @@ void PhaseLive::dump( const Block *b ) const { + } + + // Verify that base pointers and derived pointers are still sane. +-void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const { ++void PhaseChaitin::verify_base_ptrs(ResourceArea *a) const { + #ifdef ASSERT + Unique_Node_List worklist(a); + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { +@@ -288,17 +291,18 @@ void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const { + worklist.clear(); + worklist.push(check); + uint k = 0; +- while( k < worklist.size() ) { ++ while (k < worklist.size()) { + check = worklist.at(k); + assert(check,"Bad base or derived pointer"); + // See PhaseChaitin::find_base_for_derived() for all cases. 
+ int isc = check->is_Copy(); +- if( isc ) { ++ if (isc) { + worklist.push(check->in(isc)); +- } else if( check->is_Phi() ) { +- for (uint m = 1; m < check->req(); m++) ++ } else if (check->is_Phi()) { ++ for (uint m = 1; m < check->req(); m++) { + worklist.push(check->in(m)); +- } else if( check->is_Con() ) { ++ } ++ } else if (check->is_Con()) { + if (is_derived) { + // Derived is NULL+offset + assert(!is_derived || check->bottom_type()->is_ptr()->ptr() == TypePtr::Null,"Bad derived pointer"); +@@ -312,8 +316,8 @@ void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const { + check->bottom_type()->is_ptr()->ptr() == TypePtr::Null,"Bad base pointer"); + } + } +- } else if( check->bottom_type()->is_ptr()->_offset == 0 ) { +- if(check->is_Proj() || check->is_Mach() && ++ } else if (check->bottom_type()->is_ptr()->_offset == 0) { ++ if (check->is_Proj() || check->is_Mach() && + (check->as_Mach()->ideal_Opcode() == Op_CreateEx || + check->as_Mach()->ideal_Opcode() == Op_ThreadLocal || + check->as_Mach()->ideal_Opcode() == Op_CMoveP || +@@ -347,7 +351,7 @@ void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const { + } + + // Verify that graphs and base pointers are still sane. +-void PhaseChaitin::verify( ResourceArea *a, bool verify_ifg ) const { ++void PhaseChaitin::verify(ResourceArea *a, bool verify_ifg) const { + #ifdef ASSERT + if( VerifyOpto || VerifyRegisterAllocator ) { + _cfg.verify(); +diff --git a/hotspot/src/share/vm/opto/reg_split.cpp b/hotspot/src/share/vm/opto/reg_split.cpp +index a132f1f9f..de0c9fc7f 100644 +--- a/hotspot/src/share/vm/opto/reg_split.cpp ++++ b/hotspot/src/share/vm/opto/reg_split.cpp +@@ -1250,10 +1250,12 @@ uint PhaseChaitin::Split(uint maxlrg, ResourceArea* split_arena) { + // it contains no members which compress to defidx. Finding such an + // instance may be a case to add liveout adjustment in compress_uf_map(). + // See 5063219. 
+- uint member; +- IndexSetIterator isi(liveout); +- while ((member = isi.next()) != 0) { +- assert(defidx != _lrg_map.find_const(member), "Live out member has not been compressed"); ++ if (!liveout->is_empty()) { ++ uint member; ++ IndexSetIterator isi(liveout); ++ while ((member = isi.next()) != 0) { ++ assert(defidx != _lrg_map.find_const(member), "Live out member has not been compressed"); ++ } + } + #endif + Reachblock[slidx] = NULL; diff --git a/Ddot-intrinsic-implement.patch b/Ddot-intrinsic-implement.patch new file mode 100644 index 0000000000000000000000000000000000000000..2b091a517c5c4b04ee7549857a4bad3d4a7415d4 --- /dev/null +++ b/Ddot-intrinsic-implement.patch @@ -0,0 +1,479 @@ +diff --git a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp +index 1e9b1cb91..c0fd37d05 100644 +--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp ++++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp +@@ -2061,6 +2061,14 @@ public: + ld_st(Vt, T, a, op1, op2); \ + } + ++ void ld1_d(FloatRegister Vt, int index, const Address &a) { ++ starti; ++ assert(index == 0 || index == 1, "Index must be 0 or 1 for Vx.2D"); ++ f(0, 31), f(index & 1, 30); ++ f(0b001101110, 29, 21), rf(a.index(), 16), f(0b1000, 15, 12); ++ f(0b01, 11, 10), rf(a.base(), 5), rf(Vt, 0); ++ } ++ + INSN1(ld1, 0b001100010, 0b0111); + INSN2(ld1, 0b001100010, 0b1010); + INSN3(ld1, 0b001100010, 0b0110); +@@ -2186,6 +2194,13 @@ public: + + #undef INSN + ++ void faddp_d(FloatRegister Vd, FloatRegister Vn) { ++ starti; ++ f(0b01, 31, 30), f(0b1111100, 29, 23), f(0b1, 22), f(0b11000, 21, 17); ++ f(0b0110110, 16, 10); ++ rf(Vn, 5), rf(Vd, 0); ++ } ++ + #define INSN(NAME, opc) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ + starti; \ +diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp +index f2f85df60..873da580b 100644 +--- 
a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp ++++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp +@@ -2853,6 +2853,124 @@ void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp, + eor(crc, crc, tmp); + } + ++/** ++ * Multiply and summation of 1 double-precision floating number pairs(sparse) ++ */ ++void MacroAssembler::f2j_ddot_s1(Register dx, Register incx, ++ Register dy, Register incy) { ++ const FloatRegister tmpx = v2; ++ const FloatRegister tmpy = v3; ++ ++ ld1_d(tmpx, 0, Address(dx, incx)); ++ ld1_d(tmpy, 0, Address(dy, incy)); ++ fmaddd(v0, tmpx, tmpy, v0); ++} ++ ++/** ++ * Multiply and summation of 1 double-precision floating number pairs(dense) ++ */ ++void MacroAssembler::f2j_ddot_d1(Register dx, Register dy, int size) { ++ const FloatRegister tmpx = v2; ++ const FloatRegister tmpy = v3; ++ ++ ldrd(tmpx, post(dx, size)); ++ ldrd(tmpy, post(dy, size)); ++ fmaddd(v0, tmpx, tmpy, v0); ++} ++ ++/** ++ * Multiply and summation of 4 double-precision floating numbers ++ */ ++void MacroAssembler::f2j_ddot_d4(Register dx, Register dy) { ++ ld1(v2, v3, T2D, post(dx, 32)); ++ ld1(v4, v5, T2D, post(dy, 32)); ++ fmul(v2, T2D, v2, v4); ++ fmul(v3, T2D, v3, v5); ++ fadd(v0, T2D, v0, v2); ++ fadd(v6, T2D, v6, v3); ++} ++ ++/** ++ * @param n register containing the number of doubles in array ++ * @param dx register pointing to input array ++ * @param incx register containing step len for dx ++ * @param dy register pointing to another input array ++ * @param incy register containing step len for dy ++ * @param temp_reg register containing loop variable ++ */ ++void MacroAssembler::f2j_ddot(Register n, Register dx, Register incx, ++ Register dy, Register incy, Register temp_reg) { ++ Label Ldot_EXIT, Ldot_S_BEGIN, Ldot_S1, Ldot_S10, Ldot_S4, Ldot_D_BEGIN, ++ Ldot_D1, Ldot_D10, Ldot_D4; ++ ++ const int SZ = 8; ++ ++ enter(); ++ fmovd(v0, zr); ++ fmovd(v6, v0); ++ ++ cmp(n, zr); ++ br(Assembler::LE, Ldot_EXIT); ++ ++ cmp(incx, 1); 
++ br(Assembler::NE, Ldot_S_BEGIN); ++ cmp(incy, 1); ++ br(Assembler::NE, Ldot_S_BEGIN); ++ ++ BIND(Ldot_D_BEGIN); ++ asr(temp_reg, n, 2); ++ cmp(temp_reg, zr); ++ br(Assembler::LE, Ldot_D1); ++ ++ BIND(Ldot_D4); ++ f2j_ddot_d4(dx, dy); ++ subs(temp_reg, temp_reg, 1); ++ br(Assembler::NE, Ldot_D4); ++ ++ fadd(v0, T2D, v0, v6); ++ faddp_d(v0, v0); ++ ++ BIND(Ldot_D1); ++ ands(temp_reg, n, 3); ++ br(Assembler::LE, Ldot_EXIT); ++ ++ BIND(Ldot_D10); ++ f2j_ddot_d1(dx, dy, SZ); ++ subs(temp_reg, temp_reg, 1); ++ br(Assembler::NE, Ldot_D10); ++ leave(); ++ ret(lr); ++ ++ BIND(Ldot_S_BEGIN); ++ lsl(incx, incx, 3); ++ lsl(incy, incy, 3); ++ ++ asr(temp_reg, n, 2); ++ cmp(temp_reg, zr); ++ br(Assembler::LE, Ldot_S1); ++ ++ BIND(Ldot_S4); ++ f2j_ddot_s1(dx, incx, dy, incy); ++ f2j_ddot_s1(dx, incx, dy, incy); ++ f2j_ddot_s1(dx, incx, dy, incy); ++ f2j_ddot_s1(dx, incx, dy, incy); ++ subs(temp_reg, temp_reg, 1); ++ br(Assembler::NE, Ldot_S4); ++ ++ BIND(Ldot_S1); ++ ands(temp_reg, n, 3); ++ br(Assembler::LE, Ldot_EXIT); ++ ++ BIND(Ldot_S10); ++ f2j_ddot_s1(dx, incx, dy, incy); ++ subs(temp_reg, temp_reg, 1); ++ br(Assembler::NE, Ldot_S10); ++ ++ BIND(Ldot_EXIT); ++ leave(); ++ ret(lr); ++} ++ + /** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) +diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp +index 388177589..1abc7e3b0 100644 +--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp ++++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp +@@ -1180,6 +1180,9 @@ public: + Register table0, Register table1, Register table2, Register table3, + bool upper = false); + ++ void f2j_ddot(Register n, Register dx, Register incx, ++ Register dy, Register incy, Register temp_reg); ++ + void string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, + Register tmp1); +@@ -1236,6 +1239,11 @@ private: + 
// Uses rscratch2 if the address is not directly reachable + Address spill_address(int size, int offset, Register tmp=rscratch2); + ++private: ++ void f2j_ddot_s1(Register dx, Register incx, Register dy, Register incy); ++ void f2j_ddot_d1(Register dx, Register dy, int size); ++ void f2j_ddot_d4(Register dx, Register dy); ++ + public: + void spill(Register Rx, bool is64, int offset) { + if (is64) { +diff --git a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp +index 0d73c0c0c..337d5c1dd 100644 +--- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp ++++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp +@@ -45,6 +45,7 @@ + + #include "stubRoutines_aarch64.hpp" + ++ + #ifdef COMPILER2 + #include "opto/runtime.hpp" + #endif +@@ -3220,6 +3221,39 @@ class StubGenerator: public StubCodeGenerator { + return start; + } + ++ /** ++ * Arguments: ++ * ++ * Inputs: ++ * c_rarg0 - int n ++ * c_rarg1 - double[] dx ++ * c_rarg2 - int incx ++ * c_rarg3 - double[] dy ++ * c_rarg4 - int incy ++ * ++ * Output: ++ * d0 - ddot result ++ * ++ */ ++ address generate_ddotF2jBLAS() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "f2jblas_ddot"); ++ ++ address start = __ pc(); ++ ++ const Register n = c_rarg0; ++ const Register dx = c_rarg1; ++ const Register incx = c_rarg2; ++ const Register dy = c_rarg3; ++ const Register incy = c_rarg4; ++ ++ BLOCK_COMMENT("Entry:"); ++ ++ __ f2j_ddot(n, dx, incx, dy, incy, rscratch2); ++ ++ return start; ++ } ++ + /** + * Arguments: + * +@@ -4262,6 +4296,10 @@ class StubGenerator: public StubCodeGenerator { + StubRoutines::_montgomerySquare = g.generate_multiply(); + } + ++ if (UseF2jBLASIntrinsics) { ++ StubRoutines::_ddotF2jBLAS = generate_ddotF2jBLAS(); ++ } ++ + if (UseAESIntrinsics) { + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); + StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); +diff --git 
a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp +index 148f9212e..6bd8dbedd 100644 +--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp ++++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp +@@ -852,6 +852,12 @@ + do_name( implCompress_name, "implCompress0") \ + do_signature(implCompress_signature, "([BI)V") \ + \ ++ /* support for com.github.fommil.netlib.F2jBLAS */ \ ++ do_class(com_github_fommil_netlib_f2jblas, "com/github/fommil/netlib/F2jBLAS") \ ++ do_intrinsic(_f2jblas_ddot, com_github_fommil_netlib_f2jblas, ddot_name, ddot_signature, F_R) \ ++ do_name( ddot_name, "ddot") \ ++ do_signature(ddot_signature, "(I[DI[DI)D") \ ++ \ + /* support for sun.security.provider.SHA2 */ \ + do_class(sun_security_provider_sha2, "sun/security/provider/SHA2") \ + do_intrinsic(_sha2_implCompress, sun_security_provider_sha2, implCompress_name, implCompress_signature, F_R) \ +diff --git a/hotspot/src/share/vm/oops/method.cpp b/hotspot/src/share/vm/oops/method.cpp +index 24fae4d30..64cdae9c7 100644 +--- a/hotspot/src/share/vm/oops/method.cpp ++++ b/hotspot/src/share/vm/oops/method.cpp +@@ -1281,7 +1281,9 @@ vmSymbols::SID Method::klass_id_for_intrinsics(Klass* holder) { + // which does not use the class default class loader so we check for its loader here + InstanceKlass* ik = InstanceKlass::cast(holder); + if ((ik->class_loader() != NULL) && !SystemDictionary::is_ext_class_loader(ik->class_loader())) { +- return vmSymbols::NO_SID; // regardless of name, no intrinsics here ++ if (!EnableIntrinsicExternal) { ++ return vmSymbols::NO_SID; // regardless of name, no intrinsics here ++ } + } + + // see if the klass name is well-known: +diff --git a/hotspot/src/share/vm/opto/escape.cpp b/hotspot/src/share/vm/opto/escape.cpp +index 9ef1c5e69..aa1b1ac3a 100644 +--- a/hotspot/src/share/vm/opto/escape.cpp ++++ b/hotspot/src/share/vm/opto/escape.cpp +@@ -978,7 +978,8 @@ void ConnectionGraph::process_call_arguments(CallNode *call) { + 
strcmp(call->as_CallLeaf()->_name, "squareToLen") == 0 || + strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 || + strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 || +- strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0) ++ strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0 || ++ strcmp(call->as_CallLeaf()->_name, "f2jblas_ddot") == 0) + ))) { + call->dump(); + fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name)); +diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp +index 89ebabe6f..5cbc0f012 100644 +--- a/hotspot/src/share/vm/opto/library_call.cpp ++++ b/hotspot/src/share/vm/opto/library_call.cpp +@@ -335,6 +335,7 @@ class LibraryCallKit : public GraphKit { + bool inline_mulAdd(); + bool inline_montgomeryMultiply(); + bool inline_montgomerySquare(); ++ bool inline_ddotF2jBLAS(); + + bool inline_profileBoolean(); + }; +@@ -587,6 +588,10 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) { + if (!UseCRC32Intrinsics) return NULL; + break; + ++ case vmIntrinsics::_f2jblas_ddot: ++ if (!UseF2jBLASIntrinsics) return NULL; ++ break; ++ + case vmIntrinsics::_incrementExactI: + case vmIntrinsics::_addExactI: + if (!Matcher::match_rule_supported(Op_OverflowAddI) || !UseMathExactIntrinsics) return NULL; +@@ -983,6 +988,8 @@ bool LibraryCallKit::try_to_inline(int predicate) { + + case vmIntrinsics::_profileBoolean: + return inline_profileBoolean(); ++ case vmIntrinsics::_f2jblas_ddot: ++ return inline_ddotF2jBLAS(); + + default: + // If you get here, it may be that someone has added a new intrinsic +@@ -6303,6 +6310,49 @@ bool LibraryCallKit::inline_updateBytesCRC32() { + return true; + } + ++/** ++ * double com.github.fommil.netlib.F2jBLAS.ddot(int n, double[] dx, int incx, double[] dy, int incy) ++ */ ++bool LibraryCallKit::inline_ddotF2jBLAS() { ++ assert(callee()->signature()->size() == 5, "update has 5 parameters"); ++ Node* n = argument(1); 
// type: int ++ Node* dx = argument(2); // type: double[] ++ Node* incx = argument(3); // type: int ++ Node* dy = argument(4); // type: double[] ++ Node* incy = argument(5); // type: int ++ ++ const Type* dx_type = dx->Value(&_gvn); ++ const Type* dy_type = dy->Value(&_gvn); ++ const TypeAryPtr* dx_top_src = dx_type->isa_aryptr(); ++ const TypeAryPtr* dy_top_src = dy_type->isa_aryptr(); ++ if (dx_top_src == NULL || dx_top_src->klass() == NULL || ++ dy_top_src == NULL || dy_top_src->klass() == NULL) { ++ // failed array check ++ return false; ++ } ++ ++ // Figure out the size and type of the elements we will be copying. ++ BasicType dx_elem = dx_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); ++ BasicType dy_elem = dy_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); ++ if (dx_elem != T_DOUBLE || dy_elem != T_DOUBLE) { ++ return false; ++ } ++ ++ // 'dx_start' points to dx array + scaled offset ++ Node* dx_start = array_element_address(dx, intcon(0), dx_elem); ++ Node* dy_start = array_element_address(dy, intcon(0), dy_elem); ++ ++ address stubAddr = StubRoutines::ddotF2jBLAS(); ++ const char *stubName = "f2jblas_ddot"; ++ Node* call; ++ call = make_runtime_call(RC_LEAF, OptoRuntime::ddotF2jBLAS_Type(), ++ stubAddr, stubName, TypePtr::BOTTOM, ++ n, dx_start, incx, dy_start, incy); ++ Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms)); ++ set_result(result); ++ return true; ++} ++ + /** + * Calculate CRC32 for ByteBuffer. 
+ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) +diff --git a/hotspot/src/share/vm/opto/runtime.cpp b/hotspot/src/share/vm/opto/runtime.cpp +index ba8f42e49..f1fe4d666 100644 +--- a/hotspot/src/share/vm/opto/runtime.cpp ++++ b/hotspot/src/share/vm/opto/runtime.cpp +@@ -920,6 +920,30 @@ const TypeFunc* OptoRuntime::updateBytesCRC32_Type() { + return TypeFunc::make(domain, range); + } + ++/** ++ * double ddot(int n, double *dx, int incx, double *dy, int incy) ++ */ ++const TypeFunc* OptoRuntime::ddotF2jBLAS_Type() { ++ // create input type (domain) ++ int num_args = 5; ++ int argcnt = num_args; ++ const Type** fields = TypeTuple::fields(argcnt); ++ int argp = TypeFunc::Parms; ++ fields[argp++] = TypeInt::INT; // n ++ fields[argp++] = TypeAryPtr::DOUBLES; // dx ++ fields[argp++] = TypeInt::INT; // incx ++ fields[argp++] = TypeAryPtr::DOUBLES; // dy ++ fields[argp++] = TypeInt::INT; // incy ++ assert(argp == TypeFunc::Parms + argcnt, "correct decoding"); ++ const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, fields); ++ ++ // result type needed ++ fields = TypeTuple::fields(1); ++ fields[TypeFunc::Parms + 0] = Type::DOUBLE; ++ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, fields); ++ return TypeFunc::make(domain, range); ++} ++ + // for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning int + const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() { + // create input type (domain) +diff --git a/hotspot/src/share/vm/opto/runtime.hpp b/hotspot/src/share/vm/opto/runtime.hpp +index e3bdfdf9c..66d393c5c 100644 +--- a/hotspot/src/share/vm/opto/runtime.hpp ++++ b/hotspot/src/share/vm/opto/runtime.hpp +@@ -317,6 +317,8 @@ private: + + static const TypeFunc* updateBytesCRC32_Type(); + ++ static const TypeFunc* ddotF2jBLAS_Type(); ++ + // leaf on stack replacement interpreter accessor types + static const TypeFunc* osr_end_Type(); + +diff --git 
a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp +index 7b17e623b..520cc3187 100644 +--- a/hotspot/src/share/vm/runtime/globals.hpp ++++ b/hotspot/src/share/vm/runtime/globals.hpp +@@ -743,6 +743,12 @@ class CommandLineFlags { + product(bool, UseCRC32Intrinsics, false, \ + "use intrinsics for java.util.zip.CRC32") \ + \ ++ experimental(bool, UseF2jBLASIntrinsics, false, \ ++ "use intrinsics for com.github.fommil.netlib.F2jBLAS on aarch64") \ ++ \ ++ experimental(bool, EnableIntrinsicExternal, false, \ ++ "enable intrinsics for methods of external packages") \ ++ \ + develop(bool, TraceCallFixup, false, \ + "Trace all call fixups") \ + \ +diff --git a/hotspot/src/share/vm/runtime/stubRoutines.cpp b/hotspot/src/share/vm/runtime/stubRoutines.cpp +index d943248da..10f438bc5 100644 +--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp ++++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp +@@ -136,6 +136,8 @@ address StubRoutines::_sha512_implCompressMB = NULL; + address StubRoutines::_updateBytesCRC32 = NULL; + address StubRoutines::_crc_table_adr = NULL; + ++address StubRoutines::_ddotF2jBLAS = NULL; ++ + address StubRoutines::_multiplyToLen = NULL; + address StubRoutines::_squareToLen = NULL; + address StubRoutines::_mulAdd = NULL; +diff --git a/hotspot/src/share/vm/runtime/stubRoutines.hpp b/hotspot/src/share/vm/runtime/stubRoutines.hpp +index e18b9127d..a4eeb910d 100644 +--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp ++++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp +@@ -214,6 +214,8 @@ class StubRoutines: AllStatic { + static address _updateBytesCRC32; + static address _crc_table_adr; + ++ static address _ddotF2jBLAS; ++ + static address _multiplyToLen; + static address _squareToLen; + static address _mulAdd; +@@ -377,6 +379,8 @@ class StubRoutines: AllStatic { + static address updateBytesCRC32() { return _updateBytesCRC32; } + static address crc_table_addr() { return _crc_table_adr; } + ++ static address ddotF2jBLAS() { 
return _ddotF2jBLAS; } ++ + static address multiplyToLen() {return _multiplyToLen; } + static address squareToLen() {return _squareToLen; } + static address mulAdd() {return _mulAdd; } diff --git a/fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch b/fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch new file mode 100644 index 0000000000000000000000000000000000000000..64e48a35b54811e73a848c976054413196dd6fa2 --- /dev/null +++ b/fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch @@ -0,0 +1,46 @@ +diff --git a/jdk/src/share/classes/java/lang/Long.java b/jdk/src/share/classes/java/lang/Long.java +index 58c2cc3ba..7b6e14a97 100644 +--- a/jdk/src/share/classes/java/lang/Long.java ++++ b/jdk/src/share/classes/java/lang/Long.java +@@ -812,12 +812,11 @@ public final class Long extends Number implements Comparable { + static final Long cache[]; + + static { +- ++ int h = 127; + String longCacheHighPropValue = + sun.misc.VM.getSavedProperty("java.lang.Long.LongCache.high"); + if (longCacheHighPropValue != null) { + // high value may be configured by property +- int h = 0; + try { + int i = Integer.parseInt(longCacheHighPropValue); + i = Math.max(i, 127); +@@ -826,21 +825,13 @@ public final class Long extends Number implements Comparable { + } catch( NumberFormatException nfe) { + // If the property cannot be parsed into an int, ignore it. 
+ } +- high = h; +- low = -h - 1; +- cache = new Long[(high - low) + 1]; +- int j = low; +- for(int k = 0; k < cache.length; k++) +- cache[k] = new Long(j++); +- +- } else { +- low = -128; +- high = 127; +- cache = new Long[(high - low) + 1]; +- int j = low; +- for(int k = 0; k < cache.length; k++) +- cache[k] = new Long(j++); + } ++ high = h; ++ low = -h - 1; ++ cache = new Long[(high - low) + 1]; ++ int j = low; ++ for(int k = 0; k < cache.length; k++) ++ cache[k] = new Long(j++); + } + } + diff --git a/java-1.8.0-openjdk.spec b/java-1.8.0-openjdk.spec index 268ef8b9c3c874352e5dacce062e8212783a183c..fa6f155b51390bd82652f74139d10500ae66411f 100644 --- a/java-1.8.0-openjdk.spec +++ b/java-1.8.0-openjdk.spec @@ -915,7 +915,7 @@ Provides: java-%{javaver}-%{origin}-accessibility%{?1} = %{epoch}:%{version}-%{r Name: java-%{javaver}-%{origin} Version: %{javaver}.%{updatever}.%{buildver} -Release: 0 +Release: 1 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides. 
This created a @@ -1037,6 +1037,13 @@ Patch95: 8205921-Optimizing-best-of-2-work-stealing-queue-selection.patch # 8u265 Patch96: fix-Long-cache-range-and-remove-VM-option-java.lang.IntegerCache.high-by-default.patch +Patch97: leaf-optimize-in-ParallelScanvageGC.patch +Patch98: 8046294-Generate-the-4-byte-timestamp-randomly.patch +Patch100: 8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch +Patch102: fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch +Patch103: Ddot-intrinsic-implement.patch +Patch104: 8234003-Improve-IndexSet-iteration.patch +Patch105: 8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch ############################################# # @@ -1432,6 +1439,14 @@ pushd %{top_level_dir_name} %patch94 -p1 %patch95 -p1 %patch96 -p1 +%patch97 -p1 +%patch98 -p1 +%patch100 -p1 +%patch102 -p1 +%patch103 -p1 +%patch104 -p1 +%patch105 -p1 + popd @@ -2051,6 +2066,16 @@ require "copy_jdk_configs.lua" %endif %changelog +* Tue Aug 29 2020 jdkboy - 1:1.8.0.265-b10.1 +- Add leaf-optimize-in-ParallelScanvageGC.patch +- Add 8046294-Generate-the-4-byte-timestamp-randomly.patch +- Add 8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch +- Add fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch +- Add Ddot-intrinsic-implement.patch +- Add 8234003-Improve-IndexSet-iteration.patch +- Add 8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch +- Remove prohibition-of-irreducible-loop-in-mergers.patch + * Tue Aug 25 2020 noah - 1:1.8.0.265-b10.0 - Update to aarch64-shenandoah-jdk8u-8u265-b01 - add fix-Long-cache-range-and-remove-VM-option-java.lang.IntegerCache.high-by-default.patch diff --git a/leaf-optimize-in-ParallelScanvageGC.patch b/leaf-optimize-in-ParallelScanvageGC.patch new file mode 100644 index 0000000000000000000000000000000000000000..425f06d19ca42be693c71524dc725563c96e5316 --- /dev/null +++ 
b/leaf-optimize-in-ParallelScanvageGC.patch @@ -0,0 +1,210 @@ +diff --git a/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp b/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp +index fba64e15f..1c92314f9 100644 +--- a/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp ++++ b/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp +@@ -131,6 +131,14 @@ inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* + return __sync_val_compare_and_swap(dest, compare_value, exchange_value); + } + ++inline intptr_t Atomic::relax_cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value) ++{ ++ intptr_t value = compare_value; ++ __atomic_compare_exchange(dest, &value, &exchange_value, /* weak */false, ++ __ATOMIC_RELAXED, __ATOMIC_RELAXED); ++ return value; ++} ++ + inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value) + { + return (void *) cmpxchg_ptr((intptr_t) exchange_value, +diff --git a/hotspot/src/share/vm/classfile/classFileParser.cpp b/hotspot/src/share/vm/classfile/classFileParser.cpp +index 07d07e4f2..f001a94e7 100644 +--- a/hotspot/src/share/vm/classfile/classFileParser.cpp ++++ b/hotspot/src/share/vm/classfile/classFileParser.cpp +@@ -4393,6 +4393,11 @@ void ClassFileParser::fill_oop_maps(instanceKlassHandle k, + OopMapBlock* this_oop_map = k->start_of_nonstatic_oop_maps(); + const InstanceKlass* const super = k->superklass(); + const unsigned int super_count = super ? super->nonstatic_oop_map_count() : 0; ++ ++ const bool super_is_gc_leaf = super ? 
super->oop_is_gc_leaf() : true; ++ bool this_is_gc_leaf = super_is_gc_leaf && (nonstatic_oop_map_count == 0); ++ k->set_oop_is_gc_leaf(this_is_gc_leaf); ++ + if (super_count > 0) { + // Copy maps from superklass + OopMapBlock* super_oop_map = super->start_of_nonstatic_oop_maps(); +diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp +index b2de74d41..dde9ac426 100644 +--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp ++++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp +@@ -49,7 +49,12 @@ inline void PSPromotionManager::claim_or_forward_internal_depth(T* p) { + } + oopDesc::encode_store_heap_oop_not_null(p, o); + } else { +- push_depth(p); ++ // leaf object copy in advanced, reduce cost of push and pop ++ if (!o->klass()->oop_is_gc_leaf()) { ++ push_depth(p); ++ } else { ++ PSScavenge::copy_and_push_safe_barrier(this, p); ++ } + } + } + } +@@ -171,7 +176,15 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { + Copy::aligned_disjoint_words((HeapWord*)o, (HeapWord*)new_obj, new_obj_size); + + // Now we have to CAS in the header. ++#ifdef AARCH64 ++ // CAS with memory fence cost a lot within copy_to_survivor_space on aarch64. ++ // To minimize the cost, we use a normal CAS to do object forwarding, plus a ++ // memory fence only upon CAS succeeds. To further reduce the fence insertion, ++ // we can skip the fence insertion for leaf objects (objects don't have reference fields). ++ if (o->relax_cas_forward_to(new_obj, test_mark)) { ++#else + if (o->cas_forward_to(new_obj, test_mark)) { ++#endif + // We won any races, we "own" this object. 
+ assert(new_obj == o->forwardee(), "Sanity"); + +@@ -195,10 +208,13 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { + push_depth(masked_o); + TASKQUEUE_STATS_ONLY(++_arrays_chunked; ++_masked_pushes); + } else { +- // we'll just push its contents +- new_obj->push_contents(this); ++ // leaf object don't have contents, never need push_contents ++ if (!o->klass()->oop_is_gc_leaf()) { ++ // we'll just push its contents ++ new_obj->push_contents(this); ++ } + } +- } else { ++ } else { + // We lost, someone else "owns" this object + guarantee(o->is_forwarded(), "Object must be forwarded if the cas failed."); + +diff --git a/hotspot/src/share/vm/oops/klass.cpp b/hotspot/src/share/vm/oops/klass.cpp +index 7fda7ce62..6e8f9acde 100644 +--- a/hotspot/src/share/vm/oops/klass.cpp ++++ b/hotspot/src/share/vm/oops/klass.cpp +@@ -207,6 +207,8 @@ Klass::Klass() { + clear_modified_oops(); + clear_accumulated_modified_oops(); + _shared_class_path_index = -1; ++ ++ set_oop_is_gc_leaf(false); + } + + jint Klass::array_layout_helper(BasicType etype) { +diff --git a/hotspot/src/share/vm/oops/klass.hpp b/hotspot/src/share/vm/oops/klass.hpp +index 22ae48f5c..4aea54795 100644 +--- a/hotspot/src/share/vm/oops/klass.hpp ++++ b/hotspot/src/share/vm/oops/klass.hpp +@@ -177,6 +177,8 @@ class Klass : public Metadata { + jbyte _modified_oops; // Card Table Equivalent (YC/CMS support) + jbyte _accumulated_modified_oops; // Mod Union Equivalent (CMS support) + ++ bool _is_gc_leaf; ++ + private: + // This is an index into FileMapHeader::_classpath_entry_table[], to + // associate this class with the JAR file where it's loaded from during +@@ -569,6 +571,9 @@ protected: + oop_is_typeArray_slow()); } + #undef assert_same_query + ++ void set_oop_is_gc_leaf(bool is_gc_leaf) { _is_gc_leaf = is_gc_leaf; } ++ inline bool oop_is_gc_leaf() const { return _is_gc_leaf; } ++ + // Access flags + AccessFlags access_flags() const { return _access_flags; } + void set_access_flags(AccessFlags flags) { 
_access_flags = flags; } +diff --git a/hotspot/src/share/vm/oops/oop.hpp b/hotspot/src/share/vm/oops/oop.hpp +index a703a54ef..41a7bce4d 100644 +--- a/hotspot/src/share/vm/oops/oop.hpp ++++ b/hotspot/src/share/vm/oops/oop.hpp +@@ -76,6 +76,9 @@ class oopDesc { + + void release_set_mark(markOop m); + markOop cas_set_mark(markOop new_mark, markOop old_mark); ++#ifdef AARCH64 ++ markOop relax_cas_set_mark(markOop new_mark, markOop old_mark); ++#endif + + // Used only to re-initialize the mark word (e.g., of promoted + // objects during a GC) -- requires a valid klass pointer +@@ -317,6 +320,10 @@ class oopDesc { + void forward_to(oop p); + bool cas_forward_to(oop p, markOop compare); + ++#ifdef AARCH64 ++ bool relax_cas_forward_to(oop p, markOop compare); ++#endif ++ + #if INCLUDE_ALL_GCS + // Like "forward_to", but inserts the forwarding pointer atomically. + // Exactly one thread succeeds in inserting the forwarding pointer, and +diff --git a/hotspot/src/share/vm/oops/oop.inline.hpp b/hotspot/src/share/vm/oops/oop.inline.hpp +index d4c4d75c0..c3abdb128 100644 +--- a/hotspot/src/share/vm/oops/oop.inline.hpp ++++ b/hotspot/src/share/vm/oops/oop.inline.hpp +@@ -76,6 +76,12 @@ inline markOop oopDesc::cas_set_mark(markOop new_mark, markOop old_mark) { + return (markOop) Atomic::cmpxchg_ptr(new_mark, &_mark, old_mark); + } + ++#ifdef AARCH64 ++inline markOop oopDesc::relax_cas_set_mark(markOop new_mark, markOop old_mark) { ++ return (markOop)Atomic::relax_cmpxchg_ptr((intptr_t)new_mark, (volatile intptr_t*)&_mark, (intptr_t)old_mark); ++} ++#endif ++ + inline Klass* oopDesc::klass() const { + if (UseCompressedClassPointers) { + return Klass::decode_klass_not_null(_metadata._compressed_klass); +@@ -715,6 +721,30 @@ inline bool oopDesc::cas_forward_to(oop p, markOop compare) { + return cas_set_mark(m, compare) == compare; + } + ++#ifdef AARCH64 ++inline bool oopDesc::relax_cas_forward_to(oop p, markOop compare) { ++ assert(check_obj_alignment(p), ++ "forwarding to something 
not aligned"); ++ assert(Universe::heap()->is_in_reserved(p), ++ "forwarding to something not in heap"); ++ markOop m = markOopDesc::encode_pointer_as_mark(p); ++ assert(m->decode_pointer() == p, "encoding must be reversable"); ++ markOop old_markoop = relax_cas_set_mark(m, compare); ++ // If CAS succeeded, we must ensure the copy visible to threads reading the forwardee. ++ // (We might delay the fence insertion till pushing contents to task stack as other threads ++ // only need to touch the copied object after stolen the task.) ++ if (old_markoop == compare) { ++ // Once the CAS succeeds, leaf object never needs to be visible to other threads (finished ++ // collection by current thread), so we can save the fence. ++ if (!p->klass()->oop_is_gc_leaf()) { ++ OrderAccess::fence(); ++ } ++ return true; ++ } ++ return false; ++} ++#endif ++ + // Note that the forwardee is not the same thing as the displaced_mark. + // The forwardee is used when copying during scavenge and mark-sweep. + // It does need to clear the low two locking- and GC-related bits. 
+diff --git a/hotspot/src/share/vm/runtime/atomic.hpp b/hotspot/src/share/vm/runtime/atomic.hpp +index 9ca5fce97..015178b61 100644 +--- a/hotspot/src/share/vm/runtime/atomic.hpp ++++ b/hotspot/src/share/vm/runtime/atomic.hpp +@@ -94,6 +94,10 @@ class Atomic : AllStatic { + unsigned int compare_value); + + inline static intptr_t cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value); ++#ifdef AARCH64 ++ inline static intptr_t relax_cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value); ++#endif ++ + inline static void* cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value); + }; + diff --git a/prohibition-of-irreducible-loop-in-mergers.patch b/prohibition-of-irreducible-loop-in-mergers.patch deleted file mode 100644 index 7ed9665ab57c35ad90ecd0a71a498dfedf2e5a35..0000000000000000000000000000000000000000 --- a/prohibition-of-irreducible-loop-in-mergers.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 34712f6bbc3c2c664ee641c78d4a2f8cfe427880 Mon Sep 17 00:00:00 2001 -Date: Fri, 28 Feb 2020 15:17:44 +0000 -Subject: [PATCH] prohibition of irreducible loop in mergers - -Summary: C2Compiler: irreducible loop should not enter merge_many_backedges -LLT: NA -Bug url: NA ---- - hotspot/src/share/vm/opto/loopnode.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hotspot/src/share/vm/opto/loopnode.cpp b/hotspot/src/share/vm/opto/loopnode.cpp -index e2c0645cf8..bbb2e2bf98 100644 ---- a/hotspot/src/share/vm/opto/loopnode.cpp -+++ b/hotspot/src/share/vm/opto/loopnode.cpp -@@ -1542,7 +1542,7 @@ bool IdealLoopTree::beautify_loops( PhaseIdealLoop *phase ) { - // If I am a shared header (multiple backedges), peel off the many - // backedges into a private merge point and use the merge point as - // the one true backedge. 
-- if( _head->req() > 3 ) { -+ if( _head->req() > 3 && !_irreducible) { - // Merge the many backedges into a single backedge but leave - // the hottest backedge as separate edge for the following peel. - merge_many_backedges( phase ); --- -2.12.3 -