diff --git a/8046294-Generate-the-4-byte-timestamp-randomly.patch b/8046294-Generate-the-4-byte-timestamp-randomly.patch new file mode 100644 index 0000000000000000000000000000000000000000..c0477ed090c392f6198df05cbff97293d89b704c --- /dev/null +++ b/8046294-Generate-the-4-byte-timestamp-randomly.patch @@ -0,0 +1,87 @@ +diff --git a/jdk/src/share/classes/sun/security/ssl/RandomCookie.java b/jdk/src/share/classes/sun/security/ssl/RandomCookie.java +index 5f414c408..ce27f0df4 100644 +--- a/jdk/src/share/classes/sun/security/ssl/RandomCookie.java ++++ b/jdk/src/share/classes/sun/security/ssl/RandomCookie.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 1996, 2007, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 1996, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -41,21 +41,8 @@ final class RandomCookie { + byte random_bytes[]; // exactly 32 bytes + + RandomCookie(SecureRandom generator) { +- long temp = System.currentTimeMillis() / 1000; +- int gmt_unix_time; +- if (temp < Integer.MAX_VALUE) { +- gmt_unix_time = (int) temp; +- } else { +- gmt_unix_time = Integer.MAX_VALUE; // Whoops! 
+- } +- + random_bytes = new byte[32]; + generator.nextBytes(random_bytes); +- +- random_bytes[0] = (byte)(gmt_unix_time >> 24); +- random_bytes[1] = (byte)(gmt_unix_time >> 16); +- random_bytes[2] = (byte)(gmt_unix_time >> 8); +- random_bytes[3] = (byte)gmt_unix_time; + } + + RandomCookie(HandshakeInStream m) throws IOException { +@@ -68,22 +55,15 @@ final class RandomCookie { + } + + void print(PrintStream s) { +- int i, gmt_unix_time; +- +- gmt_unix_time = random_bytes[0] << 24; +- gmt_unix_time += random_bytes[1] << 16; +- gmt_unix_time += random_bytes[2] << 8; +- gmt_unix_time += random_bytes[3]; +- +- s.print("GMT: " + gmt_unix_time + " "); +- s.print("bytes = { "); +- +- for (i = 4; i < 32; i++) { +- if (i != 4) { +- s.print(", "); ++ s.print("random_bytes = {"); ++ for (int i = 0; i < 32; i++) { ++ int k = random_bytes[i] & 0xFF; ++ if (i != 0) { ++ s.print(' '); + } +- s.print(random_bytes[i] & 0x0ff); ++ s.print(Utilities.hexDigits[k >>> 4]); ++ s.print(Utilities.hexDigits[k & 0xf]); + } +- s.println(" }"); ++ s.println("}"); + } + } +diff --git a/jdk/src/share/classes/sun/security/ssl/Utilities.java b/jdk/src/share/classes/sun/security/ssl/Utilities.java +index aefb02c9a..9b267f6e1 100644 +--- a/jdk/src/share/classes/sun/security/ssl/Utilities.java ++++ b/jdk/src/share/classes/sun/security/ssl/Utilities.java +@@ -1,5 +1,5 @@ + /* +- * Copyright (c) 2012, Oracle and/or its affiliates. All rights reserved. ++ * Copyright (c) 2012, 2016, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it +@@ -33,6 +33,11 @@ import sun.net.util.IPAddressUtil; + * A utility class to share the static methods. + */ + final class Utilities { ++ /** ++ * hex digits ++ */ ++ static final char[] hexDigits = "0123456789ABCDEF".toCharArray(); ++ + /** + * Puts {@code hostname} into the {@code serverNames} list. + *

diff --git a/8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch b/8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch new file mode 100644 index 0000000000000000000000000000000000000000..2c65f8243d8406b2c03da315d0ba72189bd4aa9e --- /dev/null +++ b/8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch @@ -0,0 +1,28 @@ +diff --git a/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp +index 65a441240..1e534d3da 100644 +--- a/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp ++++ b/hotspot/src/cpu/aarch64/vm/frame_aarch64.cpp +@@ -71,10 +71,20 @@ bool frame::safe_for_sender(JavaThread *thread) { + return false; + } + +- // unextended sp must be within the stack and above or equal sp +- bool unextended_sp_safe = (unextended_sp < thread->stack_base()) && +- (unextended_sp >= sp); ++ // When we are running interpreted code the machine stack pointer, SP, is ++ // set low enough so that the Java expression stack can grow and shrink ++ // without ever exceeding the machine stack bounds. So, ESP >= SP. + ++ // When we call out of an interpreted method, SP is incremented so that ++ // the space between SP and ESP is removed. The SP saved in the callee's ++ // frame is the SP *before* this increment. So, when we walk a stack of ++ // interpreter frames the sender's SP saved in a frame might be less than ++ // the SP at the point of call. 
++ ++ // So unextended sp must be within the stack but we need not to check ++ // that unextended sp >= sp ++ ++ bool unextended_sp_safe = (unextended_sp < thread->stack_base()); + if (!unextended_sp_safe) { + return false; + } diff --git a/8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch b/8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch new file mode 100644 index 0000000000000000000000000000000000000000..175682216553c831c1d0595ffc5ed073911d7c5f --- /dev/null +++ b/8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch @@ -0,0 +1,558 @@ +diff --git a/hotspot/src/share/vm/opto/chaitin.hpp b/hotspot/src/share/vm/opto/chaitin.hpp +index de6d443cd..abbd4449f 100644 +--- a/hotspot/src/share/vm/opto/chaitin.hpp ++++ b/hotspot/src/share/vm/opto/chaitin.hpp +@@ -111,9 +111,9 @@ public: + _msize_valid=1; + if (_is_vector) { + assert(!_fat_proj, "sanity"); +- _mask.verify_sets(_num_regs); ++ assert(_mask.is_aligned_sets(_num_regs), "mask is not aligned, adjacent sets"); + } else if (_num_regs == 2 && !_fat_proj) { +- _mask.verify_pairs(); ++ assert(_mask.is_aligned_pairs(), "mask is not aligned, adjacent pairs"); + } + #endif + } +diff --git a/hotspot/src/share/vm/opto/regmask.cpp b/hotspot/src/share/vm/opto/regmask.cpp +index 352ccfb9d..d92f09eb6 100644 +--- a/hotspot/src/share/vm/opto/regmask.cpp ++++ b/hotspot/src/share/vm/opto/regmask.cpp +@@ -74,7 +74,8 @@ int find_lowest_bit( uint32 mask ) { + } + + // Find highest 1, or return 32 if empty +-int find_hihghest_bit( uint32 mask ) { ++int find_highest_bit( uint32 mask ) { ++ assert(mask != 0, "precondition"); + int n = 0; + if( mask > 0xffff ) { + mask >>= 16; +@@ -167,13 +168,14 @@ OptoReg::Name RegMask::find_first_pair() const { + //------------------------------ClearToPairs----------------------------------- + // Clear out partial bits; leave only bit pairs + void RegMask::clear_to_pairs() { +- for( int i = 0; i < RM_SIZE; i++ ) { ++ 
assert(valid_watermarks(), "sanity"); ++ for( int i = _lwm; i <= _hwm; i++ ) { + int bits = _A[i]; + bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair + bits |= (bits>>1); // Smear 1 hi-bit into a pair + _A[i] = bits; + } +- verify_pairs(); ++ assert(is_aligned_pairs(), "mask is not aligned, adjacent pairs"); + } + + //------------------------------SmearToPairs----------------------------------- +@@ -188,10 +190,14 @@ void RegMask::smear_to_pairs() { + verify_pairs(); + } + +-//------------------------------is_aligned_pairs------------------------------- ++bool RegMask::is_misaligned_pair() const { ++ return Size() == 2 && !is_aligned_pairs(); ++} ++ + bool RegMask::is_aligned_pairs() const { + // Assert that the register mask contains only bit pairs. +- for( int i = 0; i < RM_SIZE; i++ ) { ++ assert(valid_watermarks(), "sanity"); ++ for( int i = _lwm; i <= _hwm; i++ ) { + int bits = _A[i]; + while( bits ) { // Check bits for pairing + int bit = bits & -bits; // Extract low bit +@@ -206,39 +212,28 @@ bool RegMask::is_aligned_pairs() const { + return true; + } + +-//------------------------------is_bound1-------------------------------------- +-// Return TRUE if the mask contains a single bit +-int RegMask::is_bound1() const { +- if( is_AllStack() ) return false; +- int bit = -1; // Set to hold the one bit allowed +- for( int i = 0; i < RM_SIZE; i++ ) { +- if( _A[i] ) { // Found some bits +- if( bit != -1 ) return false; // Already had bits, so fail +- bit = _A[i] & -_A[i]; // Extract 1 bit from mask +- if( bit != _A[i] ) return false; // Found many bits, so fail +- } +- } +- // True for both the empty mask and for a single bit +- return true; ++bool RegMask::is_bound1() const { ++ if (is_AllStack()) return false; ++ return Size() == 1; + } + + //------------------------------is_bound2-------------------------------------- + // Return TRUE if the mask contains an adjacent pair of bits and no other bits. 
+-int RegMask::is_bound_pair() const { ++bool RegMask::is_bound_pair() const { + if( is_AllStack() ) return false; +- ++ assert(valid_watermarks(), "sanity"); + int bit = -1; // Set to hold the one bit allowed +- for( int i = 0; i < RM_SIZE; i++ ) { +- if( _A[i] ) { // Found some bits +- if( bit != -1 ) return false; // Already had bits, so fail +- bit = _A[i] & -(_A[i]); // Extract 1 bit from mask +- if( (bit << 1) != 0 ) { // Bit pair stays in same word? ++ for( int i = _lwm; i <= _hwm; i++ ) { ++ if( _A[i] ) { // Found some bits ++ if( bit != -1) return false; // Already had bits, so fail ++ bit = _A[i] & -(_A[i]); // Extract 1 bit from mask ++ if( (bit << 1) != 0 ) { // Bit pair stays in same word? + if( (bit | (bit<<1)) != _A[i] ) +- return false; // Require adjacent bit pair and no more bits +- } else { // Else its a split-pair case ++ return false; // Require adjacent bit pair and no more bits ++ } else { // Else its a split-pair case + if( bit != _A[i] ) return false; // Found many bits, so fail +- i++; // Skip iteration forward +- if( i >= RM_SIZE || _A[i] != 1 ) ++ i++; // Skip iteration forward ++ if( i > _hwm || _A[i] != 1 ) + return false; // Require 1 lo bit in next word + } + } +@@ -247,31 +242,44 @@ int RegMask::is_bound_pair() const { + return true; + } + ++// Test for a single adjacent set of ideal register's size. ++bool RegMask::is_bound(uint ireg) const { ++ if (is_vector(ireg)) { ++ if (is_bound_set(num_registers(ireg))) ++ return true; ++ } else if (is_bound1() || is_bound_pair()) { ++ return true; ++ } ++ return false; ++} ++ ++ ++ + static int low_bits[3] = { 0x55555555, 0x11111111, 0x01010101 }; +-//------------------------------find_first_set--------------------------------- ++ + // Find the lowest-numbered register set in the mask. Return the + // HIGHEST register number in the set, or BAD if no sets. + // Works also for size 1. 
+ OptoReg::Name RegMask::find_first_set(const int size) const { +- verify_sets(size); +- for (int i = 0; i < RM_SIZE; i++) { ++ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); ++ assert(valid_watermarks(), "sanity"); ++ for (int i = _lwm; i <= _hwm; i++) { + if (_A[i]) { // Found some bits +- int bit = _A[i] & -_A[i]; // Extract low bit + // Convert to bit number, return hi bit in pair +- return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+(size-1)); ++ return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(_A[i])+(size-1)); + } + } + return OptoReg::Bad; + } + +-//------------------------------clear_to_sets---------------------------------- + // Clear out partial bits; leave only aligned adjacent bit pairs + void RegMask::clear_to_sets(const int size) { + if (size == 1) return; + assert(2 <= size && size <= 8, "update low bits table"); + assert(is_power_of_2(size), "sanity"); ++ assert(valid_watermarks(), "sanity"); + int low_bits_mask = low_bits[size>>2]; +- for (int i = 0; i < RM_SIZE; i++) { ++ for (int i = _lwm; i <= _hwm; i++) { + int bits = _A[i]; + int sets = (bits & low_bits_mask); + for (int j = 1; j < size; j++) { +@@ -286,17 +294,17 @@ void RegMask::clear_to_sets(const int size) { + } + _A[i] = sets; + } +- verify_sets(size); ++ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); + } + +-//------------------------------smear_to_sets---------------------------------- + // Smear out partial bits to aligned adjacent bit sets + void RegMask::smear_to_sets(const int size) { + if (size == 1) return; + assert(2 <= size && size <= 8, "update low bits table"); + assert(is_power_of_2(size), "sanity"); ++ assert(valid_watermarks(), "sanity"); + int low_bits_mask = low_bits[size>>2]; +- for (int i = 0; i < RM_SIZE; i++) { ++ for (int i = _lwm; i <= _hwm; i++) { + int bits = _A[i]; + int sets = 0; + for (int j = 0; j < size; j++) { +@@ -312,17 +320,17 @@ void RegMask::smear_to_sets(const int size) { + } + _A[i] = sets; + 
} +- verify_sets(size); ++ assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); + } + +-//------------------------------is_aligned_set-------------------------------- ++// Assert that the register mask contains only bit sets. + bool RegMask::is_aligned_sets(const int size) const { + if (size == 1) return true; + assert(2 <= size && size <= 8, "update low bits table"); + assert(is_power_of_2(size), "sanity"); + int low_bits_mask = low_bits[size>>2]; +- // Assert that the register mask contains only bit sets. +- for (int i = 0; i < RM_SIZE; i++) { ++ assert(valid_watermarks(), "sanity"); ++ for (int i = _lwm; i <= _hwm; i++) { + int bits = _A[i]; + while (bits) { // Check bits for pairing + int bit = bits & -bits; // Extract low bit +@@ -339,14 +347,14 @@ bool RegMask::is_aligned_sets(const int size) const { + return true; + } + +-//------------------------------is_bound_set----------------------------------- + // Return TRUE if the mask contains one adjacent set of bits and no other bits. + // Works also for size 1. + int RegMask::is_bound_set(const int size) const { + if( is_AllStack() ) return false; + assert(1 <= size && size <= 8, "update low bits table"); ++ assert(valid_watermarks(), "sanity"); + int bit = -1; // Set to hold the one bit allowed +- for (int i = 0; i < RM_SIZE; i++) { ++ for (int i = _lwm; i <= _hwm; i++) { + if (_A[i] ) { // Found some bits + if (bit != -1) + return false; // Already had bits, so fail +@@ -364,7 +372,7 @@ int RegMask::is_bound_set(const int size) const { + int set = bit>>24; + set = set & -set; // Remove sign extension. 
+ set = (((set << size) - 1) >> 8); +- if (i >= RM_SIZE || _A[i] != set) ++ if (i > _hwm || _A[i] != set) + return false; // Require expected low bits in next word + } + } +@@ -373,7 +381,6 @@ int RegMask::is_bound_set(const int size) const { + return true; + } + +-//------------------------------is_UP------------------------------------------ + // UP means register only, Register plus stack, or stack only is DOWN + bool RegMask::is_UP() const { + // Quick common case check for DOWN (any stack slot is legal) +@@ -386,22 +393,22 @@ bool RegMask::is_UP() const { + return true; + } + +-//------------------------------Size------------------------------------------- + // Compute size of register mask in bits + uint RegMask::Size() const { + extern uint8 bitsInByte[256]; + uint sum = 0; +- for( int i = 0; i < RM_SIZE; i++ ) ++ assert(valid_watermarks(), "sanity"); ++ for( int i = _lwm; i <= _hwm; i++ ) { + sum += + bitsInByte[(_A[i]>>24) & 0xff] + + bitsInByte[(_A[i]>>16) & 0xff] + + bitsInByte[(_A[i]>> 8) & 0xff] + + bitsInByte[ _A[i] & 0xff]; ++ } + return sum; + } + + #ifndef PRODUCT +-//------------------------------print------------------------------------------ + void RegMask::dump(outputStream *st) const { + st->print("["); + RegMask rm = *this; // Structure copy into local temp +diff --git a/hotspot/src/share/vm/opto/regmask.hpp b/hotspot/src/share/vm/opto/regmask.hpp +index 5ceebb3fb..6cef16ad7 100644 +--- a/hotspot/src/share/vm/opto/regmask.hpp ++++ b/hotspot/src/share/vm/opto/regmask.hpp +@@ -44,27 +44,12 @@ + # include "adfiles/adGlobals_ppc_64.hpp" + #endif + +-// Some fun naming (textual) substitutions: +-// +-// RegMask::get_low_elem() ==> RegMask::find_first_elem() +-// RegMask::Special ==> RegMask::Empty +-// RegMask::_flags ==> RegMask::is_AllStack() +-// RegMask::operator<<=() ==> RegMask::Insert() +-// RegMask::operator>>=() ==> RegMask::Remove() +-// RegMask::Union() ==> RegMask::OR +-// RegMask::Inter() ==> RegMask::AND +-// +-// 
OptoRegister::RegName ==> OptoReg::Name +-// +-// OptoReg::stack0() ==> _last_Mach_Reg or ZERO in core version +-// +-// numregs in chaitin ==> proper degree in chaitin + + //-------------Non-zero bit search methods used by RegMask--------------------- + // Find lowest 1, or return 32 if empty + int find_lowest_bit( uint32 mask ); + // Find highest 1, or return 32 if empty +-int find_hihghest_bit( uint32 mask ); ++int find_highest_bit( uint32 mask ); + + //------------------------------RegMask---------------------------------------- + // The ADL file describes how to print the machine-specific registers, as well +@@ -97,6 +82,12 @@ class RegMask VALUE_OBJ_CLASS_SPEC { + + public: + enum { CHUNK_SIZE = RM_SIZE*_WordBits }; ++ // The low and high water marks represents the lowest and highest word ++ // that might contain set register mask bits, respectively. We guarantee ++ // that there are no bits in words outside this range, but any word at ++ // and between the two marks can still be 0. ++ int _lwm; ++ int _hwm; + + // SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits. + // Also, consider the maximum alignment size for a normally allocated +@@ -126,13 +117,21 @@ public: + # define BODY(I) _A[I] = a##I; + FORALL_BODY + # undef BODY ++ _lwm = 0; ++ _hwm = RM_SIZE - 1; ++ while (_hwm > 0 && _A[_hwm] == 0) _hwm--; ++ while ((_lwm < _hwm) && _A[_lwm] == 0) _lwm++; ++ assert(valid_watermarks(), "post-condition"); + } + + // Handy copying constructor + RegMask( RegMask *rm ) { +-# define BODY(I) _A[I] = rm->_A[I]; +- FORALL_BODY +-# undef BODY ++ _hwm = rm->_hwm; ++ _lwm = rm->_lwm; ++ for (int i = 0; i < RM_SIZE; i++) { ++ _A[i] = rm->_A[i]; ++ } ++ assert(valid_watermarks(), "post-condition"); + } + + // Construct an empty mask +@@ -162,30 +161,36 @@ public: + + // Test for being a not-empty mask. 
+ int is_NotEmpty( ) const { ++ assert(valid_watermarks(), "sanity"); + int tmp = 0; +-# define BODY(I) tmp |= _A[I]; +- FORALL_BODY +-# undef BODY ++ for (int i = _lwm; i <= _hwm; i++) { ++ tmp |= _A[i]; ++ } + return tmp; + } + + // Find lowest-numbered register from mask, or BAD if mask is empty. + OptoReg::Name find_first_elem() const { +- int base, bits; +-# define BODY(I) if( (bits = _A[I]) != 0 ) base = I<<_LogWordBits; else +- FORALL_BODY +-# undef BODY +- { base = OptoReg::Bad; bits = 1<<0; } +- return OptoReg::Name(base + find_lowest_bit(bits)); ++ assert(valid_watermarks(), "sanity"); ++ for (int i = _lwm; i <= _hwm; i++) { ++ int bits = _A[i]; ++ if (bits) { ++ return OptoReg::Name((i<<_LogWordBits) + find_lowest_bit(bits)); ++ } ++ } ++ return OptoReg::Name(OptoReg::Bad); + } ++ + // Get highest-numbered register from mask, or BAD if mask is empty. + OptoReg::Name find_last_elem() const { +- int base, bits; +-# define BODY(I) if( (bits = _A[RM_SIZE-1-I]) != 0 ) base = (RM_SIZE-1-I)<<_LogWordBits; else +- FORALL_BODY +-# undef BODY +- { base = OptoReg::Bad; bits = 1<<0; } +- return OptoReg::Name(base + find_hihghest_bit(bits)); ++ assert(valid_watermarks(), "sanity"); ++ for (int i = _hwm; i >= _lwm; i--) { ++ int bits = _A[i]; ++ if (bits) { ++ return OptoReg::Name((i<<_LogWordBits) + find_highest_bit(bits)); ++ } ++ } ++ return OptoReg::Name(OptoReg::Bad); + } + + // Find the lowest-numbered register pair in the mask. Return the +@@ -199,25 +204,34 @@ public: + void smear_to_pairs(); + // Verify that the mask contains only aligned adjacent bit pairs + void verify_pairs() const { assert( is_aligned_pairs(), "mask is not aligned, adjacent pairs" ); } ++ ++#ifdef ASSERT ++ // Verify watermarks are sane, i.e., within bounds and that no ++ // register words below or above the watermarks have bits set. 
++ bool valid_watermarks() const { ++ assert(_hwm >= 0 && _hwm < RM_SIZE, err_msg("_hwm out of range: %d", _hwm)); ++ assert(_lwm >= 0 && _lwm < RM_SIZE, err_msg("_lwm out of range: %d", _lwm)); ++ for (int i = 0; i < _lwm; i++) { ++ assert(_A[i] == 0, err_msg("_lwm too high: %d regs at: %d", _lwm, i)); ++ } ++ for (int i = _hwm + 1; i < RM_SIZE; i++) { ++ assert(_A[i] == 0, err_msg("_hwm too low: %d regs at: %d", _hwm, i)); ++ } ++ return true; ++ } ++#endif // !ASSERT ++ + // Test that the mask contains only aligned adjacent bit pairs + bool is_aligned_pairs() const; + + // mask is a pair of misaligned registers +- bool is_misaligned_pair() const { return Size()==2 && !is_aligned_pairs(); } ++ bool is_misaligned_pair() const; + // Test for single register +- int is_bound1() const; ++ bool is_bound1() const; + // Test for a single adjacent pair +- int is_bound_pair() const; ++ bool is_bound_pair() const; + // Test for a single adjacent set of ideal register's size. +- int is_bound(uint ireg) const { +- if (is_vector(ireg)) { +- if (is_bound_set(num_registers(ireg))) +- return true; +- } else if (is_bound1() || is_bound_pair()) { +- return true; +- } +- return false; +- } ++ bool is_bound(uint ireg) const; + + // Find the lowest-numbered register set in the mask. Return the + // HIGHEST register number in the set, or BAD if no sets. +@@ -228,8 +242,6 @@ public: + void clear_to_sets(const int size); + // Smear out partial bits to aligned adjacent bit sets. + void smear_to_sets(const int size); +- // Verify that the mask contains only aligned adjacent bit sets +- void verify_sets(int size) const { assert(is_aligned_sets(size), "mask is not aligned, adjacent sets"); } + // Test that the mask contains only aligned adjacent bit sets + bool is_aligned_sets(const int size) const; + +@@ -244,11 +256,14 @@ public: + + // Fast overlap test. Non-zero if any registers in common. 
+ int overlap( const RegMask &rm ) const { +- return +-# define BODY(I) (_A[I] & rm._A[I]) | +- FORALL_BODY +-# undef BODY +- 0 ; ++ assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); ++ int hwm = MIN2(_hwm, rm._hwm); ++ int lwm = MAX2(_lwm, rm._lwm); ++ int result = 0; ++ for (int i = lwm; i <= hwm; i++) { ++ result |= _A[i] & rm._A[i]; ++ } ++ return result; + } + + // Special test for register pressure based splitting +@@ -257,22 +272,29 @@ public: + + // Clear a register mask + void Clear( ) { +-# define BODY(I) _A[I] = 0; +- FORALL_BODY +-# undef BODY ++ _lwm = RM_SIZE - 1; ++ _hwm = 0; ++ memset(_A, 0, sizeof(int)*RM_SIZE); ++ assert(valid_watermarks(), "sanity"); + } + + // Fill a register mask with 1's + void Set_All( ) { +-# define BODY(I) _A[I] = -1; +- FORALL_BODY +-# undef BODY ++ _lwm = 0; ++ _hwm = RM_SIZE - 1; ++ memset(_A, 0xFF, sizeof(int)*RM_SIZE); ++ assert(valid_watermarks(), "sanity"); + } + + // Insert register into mask + void Insert( OptoReg::Name reg ) { +- assert( reg < CHUNK_SIZE, "" ); +- _A[reg>>_LogWordBits] |= (1<<(reg&(_WordBits-1))); ++ assert(reg < CHUNK_SIZE, "sanity"); ++ assert(valid_watermarks(), "pre-condition"); ++ int index = reg>>_LogWordBits; ++ if (index > _hwm) _hwm = index; ++ if (index < _lwm) _lwm = index; ++ _A[index] |= (1<<(reg&(_WordBits-1))); ++ assert(valid_watermarks(), "post-condition"); + } + + // Remove register from mask +@@ -283,23 +305,38 @@ public: + + // OR 'rm' into 'this' + void OR( const RegMask &rm ) { +-# define BODY(I) this->_A[I] |= rm._A[I]; +- FORALL_BODY +-# undef BODY ++ assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); ++ // OR widens the live range ++ if (_lwm > rm._lwm) _lwm = rm._lwm; ++ if (_hwm < rm._hwm) _hwm = rm._hwm; ++ for (int i = _lwm; i <= _hwm; i++) { ++ _A[i] |= rm._A[i]; ++ } ++ assert(valid_watermarks(), "sanity"); + } + + // AND 'rm' into 'this' + void AND( const RegMask &rm ) { +-# define BODY(I) this->_A[I] &= rm._A[I]; +- FORALL_BODY +-# undef 
BODY ++ assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); ++ // Do not evaluate words outside the current watermark range, as they are ++ // already zero and an &= would not change that ++ for (int i = _lwm; i <= _hwm; i++) { ++ _A[i] &= rm._A[i]; ++ } ++ // Narrow the watermarks if &rm spans a narrower range. ++ // Update after to ensure non-overlapping words are zeroed out. ++ if (_lwm < rm._lwm) _lwm = rm._lwm; ++ if (_hwm > rm._hwm) _hwm = rm._hwm; + } + + // Subtract 'rm' from 'this' + void SUBTRACT( const RegMask &rm ) { +-# define BODY(I) _A[I] &= ~rm._A[I]; +- FORALL_BODY +-# undef BODY ++ assert(valid_watermarks() && rm.valid_watermarks(), "sanity"); ++ int hwm = MIN2(_hwm, rm._hwm); ++ int lwm = MAX2(_lwm, rm._lwm); ++ for (int i = lwm; i <= hwm; i++) { ++ _A[i] &= ~rm._A[i]; ++ } + } + + // Compute size of register mask: number of bits diff --git a/8234003-Improve-IndexSet-iteration.patch b/8234003-Improve-IndexSet-iteration.patch new file mode 100644 index 0000000000000000000000000000000000000000..aaf31c7473d0bb247acbc2a2e1d49c04995b2a81 --- /dev/null +++ b/8234003-Improve-IndexSet-iteration.patch @@ -0,0 +1,1186 @@ +diff --git a/hotspot/src/share/vm/opto/chaitin.cpp b/hotspot/src/share/vm/opto/chaitin.cpp +index ec318c515..b3b9eb39a 100644 +--- a/hotspot/src/share/vm/opto/chaitin.cpp ++++ b/hotspot/src/share/vm/opto/chaitin.cpp +@@ -1038,11 +1038,13 @@ void PhaseChaitin::set_was_low() { + // low-degree neighbors when determining if this guy colors. 
+ int briggs_degree = 0; + IndexSet *s = _ifg->neighbors(i); +- IndexSetIterator elements(s); +- uint lidx; +- while((lidx = elements.next()) != 0) { +- if( !lrgs(lidx).lo_degree() ) +- briggs_degree += MAX2(size,lrgs(lidx).num_regs()); ++ if (!s->is_empty()) { ++ IndexSetIterator elements(s); ++ uint lidx; ++ while((lidx = elements.next()) != 0) { ++ if( !lrgs(lidx).lo_degree() ) ++ briggs_degree += MAX2(size,lrgs(lidx).num_regs()); ++ } + } + if( briggs_degree < lrgs(i).degrees_of_freedom() ) + lrgs(i)._was_lo = 1; // Low degree via the briggs assertion +@@ -1118,18 +1120,20 @@ void PhaseChaitin::Pre_Simplify( ) { + // list. Note that 'degree' can only fall and 'numregs' is + // unchanged by this action. Thus the two are equal at most once, + // so LRGs hit the lo-degree worklists at most once. +- IndexSetIterator elements(adj); +- uint neighbor; +- while ((neighbor = elements.next()) != 0) { +- LRG *n = &lrgs(neighbor); +- assert( _ifg->effective_degree(neighbor) == n->degree(), "" ); +- +- // Check for just becoming of-low-degree +- if( n->just_lo_degree() && !n->_has_copy ) { +- assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice"); +- // Put on lo-degree list +- n->_next = lo_no_copy; +- lo_no_copy = neighbor; ++ if (!adj->is_empty()) { ++ IndexSetIterator elements(adj); ++ uint neighbor; ++ while ((neighbor = elements.next()) != 0) { ++ LRG *n = &lrgs(neighbor); ++ assert(_ifg->effective_degree(neighbor) == n->degree(), ""); ++ ++ // Check for just becoming of-low-degree ++ if (n->just_lo_degree() && !n->_has_copy) { ++ assert(!(*_ifg->_yanked)[neighbor], "Cannot move to lo degree twice"); ++ // Put on lo-degree list ++ n->_next = lo_no_copy; ++ lo_no_copy = neighbor; ++ } + } + } + } // End of while lo-degree no_copy worklist not empty +@@ -1159,7 +1163,7 @@ void PhaseChaitin::Simplify( ) { + lrgs(lo)._next = _simplified; + _simplified = lo; + // If this guy is "at risk" then mark his current neighbors +- if( lrgs(lo)._at_risk ) { ++ if 
(lrgs(lo)._at_risk && !_ifg->neighbors(lo)->is_empty()) { + IndexSetIterator elements(_ifg->neighbors(lo)); + uint datum; + while ((datum = elements.next()) != 0) { +@@ -1168,7 +1172,10 @@ void PhaseChaitin::Simplify( ) { + } + + // Yank this guy from the IFG. +- IndexSet *adj = _ifg->remove_node( lo ); ++ IndexSet *adj = _ifg->remove_node(lo); ++ if (adj->is_empty()) { ++ continue; ++ } + + // If any neighbors' degrees fall below their number of + // allowed registers, then put that neighbor on the low degree +@@ -1187,13 +1194,16 @@ void PhaseChaitin::Simplify( ) { + + // Check for just becoming of-low-degree just counting registers. + // _must_spill live ranges are already on the low degree list. +- if( n->just_lo_degree() && !n->_must_spill ) { +- assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice"); ++ if (n->just_lo_degree() && !n->_must_spill) { ++ assert(!(*_ifg->_yanked)[neighbor], "Cannot move to lo degree twice"); + // Pull from hi-degree list + uint prev = n->_prev; + uint next = n->_next; +- if( prev ) lrgs(prev)._next = next; +- else _hi_degree = next; ++ if (prev) { ++ lrgs(prev)._next = next; ++ } else { ++ _hi_degree = next; ++ } + lrgs(next)._prev = prev; + n->_next = _lo_degree; + _lo_degree = neighbor; +@@ -1304,7 +1314,7 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { + + // Check for "at_risk" LRG's + uint risk_lrg = _lrg_map.find(lrg._risk_bias); +- if( risk_lrg != 0 ) { ++ if( risk_lrg != 0 && !_ifg->neighbors(risk_lrg)->is_empty()) { + // Walk the colored neighbors of the "at_risk" candidate + // Choose a color which is both legal and already taken by a neighbor + // of the "at_risk" candidate in order to improve the chances of the +@@ -1320,9 +1330,9 @@ OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) { + } + + uint copy_lrg = _lrg_map.find(lrg._copy_bias); +- if( copy_lrg != 0 ) { ++ if (copy_lrg != 0) { + // If he has a color, +- if( !(*(_ifg->_yanked))[copy_lrg] ) { ++ if 
(!(*(_ifg->_yanked))[copy_lrg]) { + OptoReg::Name reg = lrgs(copy_lrg).reg(); + // And it is legal for you, + if (is_legal_reg(lrg, reg, chunk)) +@@ -1420,41 +1430,43 @@ uint PhaseChaitin::Select( ) { + + // Remove neighbor colors + IndexSet *s = _ifg->neighbors(lidx); +- + debug_only(RegMask orig_mask = lrg->mask();) +- IndexSetIterator elements(s); +- uint neighbor; +- while ((neighbor = elements.next()) != 0) { +- // Note that neighbor might be a spill_reg. In this case, exclusion +- // of its color will be a no-op, since the spill_reg chunk is in outer +- // space. Also, if neighbor is in a different chunk, this exclusion +- // will be a no-op. (Later on, if lrg runs out of possible colors in +- // its chunk, a new chunk of color may be tried, in which case +- // examination of neighbors is started again, at retry_next_chunk.) +- LRG &nlrg = lrgs(neighbor); +- OptoReg::Name nreg = nlrg.reg(); +- // Only subtract masks in the same chunk +- if( nreg >= chunk && nreg < chunk + RegMask::CHUNK_SIZE ) { ++ ++ if (!s->is_empty()) { ++ IndexSetIterator elements(s); ++ uint neighbor; ++ while ((neighbor = elements.next()) != 0) { ++ // Note that neighbor might be a spill_reg. In this case, exclusion ++ // of its color will be a no-op, since the spill_reg chunk is in outer ++ // space. Also, if neighbor is in a different chunk, this exclusion ++ // will be a no-op. (Later on, if lrg runs out of possible colors in ++ // its chunk, a new chunk of color may be tried, in which case ++ // examination of neighbors is started again, at retry_next_chunk.) 
++ LRG &nlrg = lrgs(neighbor); ++ OptoReg::Name nreg = nlrg.reg(); ++ // Only subtract masks in the same chunk ++ if( nreg >= chunk && nreg < chunk + RegMask::CHUNK_SIZE ) { + #ifndef PRODUCT +- uint size = lrg->mask().Size(); +- RegMask rm = lrg->mask(); ++ uint size = lrg->mask().Size(); ++ RegMask rm = lrg->mask(); + #endif +- lrg->SUBTRACT(nlrg.mask()); ++ lrg->SUBTRACT(nlrg.mask()); + #ifndef PRODUCT +- if (trace_spilling() && lrg->mask().Size() != size) { +- ttyLocker ttyl; +- tty->print("L%d ", lidx); +- rm.dump(); +- tty->print(" intersected L%d ", neighbor); +- nlrg.mask().dump(); +- tty->print(" removed "); +- rm.SUBTRACT(lrg->mask()); +- rm.dump(); +- tty->print(" leaving "); +- lrg->mask().dump(); +- tty->cr(); +- } ++ if (trace_spilling() && lrg->mask().Size() != size) { ++ ttyLocker ttyl; ++ tty->print("L%d ", lidx); ++ rm.dump(); ++ tty->print(" intersected L%d ", neighbor); ++ nlrg.mask().dump(); ++ tty->print(" removed "); ++ rm.SUBTRACT(lrg->mask()); ++ rm.dump(); ++ tty->print(" leaving "); ++ lrg->mask().dump(); ++ tty->cr(); ++ } + #endif ++ } + } + } + //assert(is_allstack == lrg->mask().is_AllStack(), "nbrs must not change AllStackedness"); +@@ -1827,7 +1839,7 @@ bool PhaseChaitin::stretch_base_pointer_live_ranges(ResourceArea *a) { + + // Found a safepoint? 
+ JVMState *jvms = n->jvms(); +- if( jvms ) { ++ if (jvms && !liveout.is_empty()) { + // Now scan for a live derived pointer + IndexSetIterator elements(&liveout); + uint neighbor; +@@ -1983,12 +1995,14 @@ void PhaseChaitin::dump(const Block *b) const { + // Print live-out info at end of block + if( _live ) { + tty->print("Liveout: "); +- IndexSet *live = _live->live(b); +- IndexSetIterator elements(live); + tty->print("{"); +- uint i; +- while ((i = elements.next()) != 0) { +- tty->print("L%d ", _lrg_map.find_const(i)); ++ IndexSet *live = _live->live(b); ++ if (!live->is_empty()) { ++ IndexSetIterator elements(live); ++ uint i; ++ while ((i = elements.next()) != 0) { ++ tty->print("L%d ", _lrg_map.find_const(i)); ++ } + } + tty->print_cr("}"); + } +diff --git a/hotspot/src/share/vm/opto/coalesce.cpp b/hotspot/src/share/vm/opto/coalesce.cpp +index c675445bf..988a45ec2 100644 +--- a/hotspot/src/share/vm/opto/coalesce.cpp ++++ b/hotspot/src/share/vm/opto/coalesce.cpp +@@ -602,29 +602,40 @@ void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, + // Some original neighbors of lr1 might have gone away + // because the constrained register mask prevented them. + // Remove lr1 from such neighbors. +- IndexSetIterator one(n_lr1); +- uint neighbor; ++ uint neighbor = 0; + LRG &lrg1 = lrgs(lr1); +- while ((neighbor = one.next()) != 0) +- if( !_ulr.member(neighbor) ) +- if( _phc._ifg->neighbors(neighbor)->remove(lr1) ) +- lrgs(neighbor).inc_degree( -lrg1.compute_degree(lrgs(neighbor)) ); ++ ++ if (!n_lr1->is_empty()) { ++ IndexSetIterator one(n_lr1); ++ while ((neighbor = one.next()) != 0) ++ if (!_ulr.member(neighbor)) ++ if (_phc._ifg->neighbors(neighbor)->remove(lr1)) ++ lrgs(neighbor).inc_degree(-lrg1.compute_degree(lrgs(neighbor))); ++ } + + + // lr2 is now called (coalesced into) lr1. + // Remove lr2 from the IFG. 
+- IndexSetIterator two(n_lr2); + LRG &lrg2 = lrgs(lr2); +- while ((neighbor = two.next()) != 0) +- if( _phc._ifg->neighbors(neighbor)->remove(lr2) ) +- lrgs(neighbor).inc_degree( -lrg2.compute_degree(lrgs(neighbor)) ); ++ if (!n_lr2->is_empty()) { ++ IndexSetIterator two(n_lr2); ++ while ((neighbor = two.next()) != 0) { ++ if (_phc._ifg->neighbors(neighbor)->remove(lr2)) { ++ lrgs(neighbor).inc_degree(-lrg2.compute_degree(lrgs(neighbor))); ++ } ++ } ++ } + + // Some neighbors of intermediate copies now interfere with the + // combined live range. +- IndexSetIterator three(&_ulr); +- while ((neighbor = three.next()) != 0) +- if( _phc._ifg->neighbors(neighbor)->insert(lr1) ) +- lrgs(neighbor).inc_degree( lrg1.compute_degree(lrgs(neighbor)) ); ++ if (!_ulr.is_empty()) { ++ IndexSetIterator three(&_ulr); ++ while ((neighbor = three.next()) != 0) { ++ if (_phc._ifg->neighbors(neighbor)->insert(lr1)) { ++ lrgs(neighbor).inc_degree(lrg1.compute_degree(lrgs(neighbor))); ++ } ++ } ++ } + } + + static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) { +diff --git a/hotspot/src/share/vm/opto/ifg.cpp b/hotspot/src/share/vm/opto/ifg.cpp +index 3b33aa7a9..39c0e0155 100644 +--- a/hotspot/src/share/vm/opto/ifg.cpp ++++ b/hotspot/src/share/vm/opto/ifg.cpp +@@ -94,11 +94,13 @@ void PhaseIFG::SquareUp() { + assert( !_is_square, "only on triangular" ); + + // Simple transpose +- for( uint i = 0; i < _maxlrg; i++ ) { +- IndexSetIterator elements(&_adjs[i]); +- uint datum; +- while ((datum = elements.next()) != 0) { +- _adjs[datum].insert( i ); ++ for (uint i = 0; i < _maxlrg; i++) { ++ if (!_adjs[i].is_empty()) { ++ IndexSetIterator elements(&_adjs[i]); ++ uint datum; ++ while ((datum = elements.next()) != 0) { ++ _adjs[datum].insert(i); ++ } + } + } + _is_square = true; +@@ -122,44 +124,52 @@ int PhaseIFG::test_edge_sq( uint a, uint b ) const { + } + + // Union edges of B into A +-void PhaseIFG::Union( uint a, uint b ) { ++void PhaseIFG::Union(uint a, uint b) { + assert( 
_is_square, "only on square" ); + IndexSet *A = &_adjs[a]; +- IndexSetIterator b_elements(&_adjs[b]); +- uint datum; +- while ((datum = b_elements.next()) != 0) { +- if(A->insert(datum)) { +- _adjs[datum].insert(a); +- lrgs(a).invalid_degree(); +- lrgs(datum).invalid_degree(); ++ if (!_adjs[b].is_empty()) { ++ IndexSetIterator b_elements(&_adjs[b]); ++ uint datum; ++ while ((datum = b_elements.next()) != 0) { ++ if (A->insert(datum)) { ++ _adjs[datum].insert(a); ++ lrgs(a).invalid_degree(); ++ lrgs(datum).invalid_degree(); ++ } + } + } + } + + // Yank a Node and all connected edges from the IFG. Return a + // list of neighbors (edges) yanked. +-IndexSet *PhaseIFG::remove_node( uint a ) { ++IndexSet *PhaseIFG::remove_node(uint a) { + assert( _is_square, "only on square" ); + assert( !_yanked->test(a), "" ); + _yanked->set(a); + + // I remove the LRG from all neighbors. +- IndexSetIterator elements(&_adjs[a]); + LRG &lrg_a = lrgs(a); +- uint datum; +- while ((datum = elements.next()) != 0) { +- _adjs[datum].remove(a); +- lrgs(datum).inc_degree( -lrg_a.compute_degree(lrgs(datum)) ); ++ if (!_adjs[a].is_empty()) { ++ IndexSetIterator elements(&_adjs[a]); ++ uint datum; ++ while ((datum = elements.next()) != 0) { ++ _adjs[datum].remove(a); ++ lrgs(datum).inc_degree(-lrg_a.compute_degree(lrgs(datum))); ++ } + } + return neighbors(a); + } + + // Re-insert a yanked Node. +-void PhaseIFG::re_insert( uint a ) { ++void PhaseIFG::re_insert(uint a) { + assert( _is_square, "only on square" ); + assert( _yanked->test(a), "" ); + (*_yanked) >>= a; + ++ if (_adjs[a].is_empty()) { ++ return; ++ } ++ + IndexSetIterator elements(&_adjs[a]); + uint datum; + while ((datum = elements.next()) != 0) { +@@ -173,7 +183,7 @@ void PhaseIFG::re_insert( uint a ) { + // mis-aligned (or for Fat-Projections, not-adjacent) then we have to + // MULTIPLY the sizes. Inspect Brigg's thesis on register pairs to see why + // this is so. 
+-int LRG::compute_degree( LRG &l ) const { ++int LRG::compute_degree(LRG &l) const { + int tmp; + int num_regs = _num_regs; + int nregs = l.num_regs(); +@@ -188,14 +198,18 @@ int LRG::compute_degree( LRG &l ) const { + // mis-aligned (or for Fat-Projections, not-adjacent) then we have to + // MULTIPLY the sizes. Inspect Brigg's thesis on register pairs to see why + // this is so. +-int PhaseIFG::effective_degree( uint lidx ) const { ++int PhaseIFG::effective_degree(uint lidx) const { ++ IndexSet *s = neighbors(lidx); ++ if (s->is_empty()) { ++ return 0; ++ } ++ + int eff = 0; + int num_regs = lrgs(lidx).num_regs(); + int fat_proj = lrgs(lidx)._fat_proj; +- IndexSet *s = neighbors(lidx); + IndexSetIterator elements(s); + uint nidx; +- while((nidx = elements.next()) != 0) { ++ while ((nidx = elements.next()) != 0) { + LRG &lrgn = lrgs(nidx); + int nregs = lrgn.num_regs(); + eff += (fat_proj || lrgn._fat_proj) // either is a fat-proj? +@@ -210,14 +224,16 @@ int PhaseIFG::effective_degree( uint lidx ) const { + void PhaseIFG::dump() const { + tty->print_cr("-- Interference Graph --%s--", + _is_square ? "square" : "triangular" ); +- if( _is_square ) { +- for( uint i = 0; i < _maxlrg; i++ ) { ++ if (_is_square) { ++ for (uint i = 0; i < _maxlrg; i++) { + tty->print( (*_yanked)[i] ? 
"XX " : " "); + tty->print("L%d: { ",i); +- IndexSetIterator elements(&_adjs[i]); +- uint datum; +- while ((datum = elements.next()) != 0) { +- tty->print("L%d ", datum); ++ if (!_adjs[i].is_empty()) { ++ IndexSetIterator elements(&_adjs[i]); ++ uint datum; ++ while ((datum = elements.next()) != 0) { ++ tty->print("L%d ", datum); ++ } + } + tty->print_cr("}"); + +@@ -235,10 +251,12 @@ void PhaseIFG::dump() const { + tty->print("L%d ",j - 1); + } + tty->print("| "); +- IndexSetIterator elements(&_adjs[i]); +- uint datum; +- while ((datum = elements.next()) != 0) { +- tty->print("L%d ", datum); ++ if (!_adjs[i].is_empty()) { ++ IndexSetIterator elements(&_adjs[i]); ++ uint datum; ++ while ((datum = elements.next()) != 0) { ++ tty->print("L%d ", datum); ++ } + } + tty->print("}\n"); + } +@@ -265,16 +283,18 @@ void PhaseIFG::verify( const PhaseChaitin *pc ) const { + for( uint i = 0; i < _maxlrg; i++ ) { + assert(!((*_yanked)[i]) || !neighbor_cnt(i), "Is removed completely" ); + IndexSet *set = &_adjs[i]; +- IndexSetIterator elements(set); +- uint idx; +- uint last = 0; +- while ((idx = elements.next()) != 0) { +- assert(idx != i, "Must have empty diagonal"); +- assert(pc->_lrg_map.find_const(idx) == idx, "Must not need Find"); +- assert(_adjs[idx].member(i), "IFG not square"); +- assert(!(*_yanked)[idx], "No yanked neighbors"); +- assert(last < idx, "not sorted increasing"); +- last = idx; ++ if (!set->is_empty()) { ++ IndexSetIterator elements(set); ++ uint idx; ++ uint last = 0; ++ while ((idx = elements.next()) != 0) { ++ assert(idx != i, "Must have empty diagonal"); ++ assert(pc->_lrg_map.find_const(idx) == idx, "Must not need Find"); ++ assert(_adjs[idx].member(i), "IFG not square"); ++ assert(!(*_yanked)[idx], "No yanked neighbors"); ++ assert(last < idx, "not sorted increasing"); ++ last = idx; ++ } + } + assert(!lrgs(i)._degree_valid || effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong"); + } +@@ -284,17 +304,21 @@ void PhaseIFG::verify( 
const PhaseChaitin *pc ) const { + // Interfere this register with everything currently live. Use the RegMasks + // to trim the set of possible interferences. Return a count of register-only + // interferences as an estimate of register pressure. +-void PhaseChaitin::interfere_with_live( uint r, IndexSet *liveout ) { +- uint retval = 0; +- // Interfere with everything live. +- const RegMask &rm = lrgs(r).mask(); +- // Check for interference by checking overlap of regmasks. +- // Only interfere if acceptable register masks overlap. +- IndexSetIterator elements(liveout); +- uint l; +- while( (l = elements.next()) != 0 ) +- if( rm.overlap( lrgs(l).mask() ) ) +- _ifg->add_edge( r, l ); ++void PhaseChaitin::interfere_with_live(uint r, IndexSet *liveout) { ++ if (!liveout->is_empty()) { ++ uint retval = 0; ++ // Interfere with everything live. ++ const RegMask &rm = lrgs(r).mask(); ++ // Check for interference by checking overlap of regmasks. ++ // Only interfere if acceptable register masks overlap. ++ IndexSetIterator elements(liveout); ++ uint l; ++ while ((l = elements.next()) != 0) { ++ if (rm.overlap(lrgs(l).mask())) { ++ _ifg->add_edge(r, l); ++ } ++ } ++ } + } + + // Actually build the interference graph. Uses virtual registers only, no +@@ -390,6 +414,9 @@ void PhaseChaitin::build_ifg_virtual( ) { + } + + uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) { ++ if (liveout->is_empty()) { ++ return 0; ++ } + IndexSetIterator elements(liveout); + uint lidx; + uint cnt = 0; +@@ -405,6 +432,9 @@ uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) { + } + + uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) { ++ if (liveout->is_empty()) { ++ return 0; ++ } + IndexSetIterator elements(liveout); + uint lidx; + uint cnt = 0; +@@ -489,23 +519,25 @@ uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) { + int inst_count = last_inst - first_inst; + double cost = (inst_count <= 0) ? 
0.0 : block->_freq * double(inst_count); + assert(!(cost < 0.0), "negative spill cost" ); +- IndexSetIterator elements(&liveout); +- uint lidx; +- while ((lidx = elements.next()) != 0) { +- LRG &lrg = lrgs(lidx); +- lrg._area += cost; +- // Compute initial register pressure +- if (lrg.mask().is_UP() && lrg.mask_size()) { +- if (lrg._is_float || lrg._is_vector) { // Count float pressure +- pressure[1] += lrg.reg_pressure(); +- if (pressure[1] > block->_freg_pressure) { +- block->_freg_pressure = pressure[1]; +- } +- // Count int pressure, but do not count the SP, flags +- } else if(lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) { +- pressure[0] += lrg.reg_pressure(); +- if (pressure[0] > block->_reg_pressure) { +- block->_reg_pressure = pressure[0]; ++ if (!liveout.is_empty()) { ++ IndexSetIterator elements(&liveout); ++ uint lidx; ++ while ((lidx = elements.next()) != 0) { ++ LRG &lrg = lrgs(lidx); ++ lrg._area += cost; ++ // Compute initial register pressure ++ if (lrg.mask().is_UP() && lrg.mask_size()) { ++ if (lrg._is_float || lrg._is_vector) { // Count float pressure ++ pressure[1] += lrg.reg_pressure(); ++ if (pressure[1] > block->_freg_pressure) { ++ block->_freg_pressure = pressure[1]; ++ } ++ // Count int pressure, but do not count the SP, flags ++ } else if (lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI])) { ++ pressure[0] += lrg.reg_pressure(); ++ if (pressure[0] > block->_reg_pressure) { ++ block->_reg_pressure = pressure[0]; ++ } + } + } + } +diff --git a/hotspot/src/share/vm/opto/indexSet.cpp b/hotspot/src/share/vm/opto/indexSet.cpp +index 4ba99e727..958901007 100644 +--- a/hotspot/src/share/vm/opto/indexSet.cpp ++++ b/hotspot/src/share/vm/opto/indexSet.cpp +@@ -177,6 +177,9 @@ IndexSet::BitBlock *IndexSet::alloc_block() { + IndexSet::BitBlock *IndexSet::alloc_block_containing(uint element) { + BitBlock *block = alloc_block(); + uint bi = get_block_index(element); ++ if (bi >= _current_block_limit) { ++ 
_current_block_limit = bi + 1; ++ } + _blocks[bi] = block; + return block; + } +@@ -191,7 +194,7 @@ void IndexSet::free_block(uint i) { + assert(block != &_empty_block, "cannot free the empty block"); + block->set_next((IndexSet::BitBlock*)Compile::current()->indexSet_free_block_list()); + Compile::current()->set_indexSet_free_block_list(block); +- set_block(i,&_empty_block); ++ set_block(i, &_empty_block); + } + + //------------------------------lrg_union-------------------------------------- +@@ -234,38 +237,42 @@ uint IndexSet::lrg_union(uint lr1, uint lr2, + // other color. (A variant of the Briggs assertion) + uint reg_degree = 0; + +- uint element; ++ uint element = 0; + // Load up the combined interference set with the neighbors of one +- IndexSetIterator elements(one); +- while ((element = elements.next()) != 0) { +- LRG &lrg = ifg->lrgs(element); +- if (mask.overlap(lrg.mask())) { +- insert(element); +- if( !lrg.mask().is_AllStack() ) { +- reg_degree += lrg1.compute_degree(lrg); +- if( reg_degree >= fail_degree ) return reg_degree; +- } else { +- // !!!!! Danger! No update to reg_degree despite having a neighbor. +- // A variant of the Briggs assertion. +- // Not needed if I simplify during coalesce, ala George/Appel. +- assert( lrg.lo_degree(), "" ); +- } +- } +- } +- // Add neighbors of two as well +- IndexSetIterator elements2(two); +- while ((element = elements2.next()) != 0) { +- LRG &lrg = ifg->lrgs(element); +- if (mask.overlap(lrg.mask())) { +- if (insert(element)) { +- if( !lrg.mask().is_AllStack() ) { +- reg_degree += lrg2.compute_degree(lrg); +- if( reg_degree >= fail_degree ) return reg_degree; ++ if (!one->is_empty()) { ++ IndexSetIterator elements(one); ++ while ((element = elements.next()) != 0) { ++ LRG &lrg = ifg->lrgs(element); ++ if (mask.overlap(lrg.mask())) { ++ insert(element); ++ if (!lrg.mask().is_AllStack()) { ++ reg_degree += lrg1.compute_degree(lrg); ++ if (reg_degree >= fail_degree) return reg_degree; + } else { + // !!!!! 
Danger! No update to reg_degree despite having a neighbor. + // A variant of the Briggs assertion. + // Not needed if I simplify during coalesce, ala George/Appel. +- assert( lrg.lo_degree(), "" ); ++ assert(lrg.lo_degree(), ""); ++ } ++ } ++ } ++ } ++ // Add neighbors of two as well ++ if (!two->is_empty()) { ++ IndexSetIterator elements2(two); ++ while ((element = elements2.next()) != 0) { ++ LRG &lrg = ifg->lrgs(element); ++ if (mask.overlap(lrg.mask())) { ++ if (insert(element)) { ++ if (!lrg.mask().is_AllStack()) { ++ reg_degree += lrg2.compute_degree(lrg); ++ if (reg_degree >= fail_degree) return reg_degree; ++ } else { ++ // !!!!! Danger! No update to reg_degree despite having a neighbor. ++ // A variant of the Briggs assertion. ++ // Not needed if I simplify during coalesce, ala George/Appel. ++ assert(lrg.lo_degree(), ""); ++ } + } + } + } +@@ -285,6 +292,7 @@ IndexSet::IndexSet (IndexSet *set) { + _max_elements = set->_max_elements; + #endif + _count = set->_count; ++ _current_block_limit = set->_current_block_limit; + _max_blocks = set->_max_blocks; + if (_max_blocks <= preallocated_block_list_size) { + _blocks = _preallocated_block_list; +@@ -314,6 +322,7 @@ void IndexSet::initialize(uint max_elements) { + _max_elements = max_elements; + #endif + _count = 0; ++ _current_block_limit = 0; + _max_blocks = (max_elements + bits_per_block - 1) / bits_per_block; + + if (_max_blocks <= preallocated_block_list_size) { +@@ -338,6 +347,7 @@ void IndexSet::initialize(uint max_elements, Arena *arena) { + _max_elements = max_elements; + #endif // ASSERT + _count = 0; ++ _current_block_limit = 0; + _max_blocks = (max_elements + bits_per_block - 1) / bits_per_block; + + if (_max_blocks <= preallocated_block_list_size) { +@@ -360,7 +370,8 @@ void IndexSet::swap(IndexSet *set) { + set->check_watch("swap", _serial_number); + #endif + +- for (uint i = 0; i < _max_blocks; i++) { ++ uint max = MAX2(_current_block_limit, set->_current_block_limit); ++ for (uint i = 0; i < 
max; i++) { + BitBlock *temp = _blocks[i]; + set_block(i, set->_blocks[i]); + set->set_block(i, temp); +@@ -368,6 +379,10 @@ void IndexSet::swap(IndexSet *set) { + uint temp = _count; + _count = set->_count; + set->_count = temp; ++ ++ temp = _current_block_limit; ++ _current_block_limit = set->_current_block_limit; ++ set->_current_block_limit = temp; + } + + //---------------------------- IndexSet::dump() ----------------------------- +@@ -375,12 +390,13 @@ void IndexSet::swap(IndexSet *set) { + + #ifndef PRODUCT + void IndexSet::dump() const { +- IndexSetIterator elements(this); +- + tty->print("{"); +- uint i; +- while ((i = elements.next()) != 0) { +- tty->print("L%d ", i); ++ if (!this->is_empty()) { ++ IndexSetIterator elements(this); ++ uint i; ++ while ((i = elements.next()) != 0) { ++ tty->print("L%d ", i); ++ } + } + tty->print_cr("}"); + } +@@ -435,12 +451,14 @@ void IndexSet::verify() const { + } + } + +- IndexSetIterator elements(this); +- count = 0; +- while ((i = elements.next()) != 0) { +- count++; +- assert(member(i), "returned a non member"); +- assert(count <= _count, "iterator returned wrong number of elements"); ++ if (!this->is_empty()) { ++ IndexSetIterator elements(this); ++ count = 0; ++ while ((i = elements.next()) != 0) { ++ count++; ++ assert(member(i), "returned a non member"); ++ assert(count <= _count, "iterator returned wrong number of elements"); ++ } + } + } + #endif +@@ -449,44 +467,35 @@ void IndexSet::verify() const { + // Create an iterator for a set. If empty blocks are detected when iterating + // over the set, these blocks are replaced. + +-IndexSetIterator::IndexSetIterator(IndexSet *set) { ++IndexSetIterator::IndexSetIterator(IndexSet *set) : ++ _current(0), ++ _value(0), ++ _next_word(IndexSet::words_per_block), ++ _next_block(set->is_empty() ? 1 : 0), ++ _max_blocks(set->is_empty() ? 
1 : set->_current_block_limit), ++ _words(NULL), ++ _blocks(set->_blocks), ++ _set(set) { + #ifdef ASSERT + if (CollectIndexSetStatistics) { + set->tally_iteration_statistics(); + } + set->check_watch("traversed", set->count()); + #endif +- if (set->is_empty()) { +- _current = 0; +- _next_word = IndexSet::words_per_block; +- _next_block = 1; +- _max_blocks = 1; +- +- // We don't need the following values when we iterate over an empty set. +- // The commented out code is left here to document that the omission +- // is intentional. +- // +- //_value = 0; +- //_words = NULL; +- //_blocks = NULL; +- //_set = NULL; +- } else { +- _current = 0; +- _value = 0; +- _next_block = 0; +- _next_word = IndexSet::words_per_block; +- +- _max_blocks = set->_max_blocks; +- _words = NULL; +- _blocks = set->_blocks; +- _set = set; +- } + } + + //---------------------------- IndexSetIterator(const) ----------------------------- + // Iterate over a constant IndexSet. + +-IndexSetIterator::IndexSetIterator(const IndexSet *set) { ++IndexSetIterator::IndexSetIterator(const IndexSet *set) : ++ _current(0), ++ _value(0), ++ _next_word(IndexSet::words_per_block), ++ _next_block(set->is_empty() ? 1 : 0), ++ _max_blocks(set->is_empty() ? 1 : set->_current_block_limit), ++ _words(NULL), ++ _blocks(set->_blocks), ++ _set(NULL) { + #ifdef ASSERT + if (CollectIndexSetStatistics) { + set->tally_iteration_statistics(); +@@ -494,31 +503,6 @@ IndexSetIterator::IndexSetIterator(const IndexSet *set) { + // We don't call check_watch from here to avoid bad recursion. + // set->check_watch("traversed const", set->count()); + #endif +- if (set->is_empty()) { +- _current = 0; +- _next_word = IndexSet::words_per_block; +- _next_block = 1; +- _max_blocks = 1; +- +- // We don't need the following values when we iterate over an empty set. +- // The commented out code is left here to document that the omission +- // is intentional. 
+- // +- //_value = 0; +- //_words = NULL; +- //_blocks = NULL; +- //_set = NULL; +- } else { +- _current = 0; +- _value = 0; +- _next_block = 0; +- _next_word = IndexSet::words_per_block; +- +- _max_blocks = set->_max_blocks; +- _words = NULL; +- _blocks = set->_blocks; +- _set = NULL; +- } + } + + //---------------------------- List16Iterator::advance_and_next() ----------------------------- +@@ -536,7 +520,7 @@ uint IndexSetIterator::advance_and_next() { + + _next_word = wi+1; + +- return next(); ++ return next_value(); + } + } + +@@ -555,7 +539,7 @@ uint IndexSetIterator::advance_and_next() { + _next_block = bi+1; + _next_word = wi+1; + +- return next(); ++ return next_value(); + } + } + +diff --git a/hotspot/src/share/vm/opto/indexSet.hpp b/hotspot/src/share/vm/opto/indexSet.hpp +index ef5aed18b..6a15fa02d 100644 +--- a/hotspot/src/share/vm/opto/indexSet.hpp ++++ b/hotspot/src/share/vm/opto/indexSet.hpp +@@ -189,14 +189,17 @@ class IndexSet : public ResourceObj { + // The number of elements in the set + uint _count; + ++ // The current upper limit of blocks that has been allocated and might be in use ++ uint _current_block_limit; ++ ++ // The number of top level array entries in use ++ uint _max_blocks; ++ + // Our top level array of bitvector segments + BitBlock **_blocks; + + BitBlock *_preallocated_block_list[preallocated_block_list_size]; + +- // The number of top level array entries in use +- uint _max_blocks; +- + // Our assertions need to know the maximum number allowed in the set + #ifdef ASSERT + uint _max_elements; +@@ -263,12 +266,13 @@ class IndexSet : public ResourceObj { + check_watch("clear"); + #endif + _count = 0; +- for (uint i = 0; i < _max_blocks; i++) { ++ for (uint i = 0; i < _current_block_limit; i++) { + BitBlock *block = _blocks[i]; + if (block != &_empty_block) { + free_block(i); + } + } ++ _current_block_limit = 0; + } + + uint count() const { return _count; } +@@ -419,18 +423,18 @@ class IndexSetIterator VALUE_OBJ_CLASS_SPEC { + // 
The index of the next word we will inspect + uint _next_word; + ++ // The index of the next block we will inspect ++ uint _next_block; ++ ++ // The number of blocks in the set ++ uint _max_blocks; ++ + // A pointer to the contents of the current block + uint32 *_words; + +- // The index of the next block we will inspect +- uint _next_block; +- + // A pointer to the blocks in our set + IndexSet::BitBlock **_blocks; + +- // The number of blocks in the set +- uint _max_blocks; +- + // If the iterator was created from a non-const set, we replace + // non-canonical empty blocks with the _empty_block pointer. If + // _set is NULL, we do no replacement. +@@ -448,20 +452,26 @@ class IndexSetIterator VALUE_OBJ_CLASS_SPEC { + IndexSetIterator(IndexSet *set); + IndexSetIterator(const IndexSet *set); + ++ // Return the next element of the set. ++ uint next_value() { ++ uint current = _current; ++ uint value = _value; ++ while (mask_bits(current,window_mask) == 0) { ++ current >>= window_size; ++ value += window_size; ++ } ++ ++ uint advance = _second_bit[mask_bits(current,window_mask)]; ++ _current = current >> advance; ++ _value = value + advance; ++ return value + _first_bit[mask_bits(current,window_mask)]; ++ } ++ + // Return the next element of the set. Return 0 when done. 
+ uint next() { + uint current = _current; + if (current != 0) { +- uint value = _value; +- while (mask_bits(current,window_mask) == 0) { +- current >>= window_size; +- value += window_size; +- } +- +- uint advance = _second_bit[mask_bits(current,window_mask)]; +- _current = current >> advance; +- _value = value + advance; +- return value + _first_bit[mask_bits(current,window_mask)]; ++ return next_value(); + } else { + return advance_and_next(); + } +diff --git a/hotspot/src/share/vm/opto/live.cpp b/hotspot/src/share/vm/opto/live.cpp +index 787f5ab88..53599162e 100644 +--- a/hotspot/src/share/vm/opto/live.cpp ++++ b/hotspot/src/share/vm/opto/live.cpp +@@ -69,7 +69,7 @@ void PhaseLive::compute(uint maxlrg) { + + // Array of delta-set pointers, indexed by block pre_order-1. + _deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg.number_of_blocks()); +- memset( _deltas, 0, sizeof(IndexSet*)* _cfg.number_of_blocks()); ++ memset(_deltas, 0, sizeof(IndexSet*)* _cfg.number_of_blocks()); + + _free_IndexSet = NULL; + +@@ -93,8 +93,8 @@ void PhaseLive::compute(uint maxlrg) { + + uint r = _names.at(n->_idx); + assert(!def_outside->member(r), "Use of external LRG overlaps the same LRG defined in this block"); +- def->insert( r ); +- use->remove( r ); ++ def->insert(r); ++ use->remove(r); + uint cnt = n->req(); + for (uint k = 1; k < cnt; k++) { + Node *nk = n->in(k); +@@ -134,7 +134,7 @@ void PhaseLive::compute(uint maxlrg) { + while (_worklist->size()) { + Block* block = _worklist->pop(); + IndexSet *delta = getset(block); +- assert( delta->count(), "missing delta set" ); ++ assert(delta->count(), "missing delta set"); + + // Add new-live-in to predecessors live-out sets + for (uint l = 1; l < block->num_preds(); l++) { +@@ -173,34 +173,32 @@ void PhaseLive::stats(uint iters) const { + + // Get an IndexSet for a block. Return existing one, if any. Make a new + // empty one if a prior one does not exist. 
+-IndexSet *PhaseLive::getset( Block *p ) { ++IndexSet *PhaseLive::getset(Block *p) { + IndexSet *delta = _deltas[p->_pre_order-1]; +- if( !delta ) // Not on worklist? ++ if( !delta ) { // Not on worklist? + // Get a free set; flag as being on worklist +- delta = _deltas[p->_pre_order-1] = getfreeset(); ++ delta = _deltas[p->_pre_order - 1] = getfreeset(); ++ } + return delta; // Return set of new live-out items + } + + // Pull from free list, or allocate. Internal allocation on the returned set + // is always from thread local storage. +-IndexSet *PhaseLive::getfreeset( ) { ++IndexSet *PhaseLive::getfreeset() { + IndexSet *f = _free_IndexSet; +- if( !f ) { ++ if (!f) { + f = new IndexSet; +-// f->set_arena(Thread::current()->resource_area()); + f->initialize(_maxlrg, Thread::current()->resource_area()); + } else { + // Pull from free list + _free_IndexSet = f->next(); +- //f->_cnt = 0; // Reset to empty +-// f->set_arena(Thread::current()->resource_area()); + f->initialize(_maxlrg, Thread::current()->resource_area()); + } + return f; + } + + // Free an IndexSet from a block. +-void PhaseLive::freeset( const Block *p ) { ++void PhaseLive::freeset(const Block *p) { + IndexSet *f = _deltas[p->_pre_order-1]; + f->set_next(_free_IndexSet); + _free_IndexSet = f; // Drop onto free list +@@ -209,53 +207,58 @@ void PhaseLive::freeset( const Block *p ) { + + // Add a live-out value to a given blocks live-out set. If it is new, then + // also add it to the delta set and stick the block on the worklist. +-void PhaseLive::add_liveout( Block *p, uint r, VectorSet &first_pass ) { ++void PhaseLive::add_liveout(Block *p, uint r, VectorSet &first_pass) { + IndexSet *live = &_live[p->_pre_order-1]; + if( live->insert(r) ) { // If actually inserted... + // We extended the live-out set. See if the value is generated locally. + // If it is not, then we must extend the live-in set. + if( !_defs[p->_pre_order-1].member( r ) ) { + if( !_deltas[p->_pre_order-1] && // Not on worklist? 
+- first_pass.test(p->_pre_order) ) ++ first_pass.test(p->_pre_order)) { + _worklist->push(p); // Actually go on worklist if already 1st pass ++ } + getset(p)->insert(r); + } + } + } + + // Add a vector of live-out values to a given blocks live-out set. +-void PhaseLive::add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass ) { ++void PhaseLive::add_liveout(Block *p, IndexSet *lo, VectorSet &first_pass) { + IndexSet *live = &_live[p->_pre_order-1]; + IndexSet *defs = &_defs[p->_pre_order-1]; + IndexSet *on_worklist = _deltas[p->_pre_order-1]; + IndexSet *delta = on_worklist ? on_worklist : getfreeset(); + +- IndexSetIterator elements(lo); +- uint r; +- while ((r = elements.next()) != 0) { +- if( live->insert(r) && // If actually inserted... +- !defs->member( r ) ) // and not defined locally +- delta->insert(r); // Then add to live-in set ++ if (!lo->is_empty()) { ++ IndexSetIterator elements(lo); ++ uint r; ++ while ((r = elements.next()) != 0) { ++ if (live->insert(r) && // If actually inserted... ++ !defs->member(r)) { // and not defined locally ++ delta->insert(r); // Then add to live-in set ++ } ++ } + } + +- if( delta->count() ) { // If actually added things ++ if (delta->count()) { // If actually added things + _deltas[p->_pre_order-1] = delta; // Flag as on worklist now +- if( !on_worklist && // Not on worklist? +- first_pass.test(p->_pre_order) ) +- _worklist->push(p); // Actually go on worklist if already 1st pass +- } else { // Nothing there; just free it ++ if (!on_worklist && // Not on worklist? 
++ first_pass.test(p->_pre_order)) { ++ _worklist->push(p); // Actually go on worklist if already 1st pass ++ } ++ } else { // Nothing there; just free it + delta->set_next(_free_IndexSet); +- _free_IndexSet = delta; // Drop onto free list ++ _free_IndexSet = delta; // Drop onto free list + } + } + + #ifndef PRODUCT + // Dump the live-out set for a block +-void PhaseLive::dump( const Block *b ) const { ++void PhaseLive::dump(const Block *b) const { + tty->print("Block %d: ",b->_pre_order); + tty->print("LiveOut: "); _live[b->_pre_order-1].dump(); + uint cnt = b->number_of_nodes(); +- for( uint i=0; iprint("L%d/", _names.at(b->get_node(i)->_idx)); + b->get_node(i)->dump(); + } +@@ -263,7 +266,7 @@ void PhaseLive::dump( const Block *b ) const { + } + + // Verify that base pointers and derived pointers are still sane. +-void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const { ++void PhaseChaitin::verify_base_ptrs(ResourceArea *a) const { + #ifdef ASSERT + Unique_Node_List worklist(a); + for (uint i = 0; i < _cfg.number_of_blocks(); i++) { +@@ -288,17 +291,18 @@ void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const { + worklist.clear(); + worklist.push(check); + uint k = 0; +- while( k < worklist.size() ) { ++ while (k < worklist.size()) { + check = worklist.at(k); + assert(check,"Bad base or derived pointer"); + // See PhaseChaitin::find_base_for_derived() for all cases. 
+ int isc = check->is_Copy(); +- if( isc ) { ++ if (isc) { + worklist.push(check->in(isc)); +- } else if( check->is_Phi() ) { +- for (uint m = 1; m < check->req(); m++) ++ } else if (check->is_Phi()) { ++ for (uint m = 1; m < check->req(); m++) { + worklist.push(check->in(m)); +- } else if( check->is_Con() ) { ++ } ++ } else if (check->is_Con()) { + if (is_derived) { + // Derived is NULL+offset + assert(!is_derived || check->bottom_type()->is_ptr()->ptr() == TypePtr::Null,"Bad derived pointer"); +@@ -312,8 +316,8 @@ void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const { + check->bottom_type()->is_ptr()->ptr() == TypePtr::Null,"Bad base pointer"); + } + } +- } else if( check->bottom_type()->is_ptr()->_offset == 0 ) { +- if(check->is_Proj() || check->is_Mach() && ++ } else if (check->bottom_type()->is_ptr()->_offset == 0) { ++ if (check->is_Proj() || check->is_Mach() && + (check->as_Mach()->ideal_Opcode() == Op_CreateEx || + check->as_Mach()->ideal_Opcode() == Op_ThreadLocal || + check->as_Mach()->ideal_Opcode() == Op_CMoveP || +@@ -347,7 +351,7 @@ void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const { + } + + // Verify that graphs and base pointers are still sane. +-void PhaseChaitin::verify( ResourceArea *a, bool verify_ifg ) const { ++void PhaseChaitin::verify(ResourceArea *a, bool verify_ifg) const { + #ifdef ASSERT + if( VerifyOpto || VerifyRegisterAllocator ) { + _cfg.verify(); +diff --git a/hotspot/src/share/vm/opto/reg_split.cpp b/hotspot/src/share/vm/opto/reg_split.cpp +index a132f1f9f..de0c9fc7f 100644 +--- a/hotspot/src/share/vm/opto/reg_split.cpp ++++ b/hotspot/src/share/vm/opto/reg_split.cpp +@@ -1250,10 +1250,12 @@ uint PhaseChaitin::Split(uint maxlrg, ResourceArea* split_arena) { + // it contains no members which compress to defidx. Finding such an + // instance may be a case to add liveout adjustment in compress_uf_map(). + // See 5063219. 
+- uint member; +- IndexSetIterator isi(liveout); +- while ((member = isi.next()) != 0) { +- assert(defidx != _lrg_map.find_const(member), "Live out member has not been compressed"); ++ if (!liveout->is_empty()) { ++ uint member; ++ IndexSetIterator isi(liveout); ++ while ((member = isi.next()) != 0) { ++ assert(defidx != _lrg_map.find_const(member), "Live out member has not been compressed"); ++ } + } + #endif + Reachblock[slidx] = NULL; diff --git a/Ddot-intrinsic-implement.patch b/Ddot-intrinsic-implement.patch new file mode 100644 index 0000000000000000000000000000000000000000..2b091a517c5c4b04ee7549857a4bad3d4a7415d4 --- /dev/null +++ b/Ddot-intrinsic-implement.patch @@ -0,0 +1,479 @@ +diff --git a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp +index 1e9b1cb91..c0fd37d05 100644 +--- a/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp ++++ b/hotspot/src/cpu/aarch64/vm/assembler_aarch64.hpp +@@ -2061,6 +2061,14 @@ public: + ld_st(Vt, T, a, op1, op2); \ + } + ++ void ld1_d(FloatRegister Vt, int index, const Address &a) { ++ starti; ++ assert(index == 0 || index == 1, "Index must be 0 or 1 for Vx.2D"); ++ f(0, 31), f(index & 1, 30); ++ f(0b001101110, 29, 21), rf(a.index(), 16), f(0b1000, 15, 12); ++ f(0b01, 11, 10), rf(a.base(), 5), rf(Vt, 0); ++ } ++ + INSN1(ld1, 0b001100010, 0b0111); + INSN2(ld1, 0b001100010, 0b1010); + INSN3(ld1, 0b001100010, 0b0110); +@@ -2186,6 +2194,13 @@ public: + + #undef INSN + ++ void faddp_d(FloatRegister Vd, FloatRegister Vn) { ++ starti; ++ f(0b01, 31, 30), f(0b1111100, 29, 23), f(0b1, 22), f(0b11000, 21, 17); ++ f(0b0110110, 16, 10); ++ rf(Vn, 5), rf(Vd, 0); ++ } ++ + #define INSN(NAME, opc) \ + void NAME(FloatRegister Vd, SIMD_Arrangement T, FloatRegister Vn, FloatRegister Vm) { \ + starti; \ +diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp +index f2f85df60..873da580b 100644 +--- 
a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp ++++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.cpp +@@ -2853,6 +2853,124 @@ void MacroAssembler::update_word_crc32(Register crc, Register v, Register tmp, + eor(crc, crc, tmp); + } + ++/** ++ * Multiply and summation of 1 double-precision floating number pairs(sparse) ++ */ ++void MacroAssembler::f2j_ddot_s1(Register dx, Register incx, ++ Register dy, Register incy) { ++ const FloatRegister tmpx = v2; ++ const FloatRegister tmpy = v3; ++ ++ ld1_d(tmpx, 0, Address(dx, incx)); ++ ld1_d(tmpy, 0, Address(dy, incy)); ++ fmaddd(v0, tmpx, tmpy, v0); ++} ++ ++/** ++ * Multiply and summation of 1 double-precision floating number pairs(dense) ++ */ ++void MacroAssembler::f2j_ddot_d1(Register dx, Register dy, int size) { ++ const FloatRegister tmpx = v2; ++ const FloatRegister tmpy = v3; ++ ++ ldrd(tmpx, post(dx, size)); ++ ldrd(tmpy, post(dy, size)); ++ fmaddd(v0, tmpx, tmpy, v0); ++} ++ ++/** ++ * Multiply and summation of 4 double-precision floating numbers ++ */ ++void MacroAssembler::f2j_ddot_d4(Register dx, Register dy) { ++ ld1(v2, v3, T2D, post(dx, 32)); ++ ld1(v4, v5, T2D, post(dy, 32)); ++ fmul(v2, T2D, v2, v4); ++ fmul(v3, T2D, v3, v5); ++ fadd(v0, T2D, v0, v2); ++ fadd(v6, T2D, v6, v3); ++} ++ ++/** ++ * @param n register containing the number of doubles in array ++ * @param dx register pointing to input array ++ * @param incx register containing step len for dx ++ * @param dy register pointing to another input array ++ * @param incy register containing step len for dy ++ * @param temp_reg register containing loop variable ++ */ ++void MacroAssembler::f2j_ddot(Register n, Register dx, Register incx, ++ Register dy, Register incy, Register temp_reg) { ++ Label Ldot_EXIT, Ldot_S_BEGIN, Ldot_S1, Ldot_S10, Ldot_S4, Ldot_D_BEGIN, ++ Ldot_D1, Ldot_D10, Ldot_D4; ++ ++ const int SZ = 8; ++ ++ enter(); ++ fmovd(v0, zr); ++ fmovd(v6, v0); ++ ++ cmp(n, zr); ++ br(Assembler::LE, Ldot_EXIT); ++ ++ cmp(incx, 1); 
++ br(Assembler::NE, Ldot_S_BEGIN); ++ cmp(incy, 1); ++ br(Assembler::NE, Ldot_S_BEGIN); ++ ++ BIND(Ldot_D_BEGIN); ++ asr(temp_reg, n, 2); ++ cmp(temp_reg, zr); ++ br(Assembler::LE, Ldot_D1); ++ ++ BIND(Ldot_D4); ++ f2j_ddot_d4(dx, dy); ++ subs(temp_reg, temp_reg, 1); ++ br(Assembler::NE, Ldot_D4); ++ ++ fadd(v0, T2D, v0, v6); ++ faddp_d(v0, v0); ++ ++ BIND(Ldot_D1); ++ ands(temp_reg, n, 3); ++ br(Assembler::LE, Ldot_EXIT); ++ ++ BIND(Ldot_D10); ++ f2j_ddot_d1(dx, dy, SZ); ++ subs(temp_reg, temp_reg, 1); ++ br(Assembler::NE, Ldot_D10); ++ leave(); ++ ret(lr); ++ ++ BIND(Ldot_S_BEGIN); ++ lsl(incx, incx, 3); ++ lsl(incy, incy, 3); ++ ++ asr(temp_reg, n, 2); ++ cmp(temp_reg, zr); ++ br(Assembler::LE, Ldot_S1); ++ ++ BIND(Ldot_S4); ++ f2j_ddot_s1(dx, incx, dy, incy); ++ f2j_ddot_s1(dx, incx, dy, incy); ++ f2j_ddot_s1(dx, incx, dy, incy); ++ f2j_ddot_s1(dx, incx, dy, incy); ++ subs(temp_reg, temp_reg, 1); ++ br(Assembler::NE, Ldot_S4); ++ ++ BIND(Ldot_S1); ++ ands(temp_reg, n, 3); ++ br(Assembler::LE, Ldot_EXIT); ++ ++ BIND(Ldot_S10); ++ f2j_ddot_s1(dx, incx, dy, incy); ++ subs(temp_reg, temp_reg, 1); ++ br(Assembler::NE, Ldot_S10); ++ ++ BIND(Ldot_EXIT); ++ leave(); ++ ret(lr); ++} ++ + /** + * @param crc register containing existing CRC (32-bit) + * @param buf register pointing to input byte buffer (byte*) +diff --git a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp +index 388177589..1abc7e3b0 100644 +--- a/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp ++++ b/hotspot/src/cpu/aarch64/vm/macroAssembler_aarch64.hpp +@@ -1180,6 +1180,9 @@ public: + Register table0, Register table1, Register table2, Register table3, + bool upper = false); + ++ void f2j_ddot(Register n, Register dx, Register incx, ++ Register dy, Register incy, Register temp_reg); ++ + void string_compare(Register str1, Register str2, + Register cnt1, Register cnt2, Register result, + Register tmp1); +@@ -1236,6 +1239,11 @@ private: + 
// Uses rscratch2 if the address is not directly reachable + Address spill_address(int size, int offset, Register tmp=rscratch2); + ++private: ++ void f2j_ddot_s1(Register dx, Register incx, Register dy, Register incy); ++ void f2j_ddot_d1(Register dx, Register dy, int size); ++ void f2j_ddot_d4(Register dx, Register dy); ++ + public: + void spill(Register Rx, bool is64, int offset) { + if (is64) { +diff --git a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp +index 0d73c0c0c..337d5c1dd 100644 +--- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp ++++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp +@@ -45,6 +45,7 @@ + + #include "stubRoutines_aarch64.hpp" + ++ + #ifdef COMPILER2 + #include "opto/runtime.hpp" + #endif +@@ -3220,6 +3221,39 @@ class StubGenerator: public StubCodeGenerator { + return start; + } + ++ /** ++ * Arguments: ++ * ++ * Inputs: ++ * c_rarg0 - int n ++ * c_rarg1 - double[] dx ++ * c_rarg2 - int incx ++ * c_rarg3 - double[] dy ++ * c_rarg4 - int incy ++ * ++ * Output: ++ * d0 - ddot result ++ * ++ */ ++ address generate_ddotF2jBLAS() { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "f2jblas_ddot"); ++ ++ address start = __ pc(); ++ ++ const Register n = c_rarg0; ++ const Register dx = c_rarg1; ++ const Register incx = c_rarg2; ++ const Register dy = c_rarg3; ++ const Register incy = c_rarg4; ++ ++ BLOCK_COMMENT("Entry:"); ++ ++ __ f2j_ddot(n, dx, incx, dy, incy, rscratch2); ++ ++ return start; ++ } ++ + /** + * Arguments: + * +@@ -4262,6 +4296,10 @@ class StubGenerator: public StubCodeGenerator { + StubRoutines::_montgomerySquare = g.generate_multiply(); + } + ++ if (UseF2jBLASIntrinsics) { ++ StubRoutines::_ddotF2jBLAS = generate_ddotF2jBLAS(); ++ } ++ + if (UseAESIntrinsics) { + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); + StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); +diff --git 
a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp +index 148f9212e..6bd8dbedd 100644 +--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp ++++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp +@@ -852,6 +852,12 @@ + do_name( implCompress_name, "implCompress0") \ + do_signature(implCompress_signature, "([BI)V") \ + \ ++ /* support for com.github.fommil.netlib.F2jBLAS */ \ ++ do_class(com_github_fommil_netlib_f2jblas, "com/github/fommil/netlib/F2jBLAS") \ ++ do_intrinsic(_f2jblas_ddot, com_github_fommil_netlib_f2jblas, ddot_name, ddot_signature, F_R) \ ++ do_name( ddot_name, "ddot") \ ++ do_signature(ddot_signature, "(I[DI[DI)D") \ ++ \ + /* support for sun.security.provider.SHA2 */ \ + do_class(sun_security_provider_sha2, "sun/security/provider/SHA2") \ + do_intrinsic(_sha2_implCompress, sun_security_provider_sha2, implCompress_name, implCompress_signature, F_R) \ +diff --git a/hotspot/src/share/vm/oops/method.cpp b/hotspot/src/share/vm/oops/method.cpp +index 24fae4d30..64cdae9c7 100644 +--- a/hotspot/src/share/vm/oops/method.cpp ++++ b/hotspot/src/share/vm/oops/method.cpp +@@ -1281,7 +1281,9 @@ vmSymbols::SID Method::klass_id_for_intrinsics(Klass* holder) { + // which does not use the class default class loader so we check for its loader here + InstanceKlass* ik = InstanceKlass::cast(holder); + if ((ik->class_loader() != NULL) && !SystemDictionary::is_ext_class_loader(ik->class_loader())) { +- return vmSymbols::NO_SID; // regardless of name, no intrinsics here ++ if (!EnableIntrinsicExternal) { ++ return vmSymbols::NO_SID; // regardless of name, no intrinsics here ++ } + } + + // see if the klass name is well-known: +diff --git a/hotspot/src/share/vm/opto/escape.cpp b/hotspot/src/share/vm/opto/escape.cpp +index 9ef1c5e69..aa1b1ac3a 100644 +--- a/hotspot/src/share/vm/opto/escape.cpp ++++ b/hotspot/src/share/vm/opto/escape.cpp +@@ -978,7 +978,8 @@ void ConnectionGraph::process_call_arguments(CallNode *call) { + 
strcmp(call->as_CallLeaf()->_name, "squareToLen") == 0 || + strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 || + strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 || +- strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0) ++ strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0 || ++ strcmp(call->as_CallLeaf()->_name, "f2jblas_ddot") == 0) + ))) { + call->dump(); + fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name)); +diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp +index 89ebabe6f..5cbc0f012 100644 +--- a/hotspot/src/share/vm/opto/library_call.cpp ++++ b/hotspot/src/share/vm/opto/library_call.cpp +@@ -335,6 +335,7 @@ class LibraryCallKit : public GraphKit { + bool inline_mulAdd(); + bool inline_montgomeryMultiply(); + bool inline_montgomerySquare(); ++ bool inline_ddotF2jBLAS(); + + bool inline_profileBoolean(); + }; +@@ -587,6 +588,10 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) { + if (!UseCRC32Intrinsics) return NULL; + break; + ++ case vmIntrinsics::_f2jblas_ddot: ++ if (!UseF2jBLASIntrinsics) return NULL; ++ break; ++ + case vmIntrinsics::_incrementExactI: + case vmIntrinsics::_addExactI: + if (!Matcher::match_rule_supported(Op_OverflowAddI) || !UseMathExactIntrinsics) return NULL; +@@ -983,6 +988,8 @@ bool LibraryCallKit::try_to_inline(int predicate) { + + case vmIntrinsics::_profileBoolean: + return inline_profileBoolean(); ++ case vmIntrinsics::_f2jblas_ddot: ++ return inline_ddotF2jBLAS(); + + default: + // If you get here, it may be that someone has added a new intrinsic +@@ -6303,6 +6310,49 @@ bool LibraryCallKit::inline_updateBytesCRC32() { + return true; + } + ++/** ++ * double com.github.fommil.netlib.F2jBLAS.ddot(int n, double[] dx, int incx, double[] dy, int incy) ++ */ ++bool LibraryCallKit::inline_ddotF2jBLAS() { ++ assert(callee()->signature()->size() == 5, "update has 5 parameters"); ++ Node* n = argument(1); 
// type: int ++ Node* dx = argument(2); // type: double[] ++ Node* incx = argument(3); // type: int ++ Node* dy = argument(4); // type: double[] ++ Node* incy = argument(5); // type: int ++ ++ const Type* dx_type = dx->Value(&_gvn); ++ const Type* dy_type = dy->Value(&_gvn); ++ const TypeAryPtr* dx_top_src = dx_type->isa_aryptr(); ++ const TypeAryPtr* dy_top_src = dy_type->isa_aryptr(); ++ if (dx_top_src == NULL || dx_top_src->klass() == NULL || ++ dy_top_src == NULL || dy_top_src->klass() == NULL) { ++ // failed array check ++ return false; ++ } ++ ++ // Figure out the size and type of the elements we will be copying. ++ BasicType dx_elem = dx_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); ++ BasicType dy_elem = dy_type->isa_aryptr()->klass()->as_array_klass()->element_type()->basic_type(); ++ if (dx_elem != T_DOUBLE || dy_elem != T_DOUBLE) { ++ return false; ++ } ++ ++ // 'dx_start' points to dx array + scaled offset ++ Node* dx_start = array_element_address(dx, intcon(0), dx_elem); ++ Node* dy_start = array_element_address(dy, intcon(0), dy_elem); ++ ++ address stubAddr = StubRoutines::ddotF2jBLAS(); ++ const char *stubName = "f2jblas_ddot"; ++ Node* call; ++ call = make_runtime_call(RC_LEAF, OptoRuntime::ddotF2jBLAS_Type(), ++ stubAddr, stubName, TypePtr::BOTTOM, ++ n, dx_start, incx, dy_start, incy); ++ Node* result = _gvn.transform(new (C) ProjNode(call, TypeFunc::Parms)); ++ set_result(result); ++ return true; ++} ++ + /** + * Calculate CRC32 for ByteBuffer. 
+ * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) +diff --git a/hotspot/src/share/vm/opto/runtime.cpp b/hotspot/src/share/vm/opto/runtime.cpp +index ba8f42e49..f1fe4d666 100644 +--- a/hotspot/src/share/vm/opto/runtime.cpp ++++ b/hotspot/src/share/vm/opto/runtime.cpp +@@ -920,6 +920,30 @@ const TypeFunc* OptoRuntime::updateBytesCRC32_Type() { + return TypeFunc::make(domain, range); + } + ++/** ++ * double ddot(int n, double *dx, int incx, double *dy, int incy) ++ */ ++const TypeFunc* OptoRuntime::ddotF2jBLAS_Type() { ++ // create input type (domain) ++ int num_args = 5; ++ int argcnt = num_args; ++ const Type** fields = TypeTuple::fields(argcnt); ++ int argp = TypeFunc::Parms; ++ fields[argp++] = TypeInt::INT; // n ++ fields[argp++] = TypeAryPtr::DOUBLES; // dx ++ fields[argp++] = TypeInt::INT; // incx ++ fields[argp++] = TypeAryPtr::DOUBLES; // dy ++ fields[argp++] = TypeInt::INT; // incy ++ assert(argp == TypeFunc::Parms + argcnt, "correct decoding"); ++ const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, fields); ++ ++ // result type needed ++ fields = TypeTuple::fields(1); ++ fields[TypeFunc::Parms + 0] = Type::DOUBLE; ++ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms + 1, fields); ++ return TypeFunc::make(domain, range); ++} ++ + // for cipherBlockChaining calls of aescrypt encrypt/decrypt, four pointers and a length, returning int + const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() { + // create input type (domain) +diff --git a/hotspot/src/share/vm/opto/runtime.hpp b/hotspot/src/share/vm/opto/runtime.hpp +index e3bdfdf9c..66d393c5c 100644 +--- a/hotspot/src/share/vm/opto/runtime.hpp ++++ b/hotspot/src/share/vm/opto/runtime.hpp +@@ -317,6 +317,8 @@ private: + + static const TypeFunc* updateBytesCRC32_Type(); + ++ static const TypeFunc* ddotF2jBLAS_Type(); ++ + // leaf on stack replacement interpreter accessor types + static const TypeFunc* osr_end_Type(); + +diff --git 
a/hotspot/src/share/vm/runtime/globals.hpp b/hotspot/src/share/vm/runtime/globals.hpp +index 7b17e623b..520cc3187 100644 +--- a/hotspot/src/share/vm/runtime/globals.hpp ++++ b/hotspot/src/share/vm/runtime/globals.hpp +@@ -743,6 +743,12 @@ class CommandLineFlags { + product(bool, UseCRC32Intrinsics, false, \ + "use intrinsics for java.util.zip.CRC32") \ + \ ++ experimental(bool, UseF2jBLASIntrinsics, false, \ ++ "use intrinsics for com.github.fommil.netlib.F2jBLAS on aarch64") \ ++ \ ++ experimental(bool, EnableIntrinsicExternal, false, \ ++ "enable intrinsics for methods of external packages") \ ++ \ + develop(bool, TraceCallFixup, false, \ + "Trace all call fixups") \ + \ +diff --git a/hotspot/src/share/vm/runtime/stubRoutines.cpp b/hotspot/src/share/vm/runtime/stubRoutines.cpp +index d943248da..10f438bc5 100644 +--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp ++++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp +@@ -136,6 +136,8 @@ address StubRoutines::_sha512_implCompressMB = NULL; + address StubRoutines::_updateBytesCRC32 = NULL; + address StubRoutines::_crc_table_adr = NULL; + ++address StubRoutines::_ddotF2jBLAS = NULL; ++ + address StubRoutines::_multiplyToLen = NULL; + address StubRoutines::_squareToLen = NULL; + address StubRoutines::_mulAdd = NULL; +diff --git a/hotspot/src/share/vm/runtime/stubRoutines.hpp b/hotspot/src/share/vm/runtime/stubRoutines.hpp +index e18b9127d..a4eeb910d 100644 +--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp ++++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp +@@ -214,6 +214,8 @@ class StubRoutines: AllStatic { + static address _updateBytesCRC32; + static address _crc_table_adr; + ++ static address _ddotF2jBLAS; ++ + static address _multiplyToLen; + static address _squareToLen; + static address _mulAdd; +@@ -377,6 +379,8 @@ class StubRoutines: AllStatic { + static address updateBytesCRC32() { return _updateBytesCRC32; } + static address crc_table_addr() { return _crc_table_adr; } + ++ static address ddotF2jBLAS() { 
return _ddotF2jBLAS; } ++ + static address multiplyToLen() {return _multiplyToLen; } + static address squareToLen() {return _squareToLen; } + static address mulAdd() {return _mulAdd; } diff --git a/fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch b/fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch new file mode 100644 index 0000000000000000000000000000000000000000..64e48a35b54811e73a848c976054413196dd6fa2 --- /dev/null +++ b/fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch @@ -0,0 +1,46 @@ +diff --git a/jdk/src/share/classes/java/lang/Long.java b/jdk/src/share/classes/java/lang/Long.java +index 58c2cc3ba..7b6e14a97 100644 +--- a/jdk/src/share/classes/java/lang/Long.java ++++ b/jdk/src/share/classes/java/lang/Long.java +@@ -812,12 +812,11 @@ public final class Long extends Number implements Comparable { + static final Long cache[]; + + static { +- ++ int h = 127; + String longCacheHighPropValue = + sun.misc.VM.getSavedProperty("java.lang.Long.LongCache.high"); + if (longCacheHighPropValue != null) { + // high value may be configured by property +- int h = 0; + try { + int i = Integer.parseInt(longCacheHighPropValue); + i = Math.max(i, 127); +@@ -826,21 +825,13 @@ public final class Long extends Number implements Comparable { + } catch( NumberFormatException nfe) { + // If the property cannot be parsed into an int, ignore it. 
+ } +- high = h; +- low = -h - 1; +- cache = new Long[(high - low) + 1]; +- int j = low; +- for(int k = 0; k < cache.length; k++) +- cache[k] = new Long(j++); +- +- } else { +- low = -128; +- high = 127; +- cache = new Long[(high - low) + 1]; +- int j = low; +- for(int k = 0; k < cache.length; k++) +- cache[k] = new Long(j++); + } ++ high = h; ++ low = -h - 1; ++ cache = new Long[(high - low) + 1]; ++ int j = low; ++ for(int k = 0; k < cache.length; k++) ++ cache[k] = new Long(j++); + } + } + diff --git a/java-1.8.0-openjdk.spec b/java-1.8.0-openjdk.spec index 268ef8b9c3c874352e5dacce062e8212783a183c..fa6f155b51390bd82652f74139d10500ae66411f 100644 --- a/java-1.8.0-openjdk.spec +++ b/java-1.8.0-openjdk.spec @@ -915,7 +915,7 @@ Provides: java-%{javaver}-%{origin}-accessibility%{?1} = %{epoch}:%{version}-%{r Name: java-%{javaver}-%{origin} Version: %{javaver}.%{updatever}.%{buildver} -Release: 0 +Release: 1 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides. 
This created a @@ -1037,6 +1037,13 @@ Patch95: 8205921-Optimizing-best-of-2-work-stealing-queue-selection.patch # 8u265 Patch96: fix-Long-cache-range-and-remove-VM-option-java.lang.IntegerCache.high-by-default.patch +Patch97: leaf-optimize-in-ParallelScanvageGC.patch +Patch98: 8046294-Generate-the-4-byte-timestamp-randomly.patch +Patch100: 8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch +Patch102: fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch +Patch103: Ddot-intrinsic-implement.patch +Patch104: 8234003-Improve-IndexSet-iteration.patch +Patch105: 8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch ############################################# # @@ -1432,6 +1439,14 @@ pushd %{top_level_dir_name} %patch94 -p1 %patch95 -p1 %patch96 -p1 +%patch97 -p1 +%patch98 -p1 +%patch100 -p1 +%patch102 -p1 +%patch103 -p1 +%patch104 -p1 +%patch105 -p1 + popd @@ -2051,6 +2066,16 @@ require "copy_jdk_configs.lua" %endif %changelog +* Tue Aug 29 2020 jdkboy - 1:1.8.0.265-b10.1 +- Add leaf-optimize-in-ParallelScanvageGC.patch +- Add 8046294-Generate-the-4-byte-timestamp-randomly.patch +- Add 8203481-Incorrect-constraint-for-unextended_sp-in-frame-safe_for_sender.patch +- Add fix-LongCache-s-range-when-BoxTypeCachedMax-number-is-bigger-than-Integer.MAX_VALUE.patch +- Add Ddot-intrinsic-implement.patch +- Add 8234003-Improve-IndexSet-iteration.patch +- Add 8220159-Optimize-various-RegMask-operations-by-introducing-watermarks.patch +- Remove prohibition-of-irreducible-loop-in-mergers.patch + * Tue Aug 25 2020 noah - 1:1.8.0.265-b10.0 - Update to aarch64-shenandoah-jdk8u-8u265-b01 - add fix-Long-cache-range-and-remove-VM-option-java.lang.IntegerCache.high-by-default.patch diff --git a/leaf-optimize-in-ParallelScanvageGC.patch b/leaf-optimize-in-ParallelScanvageGC.patch new file mode 100644 index 0000000000000000000000000000000000000000..425f06d19ca42be693c71524dc725563c96e5316 --- /dev/null +++ 
b/leaf-optimize-in-ParallelScanvageGC.patch @@ -0,0 +1,210 @@ +diff --git a/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp b/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp +index fba64e15f..1c92314f9 100644 +--- a/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp ++++ b/hotspot/src/os_cpu/linux_aarch64/vm/atomic_linux_aarch64.inline.hpp +@@ -131,6 +131,14 @@ inline intptr_t Atomic::cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* + return __sync_val_compare_and_swap(dest, compare_value, exchange_value); + } + ++inline intptr_t Atomic::relax_cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value) ++{ ++ intptr_t value = compare_value; ++ __atomic_compare_exchange(dest, &value, &exchange_value, /* weak */false, ++ __ATOMIC_RELAXED, __ATOMIC_RELAXED); ++ return value; ++} ++ + inline void* Atomic::cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value) + { + return (void *) cmpxchg_ptr((intptr_t) exchange_value, +diff --git a/hotspot/src/share/vm/classfile/classFileParser.cpp b/hotspot/src/share/vm/classfile/classFileParser.cpp +index 07d07e4f2..f001a94e7 100644 +--- a/hotspot/src/share/vm/classfile/classFileParser.cpp ++++ b/hotspot/src/share/vm/classfile/classFileParser.cpp +@@ -4393,6 +4393,11 @@ void ClassFileParser::fill_oop_maps(instanceKlassHandle k, + OopMapBlock* this_oop_map = k->start_of_nonstatic_oop_maps(); + const InstanceKlass* const super = k->superklass(); + const unsigned int super_count = super ? super->nonstatic_oop_map_count() : 0; ++ ++ const bool super_is_gc_leaf = super ? 
super->oop_is_gc_leaf() : true; ++ bool this_is_gc_leaf = super_is_gc_leaf && (nonstatic_oop_map_count == 0); ++ k->set_oop_is_gc_leaf(this_is_gc_leaf); ++ + if (super_count > 0) { + // Copy maps from superklass + OopMapBlock* super_oop_map = super->start_of_nonstatic_oop_maps(); +diff --git a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp +index b2de74d41..dde9ac426 100644 +--- a/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp ++++ b/hotspot/src/share/vm/gc_implementation/parallelScavenge/psPromotionManager.inline.hpp +@@ -49,7 +49,12 @@ inline void PSPromotionManager::claim_or_forward_internal_depth(T* p) { + } + oopDesc::encode_store_heap_oop_not_null(p, o); + } else { +- push_depth(p); ++ // leaf object copy in advanced, reduce cost of push and pop ++ if (!o->klass()->oop_is_gc_leaf()) { ++ push_depth(p); ++ } else { ++ PSScavenge::copy_and_push_safe_barrier(this, p); ++ } + } + } + } +@@ -171,7 +176,15 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { + Copy::aligned_disjoint_words((HeapWord*)o, (HeapWord*)new_obj, new_obj_size); + + // Now we have to CAS in the header. ++#ifdef AARCH64 ++ // CAS with memory fence cost a lot within copy_to_survivor_space on aarch64. ++ // To minimize the cost, we use a normal CAS to do object forwarding, plus a ++ // memory fence only upon CAS succeeds. To further reduce the fence insertion, ++ // we can skip the fence insertion for leaf objects (objects don't have reference fields). ++ if (o->relax_cas_forward_to(new_obj, test_mark)) { ++#else + if (o->cas_forward_to(new_obj, test_mark)) { ++#endif + // We won any races, we "own" this object. 
+ assert(new_obj == o->forwardee(), "Sanity"); + +@@ -195,10 +208,13 @@ oop PSPromotionManager::copy_to_survivor_space(oop o) { + push_depth(masked_o); + TASKQUEUE_STATS_ONLY(++_arrays_chunked; ++_masked_pushes); + } else { +- // we'll just push its contents +- new_obj->push_contents(this); ++ // leaf object don't have contents, never need push_contents ++ if (!o->klass()->oop_is_gc_leaf()) { ++ // we'll just push its contents ++ new_obj->push_contents(this); ++ } + } +- } else { ++ } else { + // We lost, someone else "owns" this object + guarantee(o->is_forwarded(), "Object must be forwarded if the cas failed."); + +diff --git a/hotspot/src/share/vm/oops/klass.cpp b/hotspot/src/share/vm/oops/klass.cpp +index 7fda7ce62..6e8f9acde 100644 +--- a/hotspot/src/share/vm/oops/klass.cpp ++++ b/hotspot/src/share/vm/oops/klass.cpp +@@ -207,6 +207,8 @@ Klass::Klass() { + clear_modified_oops(); + clear_accumulated_modified_oops(); + _shared_class_path_index = -1; ++ ++ set_oop_is_gc_leaf(false); + } + + jint Klass::array_layout_helper(BasicType etype) { +diff --git a/hotspot/src/share/vm/oops/klass.hpp b/hotspot/src/share/vm/oops/klass.hpp +index 22ae48f5c..4aea54795 100644 +--- a/hotspot/src/share/vm/oops/klass.hpp ++++ b/hotspot/src/share/vm/oops/klass.hpp +@@ -177,6 +177,8 @@ class Klass : public Metadata { + jbyte _modified_oops; // Card Table Equivalent (YC/CMS support) + jbyte _accumulated_modified_oops; // Mod Union Equivalent (CMS support) + ++ bool _is_gc_leaf; ++ + private: + // This is an index into FileMapHeader::_classpath_entry_table[], to + // associate this class with the JAR file where it's loaded from during +@@ -569,6 +571,9 @@ protected: + oop_is_typeArray_slow()); } + #undef assert_same_query + ++ void set_oop_is_gc_leaf(bool is_gc_leaf) { _is_gc_leaf = is_gc_leaf; } ++ inline bool oop_is_gc_leaf() const { return _is_gc_leaf; } ++ + // Access flags + AccessFlags access_flags() const { return _access_flags; } + void set_access_flags(AccessFlags flags) { 
_access_flags = flags; } +diff --git a/hotspot/src/share/vm/oops/oop.hpp b/hotspot/src/share/vm/oops/oop.hpp +index a703a54ef..41a7bce4d 100644 +--- a/hotspot/src/share/vm/oops/oop.hpp ++++ b/hotspot/src/share/vm/oops/oop.hpp +@@ -76,6 +76,9 @@ class oopDesc { + + void release_set_mark(markOop m); + markOop cas_set_mark(markOop new_mark, markOop old_mark); ++#ifdef AARCH64 ++ markOop relax_cas_set_mark(markOop new_mark, markOop old_mark); ++#endif + + // Used only to re-initialize the mark word (e.g., of promoted + // objects during a GC) -- requires a valid klass pointer +@@ -317,6 +320,10 @@ class oopDesc { + void forward_to(oop p); + bool cas_forward_to(oop p, markOop compare); + ++#ifdef AARCH64 ++ bool relax_cas_forward_to(oop p, markOop compare); ++#endif ++ + #if INCLUDE_ALL_GCS + // Like "forward_to", but inserts the forwarding pointer atomically. + // Exactly one thread succeeds in inserting the forwarding pointer, and +diff --git a/hotspot/src/share/vm/oops/oop.inline.hpp b/hotspot/src/share/vm/oops/oop.inline.hpp +index d4c4d75c0..c3abdb128 100644 +--- a/hotspot/src/share/vm/oops/oop.inline.hpp ++++ b/hotspot/src/share/vm/oops/oop.inline.hpp +@@ -76,6 +76,12 @@ inline markOop oopDesc::cas_set_mark(markOop new_mark, markOop old_mark) { + return (markOop) Atomic::cmpxchg_ptr(new_mark, &_mark, old_mark); + } + ++#ifdef AARCH64 ++inline markOop oopDesc::relax_cas_set_mark(markOop new_mark, markOop old_mark) { ++ return (markOop)Atomic::relax_cmpxchg_ptr((intptr_t)new_mark, (volatile intptr_t*)&_mark, (intptr_t)old_mark); ++} ++#endif ++ + inline Klass* oopDesc::klass() const { + if (UseCompressedClassPointers) { + return Klass::decode_klass_not_null(_metadata._compressed_klass); +@@ -715,6 +721,30 @@ inline bool oopDesc::cas_forward_to(oop p, markOop compare) { + return cas_set_mark(m, compare) == compare; + } + ++#ifdef AARCH64 ++inline bool oopDesc::relax_cas_forward_to(oop p, markOop compare) { ++ assert(check_obj_alignment(p), ++ "forwarding to something 
not aligned"); ++ assert(Universe::heap()->is_in_reserved(p), ++ "forwarding to something not in heap"); ++ markOop m = markOopDesc::encode_pointer_as_mark(p); ++ assert(m->decode_pointer() == p, "encoding must be reversable"); ++ markOop old_markoop = relax_cas_set_mark(m, compare); ++ // If CAS succeeded, we must ensure the copy visible to threads reading the forwardee. ++ // (We might delay the fence insertion till pushing contents to task stack as other threads ++ // only need to touch the copied object after stolen the task.) ++ if (old_markoop == compare) { ++ // Once the CAS succeeds, leaf object never needs to be visible to other threads (finished ++ // collection by current thread), so we can save the fence. ++ if (!p->klass()->oop_is_gc_leaf()) { ++ OrderAccess::fence(); ++ } ++ return true; ++ } ++ return false; ++} ++#endif ++ + // Note that the forwardee is not the same thing as the displaced_mark. + // The forwardee is used when copying during scavenge and mark-sweep. + // It does need to clear the low two locking- and GC-related bits. 
+diff --git a/hotspot/src/share/vm/runtime/atomic.hpp b/hotspot/src/share/vm/runtime/atomic.hpp +index 9ca5fce97..015178b61 100644 +--- a/hotspot/src/share/vm/runtime/atomic.hpp ++++ b/hotspot/src/share/vm/runtime/atomic.hpp +@@ -94,6 +94,10 @@ class Atomic : AllStatic { + unsigned int compare_value); + + inline static intptr_t cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value); ++#ifdef AARCH64 ++ inline static intptr_t relax_cmpxchg_ptr(intptr_t exchange_value, volatile intptr_t* dest, intptr_t compare_value); ++#endif ++ + inline static void* cmpxchg_ptr(void* exchange_value, volatile void* dest, void* compare_value); + }; + diff --git a/prohibition-of-irreducible-loop-in-mergers.patch b/prohibition-of-irreducible-loop-in-mergers.patch deleted file mode 100644 index 7ed9665ab57c35ad90ecd0a71a498dfedf2e5a35..0000000000000000000000000000000000000000 --- a/prohibition-of-irreducible-loop-in-mergers.patch +++ /dev/null @@ -1,27 +0,0 @@ -From 34712f6bbc3c2c664ee641c78d4a2f8cfe427880 Mon Sep 17 00:00:00 2001 -Date: Fri, 28 Feb 2020 15:17:44 +0000 -Subject: [PATCH] prohibition of irreducible loop in mergers - -Summary: C2Compiler: irreducible loop should not enter merge_many_backedges -LLT: NA -Bug url: NA ---- - hotspot/src/share/vm/opto/loopnode.cpp | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/hotspot/src/share/vm/opto/loopnode.cpp b/hotspot/src/share/vm/opto/loopnode.cpp -index e2c0645cf8..bbb2e2bf98 100644 ---- a/hotspot/src/share/vm/opto/loopnode.cpp -+++ b/hotspot/src/share/vm/opto/loopnode.cpp -@@ -1542,7 +1542,7 @@ bool IdealLoopTree::beautify_loops( PhaseIdealLoop *phase ) { - // If I am a shared header (multiple backedges), peel off the many - // backedges into a private merge point and use the merge point as - // the one true backedge. 
-- if( _head->req() > 3 ) { -+ if( _head->req() > 3 && !_irreducible) { - // Merge the many backedges into a single backedge but leave - // the hottest backedge as separate edge for the following peel. - merge_many_backedges( phase ); --- -2.12.3 -