From 1abe9b3b43101067b004512d962580e966480eec Mon Sep 17 00:00:00 2001 From: hu_bo_dao Date: Fri, 11 Jun 2021 09:48:31 +0800 Subject: [PATCH 1/6] I3V9SK: jmap support CMS parallel inspection --- openjdk-1.8.0.spec | 8 +- support_CMS_parallel_inspection.patch | 298 ++++++++++++++++++++++++++ 2 files changed, 304 insertions(+), 2 deletions(-) create mode 100755 support_CMS_parallel_inspection.patch diff --git a/openjdk-1.8.0.spec b/openjdk-1.8.0.spec index 5d5277e..83df46b 100644 --- a/openjdk-1.8.0.spec +++ b/openjdk-1.8.0.spec @@ -918,7 +918,7 @@ Provides: java-%{javaver}-%{origin}-accessibility%{?1} = %{epoch}:%{version}-%{r Name: java-%{javaver}-%{origin} Version: %{javaver}.%{updatever}.%{buildver} -Release: 9 +Release: 10 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides. This created a @@ -1105,6 +1105,7 @@ Patch191: 8264640.patch Patch192: add_kae_implementation_add_default_conf_file.patch Patch193: improve_algorithmConstraints_checkAlgorithm_performance.patch Patch194: modify_the_default_iteration_time_and_forks_in_the_JMH_of_KAEProvider.patch +Patch195: support_CMS_parallel_inspection.patch ############################################# # @@ -1558,7 +1559,7 @@ pushd %{top_level_dir_name} %patch191 -p1 %patch192 -p1 %patch194 -p1 - +%patch195 -p1 popd # System library fixes @@ -2175,6 +2176,9 @@ require "copy_jdk_configs.lua" %endif %changelog +* Wed Jun 10 2021 hu_bo_dao - 1:1.8.0.292-b10.10 +- add support_CMS_parallel_inspection.patch + * Wed Jun 9 2021 noah - 1:1.8.0.292-b10.9 - add modify_the_default_iteration_time_and_forks_in_the_JMH_of_KAEProvider.patch diff --git a/support_CMS_parallel_inspection.patch b/support_CMS_parallel_inspection.patch new file mode 100755 index 0000000..a5a5b9d --- /dev/null +++ b/support_CMS_parallel_inspection.patch @@ -0,0 +1,298 @@ +commit 6128a6c319f9d10c604bf7d4049ef68b7fd11b27 +Author: hubodao +Date: Tue Jun 8 07:37:02 2021 +0000 + + support CMS Parallel inspection + +diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp +index 53b75a4ca..3c3deab28 100644 +--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp ++++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.cpp +@@ -2871,6 +2871,47 @@ void ConcurrentMarkSweepGeneration::gc_epilogue(bool full) { + } + } + ++size_t ConcurrentMarkSweepGeneration::num_iterable_blocks() const ++{ ++ return (used_stable() + CMSIterateBlockSize - 1) / CMSIterateBlockSize; ++} ++ ++void ConcurrentMarkSweepGeneration::object_iterate_block(ObjectClosure *cl, size_t block_index) ++{ ++ size_t block_word_size = CMSIterateBlockSize / HeapWordSize; ++ MemRegion span = MemRegion(cmsSpace()->bottom() + block_index * block_word_size, ++ cmsSpace()->bottom() + (block_index + 1) * block_word_size); ++ if (!span.is_empty()) { // Non-null task ++ HeapWord *prev_obj; ++ if (block_index == 0) { ++ prev_obj = span.start(); ++ } else { ++ prev_obj = cmsSpace()->block_start_careful(span.start()); ++ while (prev_obj < span.start()) { ++ size_t sz = cmsSpace()->block_size_no_stall(prev_obj, _collector); ++ if (sz > 0) { ++ prev_obj += sz; ++ } else { ++ break; ++ } ++ } ++ } ++ if (prev_obj < span.end()) { ++ HeapWord *cur, *limit; ++ size_t curSize; ++ for (cur = 
prev_obj, limit = span.end(); cur < limit; cur += curSize) { ++ curSize = cmsSpace()->block_size_no_stall(cur, _collector); ++ if (curSize == 0) { ++ break; ++ } ++ if (cmsSpace()->block_is_obj(cur)) { ++ cl->do_object(oop(cur)); ++ } ++ } ++ } ++ } ++} ++ + void ConcurrentMarkSweepGeneration::gc_epilogue_work(bool full) { + assert(!incremental_collection_failed(), "Should have been cleared"); + cmsSpace()->setPreconsumptionDirtyCardClosure(NULL); +diff --git a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp +index ca3fee21b..7d05410fe 100644 +--- a/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp ++++ b/hotspot/src/share/vm/gc_implementation/concurrentMarkSweep/concurrentMarkSweepGeneration.hpp +@@ -1154,9 +1154,10 @@ class ConcurrentMarkSweepGeneration: public CardGeneration { + + // Adaptive size policy + CMSAdaptiveSizePolicy* size_policy(); +- ++ static const size_t CMSIterateBlockSize = 1024 * 1024; + void set_did_compact(bool v) { _did_compact = v; } +- ++ virtual size_t num_iterable_blocks() const; ++ virtual void object_iterate_block(ObjectClosure *cl, size_t block_index); + bool refs_discovery_is_atomic() const { return false; } + bool refs_discovery_is_mt() const { + // Note: CMS does MT-discovery during the parallel-remark +diff --git a/hotspot/src/share/vm/memory/genCollectedHeap.cpp b/hotspot/src/share/vm/memory/genCollectedHeap.cpp +index ed2c0afb7..20fbbfd8e 100644 +--- a/hotspot/src/share/vm/memory/genCollectedHeap.cpp ++++ b/hotspot/src/share/vm/memory/genCollectedHeap.cpp +@@ -1272,6 +1272,73 @@ void GenCollectedHeap::print_heap_change(size_t prev_used) const { + } + } + ++// The CMSHeapBlockClaimer is used during parallel iteration over the heap, ++// allowing workers to claim heap areas ("blocks"), gaining exclusive rights to these. ++// The eden and survivor spaces are treated as single blocks as it is hard to divide ++// these spaces. ++// The old space is divided into fixed-size blocks. ++class CMSHeapBlockClaimer : public StackObj { ++ size_t _claimed_index; ++ ++public: ++ static const size_t InvalidIndex = SIZE_MAX; ++ static const size_t EdenIndex = 0; ++ static const size_t SurvivorIndex = 1; ++ static const size_t NumNonOldGenClaims = 2; ++ ++ CMSHeapBlockClaimer() : _claimed_index(EdenIndex) { } ++ // Claim the block and get the block index. ++ size_t claim_and_get_block() ++ { ++ size_t block_index; ++ block_index = Atomic::add(1u, reinterpret_cast(&_claimed_index)) - 1; ++ Generation *old_gen = GenCollectedHeap::heap()->get_gen(1); ++ size_t num_claims = old_gen->num_iterable_blocks() + NumNonOldGenClaims; ++ return block_index < num_claims ? 
block_index : InvalidIndex; ++ } ++ ~CMSHeapBlockClaimer() {} ++}; ++ ++void GenCollectedHeap::object_iterate_parallel(ObjectClosure *cl, CMSHeapBlockClaimer *claimer) ++{ ++ size_t block_index = claimer->claim_and_get_block(); ++ DefNewGeneration *def_new_gen = (DefNewGeneration*) get_gen(0); ++ // Iterate until all blocks are claimed ++ if (block_index == CMSHeapBlockClaimer::EdenIndex) { ++ def_new_gen->eden()->object_iterate(cl); ++ block_index = claimer->claim_and_get_block(); ++ } ++ if (block_index == CMSHeapBlockClaimer::SurvivorIndex) { ++ def_new_gen->from()->object_iterate(cl); ++ def_new_gen->to()->object_iterate(cl); ++ block_index = claimer->claim_and_get_block(); ++ } ++ while (block_index != CMSHeapBlockClaimer::InvalidIndex) { ++ get_gen(1)->object_iterate_block(cl, block_index - CMSHeapBlockClaimer::NumNonOldGenClaims); ++ block_index = claimer->claim_and_get_block(); ++ } ++} ++ ++class GenParallelObjectIterator : public ParallelObjectIterator { ++private: ++ GenCollectedHeap *_heap; ++ CMSHeapBlockClaimer _claimer; ++ ++public: ++ GenParallelObjectIterator(uint thread_num) : _heap(GenCollectedHeap::heap()),_claimer(){} ++ ++ virtual void object_iterate(ObjectClosure *cl, uint worker_id) ++ { ++ _heap->object_iterate_parallel(cl, &_claimer); ++ } ++ ~GenParallelObjectIterator() {} ++}; ++ ++ParallelObjectIterator* GenCollectedHeap::parallel_object_iterator(uint thread_num) ++{ ++ return new GenParallelObjectIterator(thread_num); ++} ++ + class GenGCPrologueClosure: public GenCollectedHeap::GenClosure { + private: + bool _full; +@@ -1415,6 +1482,7 @@ void GenCollectedHeap::stop() { + #endif + } + +-void GenCollectedHeap::run_task(AbstractGangTask *task) { +- ++void GenCollectedHeap::run_task(AbstractGangTask *task) ++{ ++ workers()->run_task(task); + } +diff --git a/hotspot/src/share/vm/memory/genCollectedHeap.hpp b/hotspot/src/share/vm/memory/genCollectedHeap.hpp +index 2c78ea15a..9e5405e28 100644 +--- a/hotspot/src/share/vm/memory/genCollectedHeap.hpp ++++ b/hotspot/src/share/vm/memory/genCollectedHeap.hpp +@@ -30,6 +30,7 @@ + #include "memory/generation.hpp" + #include "memory/sharedHeap.hpp" + ++class CMSHeapBlockClaimer; + class SubTasksDone; + + // A "GenCollectedHeap" is a SharedHeap that uses generational +@@ -213,7 +214,14 @@ public: + // Iteration functions. + void oop_iterate(ExtendedOopClosure* cl); + void object_iterate(ObjectClosure* cl); ++ virtual ParallelObjectIterator* parallel_object_iterator(uint thread_num); ++ // Iteration functions. 
++ void object_iterate_parallel(ObjectClosure *cl, CMSHeapBlockClaimer *claimer); + void safe_object_iterate(ObjectClosure* cl); ++ virtual FlexibleWorkGang* get_safepoint_workers() ++ { ++ return workers(); ++ } + Space* space_containing(const void* addr) const; + + // A CollectedHeap is divided into a dense sequence of "blocks"; that is, +diff --git a/hotspot/src/share/vm/memory/generation.cpp b/hotspot/src/share/vm/memory/generation.cpp +index dc4ac0869..9d6c926e1 100644 +--- a/hotspot/src/share/vm/memory/generation.cpp ++++ b/hotspot/src/share/vm/memory/generation.cpp +@@ -103,6 +103,12 @@ void Generation::ref_processor_init() { + } + } + ++size_t Generation::num_iterable_blocks() const ++{ ++ return 0; ++} ++void Generation::object_iterate_block(ObjectClosure *cl, size_t block_index){}; ++ + void Generation::print() const { print_on(tty); } + + void Generation::print_on(outputStream* st) const { +diff --git a/hotspot/src/share/vm/memory/generation.hpp b/hotspot/src/share/vm/memory/generation.hpp +index ef5457890..eeb9fa691 100644 +--- a/hotspot/src/share/vm/memory/generation.hpp ++++ b/hotspot/src/share/vm/memory/generation.hpp +@@ -175,7 +175,8 @@ class Generation: public CHeapObj { + // Returns the total number of bytes available in a generation + // for the allocation of objects. + virtual size_t max_capacity() const; +- ++ virtual size_t num_iterable_blocks() const; ++ virtual void object_iterate_block(ObjectClosure *cl, size_t block_index); + // If this is a young generation, the maximum number of bytes that can be + // allocated in this generation before a GC is triggered. + virtual size_t capacity_before_gc() const { return 0; } +diff --git a/jdk/src/share/classes/sun/tools/jmap/JMap.java b/jdk/src/share/classes/sun/tools/jmap/JMap.java +index e891b6c55..2cb5a5c10 100644 +--- a/jdk/src/share/classes/sun/tools/jmap/JMap.java ++++ b/jdk/src/share/classes/sun/tools/jmap/JMap.java +@@ -220,20 +220,24 @@ public class JMap { + + private static void histo(String pid, String options) throws IOException { + VirtualMachine vm = attach(pid); +- String parallel = null; + String liveopt = "-all"; +- if (options.startsWith("live")) { +- liveopt = "-live"; +- } +- String[] subopts = options.split(","); ++ String parallel = null; ++ String subopts[] = options.split(","); + for (int i = 0; i < subopts.length; i++) { + String subopt = subopts[i]; +- if (subopt.startsWith("parallel=")) { ++ if (subopt.equals("") || subopt.equals("all")) { ++ // pass ++ } else if (subopt.equals("live")) { ++ liveopt = "-live"; ++ } else if (subopt.startsWith("parallel=")) { + parallel = subopt.substring("parallel=".length()); + if (parallel == null) { + System.err.println("Fail: no number provided in option: '" + subopt + "'"); +- System.exit(1); ++ usage(1); + } ++ } else { ++ System.err.println("Fail: invalid option: '" + subopt + "'"); ++ usage(1); + } + } + InputStream in = ((HotSpotVirtualMachine)vm).heapHisto(liveopt,parallel); +diff --git a/jdk/test/sun/tools/jmap/ParallelInspection.sh b/jdk/test/sun/tools/jmap/ParallelInspection.sh +index 69e51a76f..b4add98c0 100644 +--- a/jdk/test/sun/tools/jmap/ParallelInspection.sh ++++ b/jdk/test/sun/tools/jmap/ParallelInspection.sh +@@ -76,4 +76,36 @@ set -e + stopApplication "${PORTFILE}" + waitForApplication + ++# parallel num in CMS GC ++# Start application and use PORTFILE for coordination ++PORTFILE="${TESTCLASSES}"/shutdown.port ++startApplication SimpleApplication "${PORTFILE}" defineGC UseConcMarkSweepGC ++ ++# all return statuses are checked in this test 
++set +e ++ ++failed=0 ++ ++${JMAP} -J-XX:+UsePerfData -histo:parallel=0 $appJavaPid ++if [ $? != 0 ]; then failed=1; fi ++ ++${JMAP} -J-XX:+UsePerfData -histo:parallel=1 $appJavaPid ++if [ $? != 0 ]; then failed=1; fi ++ ++${JMAP} -J-XX:+UsePerfData -histo:parallel=2 $appJavaPid ++if [ $? != 0 ]; then failed=1; fi ++ ++${JMAP} -J-XX:+UsePerfData -histo:live,parallel=0 $appJavaPid ++if [ $? != 0 ]; then failed=1; fi ++ ++${JMAP} -J-XX:+UsePerfData -histo:live,parallel=1 $appJavaPid ++if [ $? != 0 ]; then failed=1; fi ++ ++${JMAP} -J-XX:+UsePerfData -histo:live,parallel=2 $appJavaPid ++if [ $? != 0 ]; then failed=1; fi ++set -e ++ ++stopApplication "${PORTFILE}" ++waitForApplication ++ + exit $failed -- Gitee From f2f0467619a51bd85d9c6d608e584968b6cbce42 Mon Sep 17 00:00:00 2001 From: kuenking111 Date: Sat, 12 Jun 2021 11:42:36 +0800 Subject: [PATCH 2/6] I3VF9L: G1 GC adds support for NUMA --- g1gc-numa-aware-Implementation.patch | 3566 ++++++++++++++++++++++++++ openjdk-1.8.0.spec | 7 +- 2 files changed, 3572 insertions(+), 1 deletion(-) create mode 100755 g1gc-numa-aware-Implementation.patch diff --git a/g1gc-numa-aware-Implementation.patch b/g1gc-numa-aware-Implementation.patch new file mode 100755 index 0000000..d9001e2 --- /dev/null +++ b/g1gc-numa-aware-Implementation.patch @@ -0,0 +1,3566 @@ +commit 63c022739be1810316e2504f4abeaa4ca144ef46 +Author: hubodao +Date: Tue Jun 8 07:44:36 2021 +0000 + + numa-aware implementation + +diff --git a/hotspot/src/os/bsd/vm/os_bsd.cpp b/hotspot/src/os/bsd/vm/os_bsd.cpp +index 3e4d8c7e6..340334c47 100644 +--- a/hotspot/src/os/bsd/vm/os_bsd.cpp ++++ b/hotspot/src/os/bsd/vm/os_bsd.cpp +@@ -2290,6 +2290,10 @@ size_t os::numa_get_leaf_groups(int *ids, size_t size) { + return 0; + } + ++int os::numa_get_group_id_for_address(const void* address) { ++ return 0; ++} ++ + bool os::get_page_info(char *start, page_info* info) { + return false; + } +diff --git a/hotspot/src/os/linux/vm/os_linux.cpp b/hotspot/src/os/linux/vm/os_linux.cpp +index 621316b99..f700335a3 100644 +--- a/hotspot/src/os/linux/vm/os_linux.cpp ++++ b/hotspot/src/os/linux/vm/os_linux.cpp +@@ -2908,6 +2908,19 @@ int os::numa_get_group_id() { + return 0; + } + ++int os::numa_get_group_id_for_address(const void* address) { ++ void** pages = const_cast(&address); ++ int id = -1; ++ ++ if (os::Linux::numa_move_pages(0, 1, pages, NULL, &id, 0) == -1) { ++ return -1; ++ } ++ if (id < 0) { ++ return -1; ++ } ++ return id; ++} ++ + int os::Linux::get_existing_num_nodes() { + size_t node; + size_t highest_node_number = Linux::numa_max_node(); +@@ -2930,7 +2943,7 @@ size_t os::numa_get_leaf_groups(int *ids, size_t size) { + // not always consecutively available, i.e. available from 0 to the highest + // node number. 
+ for (size_t node = 0; node <= highest_node_number; node++) { +- if (Linux::isnode_in_configured_nodes(node)) { ++ if (Linux::isnode_in_bound_nodes(node)) { + ids[i++] = node; + } + } +@@ -3023,11 +3036,21 @@ bool os::Linux::libnuma_init() { + libnuma_dlsym(handle, "numa_bitmask_isbitset"))); + set_numa_distance(CAST_TO_FN_PTR(numa_distance_func_t, + libnuma_dlsym(handle, "numa_distance"))); ++ set_numa_get_membind(CAST_TO_FN_PTR(numa_get_membind_func_t, ++ libnuma_v2_dlsym(handle, "numa_get_membind"))); ++ set_numa_get_interleave_mask(CAST_TO_FN_PTR(numa_get_interleave_mask_func_t, ++ libnuma_v2_dlsym(handle, "numa_get_interleave_mask"))); ++ set_numa_move_pages(CAST_TO_FN_PTR(numa_move_pages_func_t, ++ libnuma_dlsym(handle, "numa_move_pages"))); ++ set_numa_run_on_node(CAST_TO_FN_PTR(numa_run_on_node_func_t, ++ libnuma_dlsym(handle, "numa_run_on_node"))); + + if (numa_available() != -1) { + set_numa_all_nodes((unsigned long*)libnuma_dlsym(handle, "numa_all_nodes")); + set_numa_all_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_all_nodes_ptr")); + set_numa_nodes_ptr((struct bitmask **)libnuma_dlsym(handle, "numa_nodes_ptr")); ++ set_numa_interleave_bitmask(_numa_get_interleave_mask()); ++ set_numa_membind_bitmask(_numa_get_membind()); + // Create an index -> node mapping, since nodes are not always consecutive + _nindex_to_node = new (ResourceObj::C_HEAP, mtInternal) GrowableArray(0, true); + rebuild_nindex_to_node_map(); +@@ -3081,12 +3104,15 @@ void os::Linux::rebuild_cpu_to_node_map() { + for (size_t i = 0; i < node_num; i++) { + // Check if node is configured (not a memory-less node). If it is not, find + // the closest configured node. +- if (!isnode_in_configured_nodes(nindex_to_node()->at(i))) { ++ if (!isnode_in_configured_nodes(nindex_to_node()->at(i)) || ++ !isnode_in_bound_nodes(nindex_to_node()->at(i))) { + closest_distance = INT_MAX; + // Check distance from all remaining nodes in the system. Ignore distance + // from itself and from another non-configured node. + for (size_t m = 0; m < node_num; m++) { +- if (m != i && isnode_in_configured_nodes(nindex_to_node()->at(m))) { ++ if (m != i && ++ isnode_in_configured_nodes(nindex_to_node()->at(m)) && ++ isnode_in_bound_nodes(nindex_to_node()->at(m))) { + distance = numa_distance(nindex_to_node()->at(i), nindex_to_node()->at(m)); + // If a closest node is found, update. 
There is always at least one + // configured node in the system so there is always at least one node +@@ -3140,9 +3166,16 @@ os::Linux::numa_interleave_memory_v2_func_t os::Linux::_numa_interleave_memory_v + os::Linux::numa_set_bind_policy_func_t os::Linux::_numa_set_bind_policy; + os::Linux::numa_bitmask_isbitset_func_t os::Linux::_numa_bitmask_isbitset; + os::Linux::numa_distance_func_t os::Linux::_numa_distance; ++os::Linux::numa_get_membind_func_t os::Linux::_numa_get_membind; ++os::Linux::numa_get_interleave_mask_func_t os::Linux::_numa_get_interleave_mask; ++os::Linux::numa_move_pages_func_t os::Linux::_numa_move_pages; ++os::Linux::numa_run_on_node_func_t os::Linux::_numa_run_on_node; ++os::Linux::NumaAllocationPolicy os::Linux::_current_numa_policy; + unsigned long* os::Linux::_numa_all_nodes; + struct bitmask* os::Linux::_numa_all_nodes_ptr; + struct bitmask* os::Linux::_numa_nodes_ptr; ++struct bitmask* os::Linux::_numa_interleave_bitmask; ++struct bitmask* os::Linux::_numa_membind_bitmask; + + bool os::pd_uncommit_memory(char* addr, size_t size) { + uintptr_t res = (uintptr_t) ::mmap(addr, size, PROT_NONE, +@@ -5195,9 +5228,11 @@ jint os::init_2(void) + if (!Linux::libnuma_init()) { + UseNUMA = false; + } else { +- if ((Linux::numa_max_node() < 1)) { ++ if ((Linux::numa_max_node() < 1) || Linux::isbound_to_single_node()) { + // There's only one node(they start from 0), disable NUMA. + UseNUMA = false; ++ } else { ++ Linux::set_configured_numa_policy(Linux::identify_numa_policy()); + } + } + // With SHM and HugeTLBFS large pages we cannot uncommit a page, so there's no way +diff --git a/hotspot/src/os/linux/vm/os_linux.hpp b/hotspot/src/os/linux/vm/os_linux.hpp +index 79a9f39ab..c6748824e 100644 +--- a/hotspot/src/os/linux/vm/os_linux.hpp ++++ b/hotspot/src/os/linux/vm/os_linux.hpp +@@ -260,6 +260,10 @@ private: + typedef int (*numa_tonode_memory_func_t)(void *start, size_t size, int node); + typedef void (*numa_interleave_memory_func_t)(void *start, size_t size, unsigned long *nodemask); + typedef void (*numa_interleave_memory_v2_func_t)(void *start, size_t size, struct bitmask* mask); ++ typedef struct bitmask* (*numa_get_membind_func_t)(void); ++ typedef struct bitmask* (*numa_get_interleave_mask_func_t)(void); ++ typedef long (*numa_move_pages_func_t)(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags); ++ typedef int (*numa_run_on_node_func_t)(int node); + + typedef void (*numa_set_bind_policy_func_t)(int policy); + typedef int (*numa_bitmask_isbitset_func_t)(struct bitmask *bmp, unsigned int n); +@@ -276,9 +280,16 @@ private: + static numa_set_bind_policy_func_t _numa_set_bind_policy; + static numa_bitmask_isbitset_func_t _numa_bitmask_isbitset; + static numa_distance_func_t _numa_distance; ++ static numa_get_membind_func_t _numa_get_membind; ++ static numa_get_interleave_mask_func_t _numa_get_interleave_mask; ++ static numa_move_pages_func_t _numa_move_pages; ++ static numa_run_on_node_func_t _numa_run_on_node; ++ + static unsigned long* _numa_all_nodes; + static struct bitmask* _numa_all_nodes_ptr; + static struct bitmask* _numa_nodes_ptr; ++ static struct bitmask* _numa_interleave_bitmask; ++ static struct bitmask* _numa_membind_bitmask; + + static void set_sched_getcpu(sched_getcpu_func_t func) { _sched_getcpu = func; } + static void set_numa_node_to_cpus(numa_node_to_cpus_func_t func) { _numa_node_to_cpus = func; } +@@ -291,10 +302,24 @@ private: + static void set_numa_set_bind_policy(numa_set_bind_policy_func_t func) { 
_numa_set_bind_policy = func; } + static void set_numa_bitmask_isbitset(numa_bitmask_isbitset_func_t func) { _numa_bitmask_isbitset = func; } + static void set_numa_distance(numa_distance_func_t func) { _numa_distance = func; } ++ static void set_numa_get_membind(numa_get_membind_func_t func) { _numa_get_membind = func; } ++ static void set_numa_get_interleave_mask(numa_get_interleave_mask_func_t func) { _numa_get_interleave_mask = func; } ++ static void set_numa_move_pages(numa_move_pages_func_t func) { _numa_move_pages = func; } ++ static void set_numa_run_on_node(numa_run_on_node_func_t func) { _numa_run_on_node = func; } + static void set_numa_all_nodes(unsigned long* ptr) { _numa_all_nodes = ptr; } + static void set_numa_all_nodes_ptr(struct bitmask **ptr) { _numa_all_nodes_ptr = (ptr == NULL ? NULL : *ptr); } + static void set_numa_nodes_ptr(struct bitmask **ptr) { _numa_nodes_ptr = (ptr == NULL ? NULL : *ptr); } ++ static void set_numa_interleave_bitmask(struct bitmask* ptr) { _numa_interleave_bitmask = ptr ; } ++ static void set_numa_membind_bitmask(struct bitmask* ptr) { _numa_membind_bitmask = ptr ; } + static int sched_getcpu_syscall(void); ++ ++ enum NumaAllocationPolicy{ ++ NotInitialized, ++ Membind, ++ Interleave ++ }; ++ static NumaAllocationPolicy _current_numa_policy; ++ + public: + static int sched_getcpu() { return _sched_getcpu != NULL ? _sched_getcpu() : -1; } + static int numa_node_to_cpus(int node, unsigned long *buffer, int bufferlen) { +@@ -308,6 +333,20 @@ public: + static int numa_tonode_memory(void *start, size_t size, int node) { + return _numa_tonode_memory != NULL ? _numa_tonode_memory(start, size, node) : -1; + } ++ ++ static void set_configured_numa_policy(NumaAllocationPolicy numa_policy) { ++ _current_numa_policy = numa_policy; ++ } ++ ++ static NumaAllocationPolicy identify_numa_policy() { ++ for (int node = 0; node <= Linux::numa_max_node(); node++) { ++ if (Linux::_numa_bitmask_isbitset(Linux::_numa_interleave_bitmask, node)) { ++ return Interleave; ++ } ++ } ++ return Membind; ++ } ++ + static void numa_interleave_memory(void *start, size_t size) { + // Use v2 api if available + if (_numa_interleave_memory_v2 != NULL && _numa_all_nodes_ptr != NULL) { +@@ -324,6 +363,14 @@ public: + static int numa_distance(int node1, int node2) { + return _numa_distance != NULL ? _numa_distance(node1, node2) : -1; + } ++ static int numa_run_on_node(int node) { ++ return _numa_run_on_node != NULL ? _numa_run_on_node(node) : -1; ++ } ++ ++ static long numa_move_pages(int pid, unsigned long count, void **pages, const int *nodes, int *status, int flags) { ++ return _numa_move_pages != NULL ? _numa_move_pages(pid, count, pages, nodes, status, flags) : -1; ++ } ++ + static int get_node_by_cpu(int cpu_id); + static int get_existing_num_nodes(); + // Check if numa node is configured (non-zero memory node). +@@ -352,6 +399,39 @@ public: + } else + return 0; + } ++ // Check if node is in bound node set. ++ static bool isnode_in_bound_nodes(int node) { ++ if (_numa_membind_bitmask != NULL && _numa_bitmask_isbitset != NULL) { ++ return _numa_bitmask_isbitset(_numa_membind_bitmask, node); ++ } else { ++ return false; ++ } ++ } ++ // Check if bound to only one numa node. ++ // Returns true if bound to a single numa node, otherwise returns false. 
++ static bool isbound_to_single_node() { ++ int nodes = 0; ++ unsigned int node = 0; ++ unsigned int highest_node_number = 0; ++ ++ if (_numa_membind_bitmask != NULL && _numa_max_node != NULL && _numa_bitmask_isbitset != NULL) { ++ highest_node_number = _numa_max_node(); ++ } else { ++ return false; ++ } ++ ++ for (node = 0; node <= highest_node_number; node++) { ++ if (_numa_bitmask_isbitset(_numa_membind_bitmask, node)) { ++ nodes++; ++ } ++ } ++ ++ if (nodes == 1) { ++ return true; ++ } else { ++ return false; ++ } ++ } + }; + + +diff --git a/hotspot/src/os/solaris/vm/os_solaris.cpp b/hotspot/src/os/solaris/vm/os_solaris.cpp +index 732538434..d995f51e3 100644 +--- a/hotspot/src/os/solaris/vm/os_solaris.cpp ++++ b/hotspot/src/os/solaris/vm/os_solaris.cpp +@@ -2788,6 +2788,10 @@ int os::numa_get_group_id() { + return ids[os::random() % r]; + } + ++int os::numa_get_group_id_for_address(const void* address) { ++ return 0; ++} ++ + // Request information about the page. + bool os::get_page_info(char *start, page_info* info) { + const uint_t info_types[] = { MEMINFO_VLGRP, MEMINFO_VPAGESIZE }; +diff --git a/hotspot/src/os/windows/vm/os_windows.cpp b/hotspot/src/os/windows/vm/os_windows.cpp +index e7ff202af..39f5410d1 100644 +--- a/hotspot/src/os/windows/vm/os_windows.cpp ++++ b/hotspot/src/os/windows/vm/os_windows.cpp +@@ -3532,6 +3532,10 @@ size_t os::numa_get_leaf_groups(int *ids, size_t size) { + } + } + ++int os::numa_get_group_id_for_address(const void* address) { ++ return 0; ++} ++ + bool os::get_page_info(char *start, page_info* info) { + return false; + } +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp +index f92ae1102..0f9bc3f81 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1AllocRegion.cpp +@@ -235,15 +235,16 @@ void G1AllocRegion::trace(const char* str, size_t word_size, HeapWord* result) { + #endif // G1_ALLOC_REGION_TRACING + + G1AllocRegion::G1AllocRegion(const char* name, +- bool bot_updates) +- : _name(name), _bot_updates(bot_updates), ++ bool bot_updates, ++ uint node_index) ++ : _name(name), _bot_updates(bot_updates), _node_index(node_index), + _alloc_region(NULL), _count(0), _used_bytes_before(0), + _allocation_context(AllocationContext::system()) { } + + + HeapRegion* MutatorAllocRegion::allocate_new_region(size_t word_size, + bool force) { +- return _g1h->new_mutator_alloc_region(word_size, force); ++ return _g1h->new_mutator_alloc_region(word_size, force, _node_index); + } + + void MutatorAllocRegion::retire_region(HeapRegion* alloc_region, +@@ -254,7 +255,7 @@ void MutatorAllocRegion::retire_region(HeapRegion* alloc_region, + HeapRegion* SurvivorGCAllocRegion::allocate_new_region(size_t word_size, + bool force) { + assert(!force, "not supported for GC alloc regions"); +- return _g1h->new_gc_alloc_region(word_size, count(), InCSetState::Young); ++ return _g1h->new_gc_alloc_region(word_size, count(), InCSetState::Young, _node_index); + } + + void SurvivorGCAllocRegion::retire_region(HeapRegion* alloc_region, +@@ -265,7 +266,7 @@ void SurvivorGCAllocRegion::retire_region(HeapRegion* alloc_region, + HeapRegion* OldGCAllocRegion::allocate_new_region(size_t word_size, + bool force) { + assert(!force, "not supported for GC alloc regions"); +- return _g1h->new_gc_alloc_region(word_size, count(), InCSetState::Old); ++ return _g1h->new_gc_alloc_region(word_size, count(), InCSetState::Old, _node_index); + } + + void 
OldGCAllocRegion::retire_region(HeapRegion* alloc_region, +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1AllocRegion.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1AllocRegion.hpp +index 2edc6545c..bc1c65d5a 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/g1AllocRegion.hpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1AllocRegion.hpp +@@ -26,6 +26,7 @@ + #define SHARE_VM_GC_IMPLEMENTATION_G1_G1ALLOCREGION_HPP + + #include "gc_implementation/g1/heapRegion.hpp" ++#include "gc_implementation/g1/g1NUMA.hpp" + + class G1CollectedHeap; + +@@ -133,7 +134,9 @@ protected: + virtual void retire_region(HeapRegion* alloc_region, + size_t allocated_bytes) = 0; + +- G1AllocRegion(const char* name, bool bot_updates); ++ G1AllocRegion(const char* name, bool bot_updates, uint node_index); ++ // The memory node index this allocation region belongs to. ++ uint _node_index; + + public: + static void setup(G1CollectedHeap* g1h, HeapRegion* dummy_region); +@@ -197,8 +200,8 @@ protected: + virtual HeapRegion* allocate_new_region(size_t word_size, bool force); + virtual void retire_region(HeapRegion* alloc_region, size_t allocated_bytes); + public: +- MutatorAllocRegion() +- : G1AllocRegion("Mutator Alloc Region", false /* bot_updates */) { } ++ MutatorAllocRegion(uint node_index) ++ : G1AllocRegion("Mutator Alloc Region", false /* bot_updates */, node_index) { } + }; + + class SurvivorGCAllocRegion : public G1AllocRegion { +@@ -206,8 +209,8 @@ protected: + virtual HeapRegion* allocate_new_region(size_t word_size, bool force); + virtual void retire_region(HeapRegion* alloc_region, size_t allocated_bytes); + public: +- SurvivorGCAllocRegion() +- : G1AllocRegion("Survivor GC Alloc Region", false /* bot_updates */) { } ++ SurvivorGCAllocRegion(uint node_index) ++ : G1AllocRegion("Survivor GC Alloc Region", false /* bot_updates */, node_index) { } + }; + + class OldGCAllocRegion : public G1AllocRegion { +@@ -216,7 +219,7 @@ protected: + virtual void retire_region(HeapRegion* alloc_region, size_t allocated_bytes); + public: + OldGCAllocRegion() +- : G1AllocRegion("Old GC Alloc Region", true /* bot_updates */) { } ++ : G1AllocRegion("Old GC Alloc Region", true /* bot_updates */, G1NUMA::AnyNodeIndex) { } + + // This specialization of release() makes sure that the last card that has + // been allocated into has been completely filled by a dummy object. 
This +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.cpp +index 0d1ab8411..f6fb2cdee 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.cpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.cpp +@@ -26,19 +26,73 @@ + #include "gc_implementation/g1/g1Allocator.hpp" + #include "gc_implementation/g1/g1CollectedHeap.hpp" + #include "gc_implementation/g1/g1CollectorPolicy.hpp" ++#include "gc_implementation/g1/g1NUMA.hpp" + #include "gc_implementation/g1/heapRegion.inline.hpp" + #include "gc_implementation/g1/heapRegionSet.inline.hpp" + +-void G1DefaultAllocator::init_mutator_alloc_region() { +- assert(_mutator_alloc_region.get() == NULL, "pre-condition"); +- _mutator_alloc_region.init(); ++void G1DefaultAllocator::init_mutator_alloc_regions() { ++ for (uint i = 0; i < _num_alloc_regions; i++) { ++ assert(mutator_alloc_region(i)->get() == NULL, "pre-condition"); ++ mutator_alloc_region(i)->init(); ++ } ++} ++ ++void G1DefaultAllocator::release_mutator_alloc_regions() { ++ for (uint i = 0; i < _num_alloc_regions; i++) { ++ mutator_alloc_region(i)->release(); ++ assert(mutator_alloc_region(i)->get() == NULL, "post-condition"); ++ } ++} ++ ++inline HeapWord* G1DefaultAllocator::attempt_allocation_locked(size_t word_size, bool bot_updates, uint &node_index) { ++ node_index = current_node_index(); ++ HeapWord* result = mutator_alloc_region(node_index)->attempt_allocation_locked(word_size, bot_updates); ++ assert(result != NULL || mutator_alloc_region(node_index)->get() == NULL, ++ err_msg("Must not have a mutator alloc region if there is no memory, but is " PTR_FORMAT, p2i(mutator_alloc_region(node_index)->get()))); ++ return result; ++} ++ ++inline HeapWord* G1DefaultAllocator::attempt_allocation_force(size_t word_size, bool bot_updates, uint node_index) { ++ if (node_index == G1NUMA::AnyNodeIndex) { ++ return NULL; ++ } ++ assert(node_index < _num_alloc_regions, err_msg("Invalid index: %u", node_index)); ++ return mutator_alloc_region(node_index)->attempt_allocation_force(word_size, bot_updates); + } + +-void G1DefaultAllocator::release_mutator_alloc_region() { +- _mutator_alloc_region.release(); +- assert(_mutator_alloc_region.get() == NULL, "post-condition"); ++G1DefaultAllocator::G1DefaultAllocator(G1CollectedHeap* heap) : ++ G1Allocator(heap), ++ _numa(heap->numa()), ++ _num_alloc_regions(_numa->num_active_nodes()), ++ _mutator_alloc_regions(NULL), ++ _survivor_gc_alloc_regions(NULL), ++ _old_gc_alloc_region(), ++ _retained_old_gc_alloc_region(NULL) { ++ ++ _mutator_alloc_regions = NEW_C_HEAP_ARRAY(MutatorAllocRegion, _num_alloc_regions, mtGC); ++ _survivor_gc_alloc_regions = NEW_C_HEAP_ARRAY(SurvivorGCAllocRegion, _num_alloc_regions, mtGC); ++ for (uint i = 0; i < _num_alloc_regions; i++) { ++ ::new(_mutator_alloc_regions + i) MutatorAllocRegion(i); ++ ::new(_survivor_gc_alloc_regions + i) SurvivorGCAllocRegion(i); ++ } + } + ++G1DefaultAllocator::~G1DefaultAllocator() { ++ for (uint i = 0; i < _num_alloc_regions; i++) { ++ _mutator_alloc_regions[i].~MutatorAllocRegion(); ++ _survivor_gc_alloc_regions[i].~SurvivorGCAllocRegion(); ++ } ++ FREE_C_HEAP_ARRAY(MutatorAllocRegion, _mutator_alloc_regions, mtGC); ++ FREE_C_HEAP_ARRAY(SurvivorGCAllocRegion, _survivor_gc_alloc_regions, mtGC); ++} ++ ++#ifdef ASSERT ++bool G1Allocator::has_mutator_alloc_region() { ++ uint node_index = current_node_index(); ++ return mutator_alloc_region(node_index)->get() != NULL; ++} ++#endif ++ + void 
G1Allocator::reuse_retained_old_region(EvacuationInfo& evacuation_info, + OldGCAllocRegion* old, + HeapRegion** retained_old) { +@@ -76,7 +130,9 @@ void G1Allocator::reuse_retained_old_region(EvacuationInfo& evacuation_info, + void G1DefaultAllocator::init_gc_alloc_regions(EvacuationInfo& evacuation_info) { + assert_at_safepoint(true /* should_be_vm_thread */); + +- _survivor_gc_alloc_region.init(); ++ for (uint i = 0; i < _num_alloc_regions; i++) { ++ survivor_gc_alloc_region(i)->init(); ++ } + _old_gc_alloc_region.init(); + reuse_retained_old_region(evacuation_info, + &_old_gc_alloc_region, +@@ -85,9 +141,13 @@ void G1DefaultAllocator::init_gc_alloc_regions(EvacuationInfo& evacuation_info) + + void G1DefaultAllocator::release_gc_alloc_regions(uint no_of_gc_workers, EvacuationInfo& evacuation_info) { + AllocationContext_t context = AllocationContext::current(); +- evacuation_info.set_allocation_regions(survivor_gc_alloc_region(context)->count() + ++ uint survivor_region_count = 0; ++ for (uint node_index = 0; node_index < _num_alloc_regions; node_index++) { ++ survivor_region_count += survivor_gc_alloc_region(node_index)->count(); ++ survivor_gc_alloc_region(node_index)->release(); ++ } ++ evacuation_info.set_allocation_regions(survivor_region_count + + old_gc_alloc_region(context)->count()); +- survivor_gc_alloc_region(context)->release(); + // If we have an old GC alloc region to release, we'll save it in + // _retained_old_gc_alloc_region. If we don't + // _retained_old_gc_alloc_region will become NULL. This is what we +@@ -105,7 +165,9 @@ void G1DefaultAllocator::release_gc_alloc_regions(uint no_of_gc_workers, Evacuat + } + + void G1DefaultAllocator::abandon_gc_alloc_regions() { +- assert(survivor_gc_alloc_region(AllocationContext::current())->get() == NULL, "pre-condition"); ++ for (uint i = 0; i < _num_alloc_regions; i++) { ++ assert(survivor_gc_alloc_region(i)->get() == NULL, "pre-condition"); ++ } + assert(old_gc_alloc_region(AllocationContext::current())->get() == NULL, "pre-condition"); + _retained_old_gc_alloc_region = NULL; + } +@@ -113,16 +175,24 @@ void G1DefaultAllocator::abandon_gc_alloc_regions() { + G1ParGCAllocBuffer::G1ParGCAllocBuffer(size_t gclab_word_size) : + ParGCAllocBuffer(gclab_word_size), _retired(true) { } + ++G1ParGCAllocator::G1ParGCAllocator(G1CollectedHeap* g1h) : ++ _g1h(g1h), _survivor_alignment_bytes(calc_survivor_alignment_bytes()), ++ _numa(g1h->numa()), ++ _num_alloc_regions(_numa->num_active_nodes()), ++ _alloc_buffer_waste(0), _undo_waste(0) { ++} ++ + HeapWord* G1ParGCAllocator::allocate_direct_or_new_plab(InCSetState dest, + size_t word_sz, +- AllocationContext_t context) { ++ AllocationContext_t context, ++ uint node_index) { + size_t gclab_word_size = _g1h->desired_plab_sz(dest); + if (word_sz * 100 < gclab_word_size * ParallelGCBufferWastePct) { +- G1ParGCAllocBuffer* alloc_buf = alloc_buffer(dest, context); ++ G1ParGCAllocBuffer* alloc_buf = alloc_buffer(dest, context, node_index); + add_to_alloc_buffer_waste(alloc_buf->words_remaining()); + alloc_buf->retire(false /* end_of_gc */, false /* retain */); + +- HeapWord* buf = _g1h->par_allocate_during_gc(dest, gclab_word_size, context); ++ HeapWord* buf = _g1h->par_allocate_during_gc(dest, gclab_word_size, context, node_index); + if (buf == NULL) { + return NULL; // Let caller handle allocation failure. 
+ } +@@ -134,29 +204,47 @@ HeapWord* G1ParGCAllocator::allocate_direct_or_new_plab(InCSetState dest, + assert(obj != NULL, "buffer was definitely big enough..."); + return obj; + } else { +- return _g1h->par_allocate_during_gc(dest, word_sz, context); ++ return _g1h->par_allocate_during_gc(dest, word_sz, context, node_index); + } + } + + G1DefaultParGCAllocator::G1DefaultParGCAllocator(G1CollectedHeap* g1h) : +- G1ParGCAllocator(g1h), +- _surviving_alloc_buffer(g1h->desired_plab_sz(InCSetState::Young)), +- _tenured_alloc_buffer(g1h->desired_plab_sz(InCSetState::Old)) { ++ G1ParGCAllocator(g1h) { + for (uint state = 0; state < InCSetState::Num; state++) { + _alloc_buffers[state] = NULL; ++ uint length = alloc_buffers_length(state); ++ _alloc_buffers[state] = NEW_C_HEAP_ARRAY(G1ParGCAllocBuffer*, length, mtGC); ++ for (uint node_index = 0; node_index < length; node_index++) { ++ _alloc_buffers[state][node_index] = new G1ParGCAllocBuffer(_g1h->desired_plab_sz(state)); ++ } ++ } ++} ++ ++G1DefaultParGCAllocator::~G1DefaultParGCAllocator() { ++ for (in_cset_state_t state = 0; state < InCSetState::Num; state++) { ++ uint length = alloc_buffers_length(state); ++ for (uint node_index = 0; node_index < length; node_index++) { ++ delete _alloc_buffers[state][node_index]; ++ } ++ FREE_C_HEAP_ARRAY(G1ParGCAllocBuffer*, _alloc_buffers[state], mtGC); + } +- _alloc_buffers[InCSetState::Young] = &_surviving_alloc_buffer; +- _alloc_buffers[InCSetState::Old] = &_tenured_alloc_buffer; + } + + void G1DefaultParGCAllocator::retire_alloc_buffers() { + for (uint state = 0; state < InCSetState::Num; state++) { +- G1ParGCAllocBuffer* const buf = _alloc_buffers[state]; +- if (buf != NULL) { +- add_to_alloc_buffer_waste(buf->words_remaining()); +- buf->flush_stats_and_retire(_g1h->alloc_buffer_stats(state), ++ uint length = alloc_buffers_length(state); ++ for (uint node_index = 0; node_index < length; node_index++) { ++ G1ParGCAllocBuffer* const buf = _alloc_buffers[state][node_index]; ++ if (buf != NULL) { ++ add_to_alloc_buffer_waste(buf->words_remaining()); ++ buf->flush_stats_and_retire(_g1h->alloc_buffer_stats(state), + true /* end_of_gc */, + false /* retain */); ++ } + } + } + } ++ ++uint G1DefaultAllocator::current_node_index() const { ++ return _numa->index_of_current_thread(); ++} +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.hpp +index 04628b7de..9b26168a8 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.hpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.hpp +@@ -30,6 +30,8 @@ + #include "gc_implementation/g1/g1InCSetState.hpp" + #include "gc_implementation/shared/parGCAllocBuffer.hpp" + ++class G1NUMA; ++ + // Base class for G1 allocators. + class G1Allocator : public CHeapObj { + friend class VMStructs; +@@ -44,17 +46,27 @@ public: + G1Allocator(G1CollectedHeap* heap) : + _g1h(heap), _summary_bytes_used(0) { } + ++ // Node index of current thread. 
++ virtual uint current_node_index() const = 0; ++ + static G1Allocator* create_allocator(G1CollectedHeap* g1h); + +- virtual void init_mutator_alloc_region() = 0; +- virtual void release_mutator_alloc_region() = 0; ++ virtual void init_mutator_alloc_regions() = 0; ++ virtual void release_mutator_alloc_regions() = 0; + + virtual void init_gc_alloc_regions(EvacuationInfo& evacuation_info) = 0; + virtual void release_gc_alloc_regions(uint no_of_gc_workers, EvacuationInfo& evacuation_info) = 0; + virtual void abandon_gc_alloc_regions() = 0; + +- virtual MutatorAllocRegion* mutator_alloc_region(AllocationContext_t context) = 0; +- virtual SurvivorGCAllocRegion* survivor_gc_alloc_region(AllocationContext_t context) = 0; ++#ifdef ASSERT ++ // Do we currently have an active mutator region to allocate into? ++ bool has_mutator_alloc_region(); ++#endif ++ ++ virtual MutatorAllocRegion* mutator_alloc_region(uint node_index) = 0; ++ virtual SurvivorGCAllocRegion* survivor_gc_alloc_region(uint node_index) = 0; ++ virtual MutatorAllocRegion* mutator_alloc_region() = 0; ++ virtual SurvivorGCAllocRegion* survivor_gc_alloc_region() = 0; + virtual OldGCAllocRegion* old_gc_alloc_region(AllocationContext_t context) = 0; + virtual size_t used() = 0; + virtual bool is_retained_old_region(HeapRegion* hr) = 0; +@@ -63,6 +75,9 @@ public: + OldGCAllocRegion* old, + HeapRegion** retained); + ++ virtual HeapWord* attempt_allocation_locked(size_t word_size, bool bot_updates, uint &node) = 0; ++ virtual HeapWord* attempt_allocation_force(size_t word_size, bool bot_updates, uint node = G1NUMA::AnyNodeIndex) = 0; ++ + size_t used_unlocked() const { + return _summary_bytes_used; + } +@@ -93,37 +108,58 @@ public: + class G1DefaultAllocator : public G1Allocator { + protected: + // Alloc region used to satisfy mutator allocation requests. +- MutatorAllocRegion _mutator_alloc_region; ++ MutatorAllocRegion* _mutator_alloc_regions; + + // Alloc region used to satisfy allocation requests by the GC for + // survivor objects. +- SurvivorGCAllocRegion _survivor_gc_alloc_region; ++ SurvivorGCAllocRegion* _survivor_gc_alloc_regions; + + // Alloc region used to satisfy allocation requests by the GC for + // old objects. + OldGCAllocRegion _old_gc_alloc_region; + + HeapRegion* _retained_old_gc_alloc_region; ++ ++ G1NUMA* _numa; ++ // The number of MutatorAllocRegions used, one per memory node. 
++ size_t _num_alloc_regions; ++ + public: +- G1DefaultAllocator(G1CollectedHeap* heap) : G1Allocator(heap), _retained_old_gc_alloc_region(NULL) { } ++ G1DefaultAllocator(G1CollectedHeap* heap); ++ ~G1DefaultAllocator(); + +- virtual void init_mutator_alloc_region(); +- virtual void release_mutator_alloc_region(); ++ uint current_node_index() const; ++ uint num_nodes() { return (uint)_num_alloc_regions; } ++ ++ virtual void init_mutator_alloc_regions(); ++ virtual void release_mutator_alloc_regions(); + + virtual void init_gc_alloc_regions(EvacuationInfo& evacuation_info); + virtual void release_gc_alloc_regions(uint no_of_gc_workers, EvacuationInfo& evacuation_info); + virtual void abandon_gc_alloc_regions(); + ++ virtual HeapWord* attempt_allocation_locked(size_t word_size, bool bot_updates, uint &node); ++ virtual HeapWord* attempt_allocation_force(size_t word_size, bool bot_updates, uint node = G1NUMA::AnyNodeIndex); + virtual bool is_retained_old_region(HeapRegion* hr) { + return _retained_old_gc_alloc_region == hr; + } + +- virtual MutatorAllocRegion* mutator_alloc_region(AllocationContext_t context) { +- return &_mutator_alloc_region; ++ virtual MutatorAllocRegion* mutator_alloc_region() { ++ return &_mutator_alloc_regions[current_node_index()]; ++ } ++ ++ virtual SurvivorGCAllocRegion* survivor_gc_alloc_region() { ++ return &_survivor_gc_alloc_regions[current_node_index()]; + } + +- virtual SurvivorGCAllocRegion* survivor_gc_alloc_region(AllocationContext_t context) { +- return &_survivor_gc_alloc_region; ++ virtual MutatorAllocRegion* mutator_alloc_region(uint node_index) { ++ assert(node_index < _num_alloc_regions, err_msg("Invalid index: %u", node_index)); ++ return &_mutator_alloc_regions[node_index]; ++ } ++ ++ virtual SurvivorGCAllocRegion* survivor_gc_alloc_region(uint node_index) { ++ assert(node_index < _num_alloc_regions, err_msg("Invalid index: %u", node_index)); ++ return &_survivor_gc_alloc_regions[node_index]; + } + + virtual OldGCAllocRegion* old_gc_alloc_region(AllocationContext_t context) { +@@ -136,9 +172,11 @@ public: + size_t result = _summary_bytes_used; + + // Read only once in case it is set to NULL concurrently +- HeapRegion* hr = mutator_alloc_region(AllocationContext::current())->get(); +- if (hr != NULL) { +- result += hr->used(); ++ for (uint i = 0; i < _num_alloc_regions; i++) { ++ HeapRegion* hr = mutator_alloc_region(i)->get(); ++ if (hr != NULL) { ++ result += hr->used(); ++ } + } + return result; + } +@@ -173,6 +211,7 @@ class G1ParGCAllocator : public CHeapObj { + protected: + G1CollectedHeap* _g1h; + ++ typedef InCSetState::in_cset_state_t in_cset_state_t; + // The survivor alignment in effect in bytes. + // == 0 : don't align survivors + // != 0 : align survivors to that alignment +@@ -187,7 +226,12 @@ protected: + void add_to_undo_waste(size_t waste) { _undo_waste += waste; } + + virtual void retire_alloc_buffers() = 0; +- virtual G1ParGCAllocBuffer* alloc_buffer(InCSetState dest, AllocationContext_t context) = 0; ++ virtual G1ParGCAllocBuffer* alloc_buffer(InCSetState dest, AllocationContext_t context, uint node_index) = 0; ++ ++ // Returns the number of allocation buffers for the given dest. ++ // There is only 1 buffer for Old while Young may have multiple buffers depending on ++ // active NUMA nodes. ++ inline uint alloc_buffers_length(in_cset_state_t dest) const; + + // Calculate the survivor space object alignment in bytes. Returns that or 0 if + // there are no restrictions on survivor alignment. 
+@@ -203,30 +247,34 @@ protected: + } + } + ++ G1NUMA* _numa; ++ // The number of MutatorAllocRegions used, one per memory node. ++ size_t _num_alloc_regions; ++ + public: +- G1ParGCAllocator(G1CollectedHeap* g1h) : +- _g1h(g1h), _survivor_alignment_bytes(calc_survivor_alignment_bytes()), +- _alloc_buffer_waste(0), _undo_waste(0) { +- } ++ G1ParGCAllocator(G1CollectedHeap* g1h); + + static G1ParGCAllocator* create_allocator(G1CollectedHeap* g1h); + + size_t alloc_buffer_waste() { return _alloc_buffer_waste; } + size_t undo_waste() {return _undo_waste; } + ++ uint num_nodes() const { return (uint)_num_alloc_regions; } + // Allocate word_sz words in dest, either directly into the regions or by + // allocating a new PLAB. Returns the address of the allocated memory, NULL if + // not successful. + HeapWord* allocate_direct_or_new_plab(InCSetState dest, + size_t word_sz, +- AllocationContext_t context); ++ AllocationContext_t context, ++ uint node_index); + + // Allocate word_sz words in the PLAB of dest. Returns the address of the + // allocated memory, NULL if not successful. + HeapWord* plab_allocate(InCSetState dest, + size_t word_sz, +- AllocationContext_t context) { +- G1ParGCAllocBuffer* buffer = alloc_buffer(dest, context); ++ AllocationContext_t context, ++ uint node_index) { ++ G1ParGCAllocBuffer* buffer = alloc_buffer(dest, context, node_index); + if (_survivor_alignment_bytes == 0) { + return buffer->allocate(word_sz); + } else { +@@ -235,19 +283,19 @@ public: + } + + HeapWord* allocate(InCSetState dest, size_t word_sz, +- AllocationContext_t context) { +- HeapWord* const obj = plab_allocate(dest, word_sz, context); ++ AllocationContext_t context, uint node_index) { ++ HeapWord* const obj = plab_allocate(dest, word_sz, context, node_index); + if (obj != NULL) { + return obj; + } +- return allocate_direct_or_new_plab(dest, word_sz, context); ++ return allocate_direct_or_new_plab(dest, word_sz, context, node_index); + } + +- void undo_allocation(InCSetState dest, HeapWord* obj, size_t word_sz, AllocationContext_t context) { +- if (alloc_buffer(dest, context)->contains(obj)) { +- assert(alloc_buffer(dest, context)->contains(obj + word_sz - 1), ++ void undo_allocation(InCSetState dest, HeapWord* obj, size_t word_sz, AllocationContext_t context, uint node_index) { ++ if (alloc_buffer(dest, context, node_index)->contains(obj)) { ++ assert(alloc_buffer(dest, context, node_index)->contains(obj + word_sz - 1), + "should contain whole object"); +- alloc_buffer(dest, context)->undo_allocation(obj, word_sz); ++ alloc_buffer(dest, context, node_index)->undo_allocation(obj, word_sz); + } else { + CollectedHeap::fill_with_object(obj, word_sz); + add_to_undo_waste(word_sz); +@@ -256,19 +304,38 @@ public: + }; + + class G1DefaultParGCAllocator : public G1ParGCAllocator { +- G1ParGCAllocBuffer _surviving_alloc_buffer; +- G1ParGCAllocBuffer _tenured_alloc_buffer; +- G1ParGCAllocBuffer* _alloc_buffers[InCSetState::Num]; ++ G1ParGCAllocBuffer** _alloc_buffers[InCSetState::Num]; + + public: + G1DefaultParGCAllocator(G1CollectedHeap* g1h); ++ ~G1DefaultParGCAllocator(); + +- virtual G1ParGCAllocBuffer* alloc_buffer(InCSetState dest, AllocationContext_t context) { ++ virtual G1ParGCAllocBuffer* alloc_buffer(InCSetState dest, AllocationContext_t context, uint node_index) { + assert(dest.is_valid(), + err_msg("Allocation buffer index out-of-bounds: " CSETSTATE_FORMAT, dest.value())); + assert(_alloc_buffers[dest.value()] != NULL, + err_msg("Allocation buffer is NULL: " CSETSTATE_FORMAT, dest.value())); +- 
return _alloc_buffers[dest.value()]; ++ return alloc_buffer(dest.value(), node_index); ++ } ++ ++ inline G1ParGCAllocBuffer* alloc_buffer(in_cset_state_t dest, uint node_index) const { ++ assert(dest < InCSetState::Num, err_msg("Allocation buffer index out of bounds: %u", dest)); ++ ++ if (dest == InCSetState::Young) { ++ assert(node_index < alloc_buffers_length(dest), ++ err_msg("Allocation buffer index out of bounds: %u, %u", dest, node_index)); ++ return _alloc_buffers[dest][node_index]; ++ } else { ++ return _alloc_buffers[dest][0]; ++ } ++ } ++ ++ inline uint alloc_buffers_length(in_cset_state_t dest) const { ++ if (dest == InCSetState::Young) { ++ return num_nodes(); ++ } else { ++ return 1; ++ } + } + + virtual void retire_alloc_buffers() ; +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +index 5cb135354..57dcff3f5 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.cpp +@@ -75,6 +75,9 @@ size_t G1CollectedHeap::_humongous_object_threshold_in_words = 0; + // to-be-collected) are printed at "strategic" points before / during + // / after the collection --- this is useful for debugging + #define YOUNG_LIST_VERBOSE 0 ++ ++#define THREAD_MIGRATION_MAX_TIMES 1 ++ + // CURRENT STATUS + // This file is under construction. Search for "FIXME". + +@@ -515,7 +518,7 @@ G1CollectedHeap* G1CollectedHeap::_g1h; + // Private methods. + + HeapRegion* +-G1CollectedHeap::new_region_try_secondary_free_list(bool is_old) { ++G1CollectedHeap::new_region_try_secondary_free_list(bool is_old, uint node_index) { + MutexLockerEx x(SecondaryFreeList_lock, Mutex::_no_safepoint_check_flag); + while (!_secondary_free_list.is_empty() || free_regions_coming()) { + if (!_secondary_free_list.is_empty()) { +@@ -531,7 +534,7 @@ G1CollectedHeap::new_region_try_secondary_free_list(bool is_old) { + + assert(_hrm.num_free_regions() > 0, "if the secondary_free_list was not " + "empty we should have moved at least one entry to the free_list"); +- HeapRegion* res = _hrm.allocate_free_region(is_old); ++ HeapRegion* res = _hrm.allocate_free_region(is_old, node_index); + if (G1ConcRegionFreeingVerbose) { + gclog_or_tty->print_cr("G1ConcRegionFreeing [region alloc] : " + "allocated " HR_FORMAT " from secondary_free_list", +@@ -553,7 +556,7 @@ G1CollectedHeap::new_region_try_secondary_free_list(bool is_old) { + return NULL; + } + +-HeapRegion* G1CollectedHeap::new_region(size_t word_size, bool is_old, bool do_expand) { ++HeapRegion* G1CollectedHeap::new_region(size_t word_size, bool is_old, bool do_expand, uint node_index) { + assert(!isHumongous(word_size) || word_size <= HeapRegion::GrainWords, + "the only time we use this to allocate a humongous region is " + "when we are allocating a single humongous region"); +@@ -565,21 +568,21 @@ HeapRegion* G1CollectedHeap::new_region(size_t word_size, bool is_old, bool do_e + gclog_or_tty->print_cr("G1ConcRegionFreeing [region alloc] : " + "forced to look at the secondary_free_list"); + } +- res = new_region_try_secondary_free_list(is_old); ++ res = new_region_try_secondary_free_list(is_old, node_index); + if (res != NULL) { + return res; + } + } + } + +- res = _hrm.allocate_free_region(is_old); ++ res = _hrm.allocate_free_region(is_old, node_index); + + if (res == NULL) { + if (G1ConcRegionFreeingVerbose) { + gclog_or_tty->print_cr("G1ConcRegionFreeing [region alloc] : " + "res == NULL, trying the 
secondary_free_list"); + } +- res = new_region_try_secondary_free_list(is_old); ++ res = new_region_try_secondary_free_list(is_old, node_index); + } + if (res == NULL && do_expand && _expand_heap_after_alloc_failure) { + // Currently, only attempts to allocate GC alloc regions set +@@ -593,12 +596,12 @@ HeapRegion* G1CollectedHeap::new_region(size_t word_size, bool is_old, bool do_e + ergo_format_reason("region allocation request failed") + ergo_format_byte("allocation request"), + word_size * HeapWordSize); +- if (expand(word_size * HeapWordSize)) { ++ if (expand_single_region(node_index)) { + // Given that expand() succeeded in expanding the heap, and we + // always expand the heap by an amount aligned to the heap + // region size, the free list should in theory not be empty. + // In either case allocate_free_region() will check for NULL. +- res = _hrm.allocate_free_region(is_old); ++ res = _hrm.allocate_free_region(is_old, node_index); + } else { + _expand_heap_after_alloc_failure = false; + } +@@ -919,22 +922,29 @@ HeapWord* G1CollectedHeap::attempt_allocation_slow(size_t word_size, + + { + MutexLockerEx x(Heap_lock); +- result = _allocator->mutator_alloc_region(context)->attempt_allocation_locked(word_size, +- false /* bot_updates */); ++ uint node_idx_by_locked_alloc = G1NUMA::AnyNodeIndex; ++ result = _allocator->attempt_allocation_locked(word_size, ++ false /* bot_updates */, ++ node_idx_by_locked_alloc); + if (result != NULL) { + return result; + } + +- // If we reach here, attempt_allocation_locked() above failed to +- // allocate a new region. So the mutator alloc region should be NULL. +- assert(_allocator->mutator_alloc_region(context)->get() == NULL, "only way to get here"); +- + if (GC_locker::is_active_and_needs_gc()) { + if (g1_policy()->can_expand_young_list()) { + // No need for an ergo verbose message here, + // can_expand_young_list() does this when it returns true. +- result = _allocator->mutator_alloc_region(context)->attempt_allocation_force(word_size, +- false /* bot_updates */); ++ uint curr_node_index = _allocator->current_node_index(); ++ uint thread_migration_times = 0; ++ while (curr_node_index != node_idx_by_locked_alloc && thread_migration_times < THREAD_MIGRATION_MAX_TIMES) { ++ result = _allocator->attempt_allocation_locked(word_size, false, node_idx_by_locked_alloc); ++ if (result != NULL) { ++ return result; ++ } ++ thread_migration_times++; ++ curr_node_index = _allocator->current_node_index(); ++ } ++ result = _allocator->attempt_allocation_force(word_size, false /* bot_updates */, node_idx_by_locked_alloc); + if (result != NULL) { + return result; + } +@@ -994,7 +1004,7 @@ HeapWord* G1CollectedHeap::attempt_allocation_slow(size_t word_size, + // first attempt (without holding the Heap_lock) here and the + // follow-on attempt will be at the start of the next loop + // iteration (after taking the Heap_lock). 
+- result = _allocator->mutator_alloc_region(context)->attempt_allocation(word_size, ++ result = _allocator->mutator_alloc_region()->attempt_allocation(word_size, + false /* bot_updates */); + if (result != NULL) { + return result; +@@ -1134,12 +1144,12 @@ HeapWord* G1CollectedHeap::attempt_allocation_at_safepoint(size_t word_size, + AllocationContext_t context, + bool expect_null_mutator_alloc_region) { + assert_at_safepoint(true /* should_be_vm_thread */); +- assert(_allocator->mutator_alloc_region(context)->get() == NULL || ++ assert(!_allocator->has_mutator_alloc_region() || + !expect_null_mutator_alloc_region, + "the current alloc region was unexpectedly found to be non-NULL"); + + if (!isHumongous(word_size)) { +- return _allocator->mutator_alloc_region(context)->attempt_allocation_locked(word_size, ++ return _allocator->mutator_alloc_region()->attempt_allocation_locked(word_size, + false /* bot_updates */); + } else { + HeapWord* result = humongous_obj_allocate(word_size, context); +@@ -1341,7 +1351,7 @@ bool G1CollectedHeap::do_collection(bool explicit_gc, + concurrent_mark()->abort(); + + // Make sure we'll choose a new allocation region afterwards. +- _allocator->release_mutator_alloc_region(); ++ _allocator->release_mutator_alloc_regions(); + _allocator->abandon_gc_alloc_regions(); + g1_rem_set()->cleanupHRRS(); + +@@ -1517,7 +1527,7 @@ bool G1CollectedHeap::do_collection(bool explicit_gc, + + clear_cset_fast_test(); + +- _allocator->init_mutator_alloc_region(); ++ _allocator->init_mutator_alloc_regions(); + + double end = os::elapsedTime(); + g1_policy()->record_full_collection_end(); +@@ -1792,6 +1802,18 @@ bool G1CollectedHeap::expand(size_t expand_bytes) { + return regions_to_expand > 0; + } + ++bool G1CollectedHeap::expand_single_region(uint node_index) { ++ uint expanded_by = _hrm.expand_on_preferred_node(node_index); ++ ++ if (expanded_by == 0) { ++ assert(is_maximal_no_gc(), err_msg("Should be no regions left, available: %u", _hrm.available())); ++ return false; ++ } ++ ++ g1_policy()->record_new_heap_size(num_regions()); ++ return true; ++} ++ + void G1CollectedHeap::shrink_helper(size_t shrink_bytes) { + size_t aligned_shrink_bytes = + ReservedSpace::page_align_size_down(shrink_bytes); +@@ -1853,6 +1875,7 @@ G1CollectedHeap::G1CollectedHeap(G1CollectorPolicy* policy_) : + _ref_processor_cm(NULL), + _ref_processor_stw(NULL), + _bot_shared(NULL), ++ _numa(G1NUMA::numa()), + _evac_failure_scan_stack(NULL), + _mark_in_progress(false), + _cg1r(NULL), +@@ -2015,10 +2038,11 @@ jint G1CollectedHeap::initialize() { + // Carve out the G1 part of the heap. + + ReservedSpace g1_rs = heap_rs.first_part(max_byte_size); ++ size_t page_size = UseLargePages ? os::large_page_size() : os::vm_page_size(); + G1RegionToSpaceMapper* heap_storage = + G1RegionToSpaceMapper::create_mapper(g1_rs, + g1_rs.size(), +- UseLargePages ? os::large_page_size() : os::vm_page_size(), ++ page_size, + HeapRegion::GrainBytes, + 1, + mtJavaHeap); +@@ -2077,6 +2101,7 @@ jint G1CollectedHeap::initialize() { + _humongous_reclaim_candidates.initialize(start, end, granularity); + } + ++ _numa->set_region_info(HeapRegion::GrainBytes, page_size); + // Create the ConcurrentMark data structure and thread. + // (Must do this late, so that "max_regions" is defined.) 
+ _cm = new ConcurrentMark(this, prev_bitmap_storage, next_bitmap_storage); +@@ -2145,7 +2170,7 @@ jint G1CollectedHeap::initialize() { + dummy_region->set_top(dummy_region->end()); + G1AllocRegion::setup(this, dummy_region); + +- _allocator->init_mutator_alloc_region(); ++ _allocator->init_mutator_alloc_regions(); + + // Do create of the monitoring and management support so that + // values in the heap have been properly initialized. +@@ -2975,8 +3000,7 @@ size_t G1CollectedHeap::unsafe_max_tlab_alloc(Thread* ignored) const { + // Also, this value can be at most the humongous object threshold, + // since we can't allow tlabs to grow big enough to accommodate + // humongous objects. +- +- HeapRegion* hr = _allocator->mutator_alloc_region(AllocationContext::current())->get(); ++ HeapRegion* hr = _allocator->mutator_alloc_region()->get(); + size_t max_tlab = max_tlab_size() * wordSize; + if (hr == NULL) { + return max_tlab; +@@ -3535,6 +3559,15 @@ void G1CollectedHeap::print_on(outputStream* st) const { + st->print("%u survivors (" SIZE_FORMAT "K)", survivor_regions, + (size_t) survivor_regions * HeapRegion::GrainBytes / K); + st->cr(); ++ if (_numa->is_enabled()) { ++ uint num_nodes = _numa->num_active_nodes(); ++ st->print(" remaining free region(s) on each NUMA node: "); ++ const int* node_ids = _numa->node_ids(); ++ for (uint node_index = 0; node_index < num_nodes; node_index++) { ++ st->print("%d=%u ", node_ids[node_index], _hrm.num_free_regions(node_index)); ++ } ++ st->cr(); ++ } + MetaspaceAux::print_on(st); + } + +@@ -4032,6 +4065,8 @@ void G1CollectedHeap::log_gc_footer(double pause_time_sec) { + g1_policy()->phase_times()->note_gc_end(); + g1_policy()->phase_times()->print(pause_time_sec); + g1_policy()->print_detailed_heap_transition(); ++ // Print NUMA statistics. ++ _numa->print_statistics(); + } else { + if (evacuation_failed()) { + gclog_or_tty->print("--"); +@@ -4042,6 +4077,14 @@ void G1CollectedHeap::log_gc_footer(double pause_time_sec) { + gclog_or_tty->flush(); + } + ++void G1CollectedHeap::verify_numa_regions(const char* desc) { ++ if (G1Log::finer()) { ++ // Iterate all heap regions to print matching between preferred numa id and actual numa id. ++ G1NodeIndexCheckClosure cl(desc, _numa); ++ heap_region_iterate(&cl); ++ } ++} ++ + bool + G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { + assert_at_safepoint(true /* should_be_vm_thread */); +@@ -4149,7 +4192,7 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { + + verify_before_gc(); + check_bitmaps("GC Start"); +- ++ verify_numa_regions("GC Start"); + COMPILER2_PRESENT(DerivedPointerTable::clear()); + + // Please see comment in g1CollectedHeap.hpp and +@@ -4169,7 +4212,7 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { + + // Forget the current alloc region (we might even choose it to be part + // of the collection set!). +- _allocator->release_mutator_alloc_region(); ++ _allocator->release_mutator_alloc_regions(); + + // We should call this after we retire the mutator alloc + // region(s) so that all the ALLOC / RETIRE events are generated +@@ -4223,7 +4266,6 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { + #endif // YOUNG_LIST_VERBOSE + + g1_policy()->finalize_cset(target_pause_time_ms, evacuation_info); +- + // Make sure the remembered sets are up to date. 
This needs to be + // done before register_humongous_regions_with_cset(), because the + // remembered sets are used there to choose eager reclaim candidates. +@@ -4327,7 +4369,7 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { + g1_policy()->print_collection_set(g1_policy()->inc_cset_head(), gclog_or_tty); + #endif // YOUNG_LIST_VERBOSE + +- _allocator->init_mutator_alloc_region(); ++ _allocator->init_mutator_alloc_regions(); + + { + size_t expand_bytes = g1_policy()->expansion_amount(); +@@ -4388,7 +4430,7 @@ G1CollectedHeap::do_collection_pause_at_safepoint(double target_pause_time_ms) { + + verify_after_gc(); + check_bitmaps("GC End"); +- ++ verify_numa_regions("GC End"); + assert(!ref_processor_stw()->discovery_enabled(), "Postcondition"); + ref_processor_stw()->verify_no_references_recorded(); + +@@ -4744,6 +4786,7 @@ class G1KlassScanClosure : public KlassClosure { + class G1ParTask : public AbstractGangTask { + protected: + G1CollectedHeap* _g1h; ++ G1ParScanThreadStateSet* _per_thread_states; + RefToScanQueueSet *_queues; + G1RootProcessor* _root_processor; + TaskTerminator _terminator; +@@ -4753,9 +4796,10 @@ protected: + Mutex* stats_lock() { return &_stats_lock; } + + public: +- G1ParTask(G1CollectedHeap* g1h, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor) ++ G1ParTask(G1CollectedHeap* g1h, G1ParScanThreadStateSet* per_thread_states, RefToScanQueueSet *task_queues, G1RootProcessor* root_processor) + : AbstractGangTask("G1 collection"), + _g1h(g1h), ++ _per_thread_states(per_thread_states), + _queues(task_queues), + _root_processor(root_processor), + _terminator(0, _queues), +@@ -4816,26 +4860,26 @@ public: + + ReferenceProcessor* rp = _g1h->ref_processor_stw(); + +- G1ParScanThreadState pss(_g1h, worker_id, rp); +- G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, &pss, rp); ++ G1ParScanThreadState* pss = _per_thread_states->state_for_worker(worker_id, rp); ++ G1ParScanHeapEvacFailureClosure evac_failure_cl(_g1h, pss, rp); + +- pss.set_evac_failure_closure(&evac_failure_cl); ++ pss->set_evac_failure_closure(&evac_failure_cl); + + bool only_young = _g1h->g1_policy()->gcs_are_young(); + + // Non-IM young GC. +- G1ParCopyClosure scan_only_root_cl(_g1h, &pss, rp); ++ G1ParCopyClosure scan_only_root_cl(_g1h, pss, rp); + G1CLDClosure scan_only_cld_cl(&scan_only_root_cl, + only_young, // Only process dirty klasses. + false); // No need to claim CLDs. + // IM young GC. + // Strong roots closures. +- G1ParCopyClosure scan_mark_root_cl(_g1h, &pss, rp); ++ G1ParCopyClosure scan_mark_root_cl(_g1h, pss, rp); + G1CLDClosure scan_mark_cld_cl(&scan_mark_root_cl, + false, // Process all klasses. + true); // Need to claim CLDs. + // Weak roots closures. +- G1ParCopyClosure scan_mark_weak_root_cl(_g1h, &pss, rp); ++ G1ParCopyClosure scan_mark_weak_root_cl(_g1h, pss, rp); + G1CLDClosure scan_mark_weak_cld_cl(&scan_mark_weak_root_cl, + false, // Process all klasses. + true); // Need to claim CLDs. 
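From this point on, the worker closures receive a per-worker G1ParScanThreadState pointer instead of building the state on the stack; the states are owned by the G1ParScanThreadStateSet introduced near the end of this patch, created lazily per worker and flushed once after the parallel phase. A reduced sketch of that ownership pattern with toy types (only state_for_worker() and flush() correspond to names used in the patch):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct ToyWorkerState {
      size_t copied_words;
      ToyWorkerState() : copied_words(0) {}
      void flush_stats() { /* merge per-worker counters into global statistics */ }
    };

    class ToyWorkerStateSet {
      std::vector<ToyWorkerState*> _states;   // one slot per worker
      bool _flushed;
    public:
      explicit ToyWorkerStateSet(unsigned n_workers)
        : _states(n_workers, (ToyWorkerState*)NULL), _flushed(false) {}

      // Created on first use, so only workers that actually run allocate a state.
      ToyWorkerState* state_for_worker(unsigned worker_id) {
        assert(worker_id < _states.size());
        if (_states[worker_id] == NULL) {
          _states[worker_id] = new ToyWorkerState();
        }
        return _states[worker_id];
      }

      // Called once after the parallel phase: merge and free every state.
      void flush() {
        assert(!_flushed);
        for (size_t i = 0; i < _states.size(); i++) {
          if (_states[i] != NULL) {
            _states[i]->flush_stats();
            delete _states[i];
            _states[i] = NULL;
          }
        }
        _flushed = true;
      }

      ~ToyWorkerStateSet() { assert(_flushed); }
    };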
+@@ -4866,7 +4910,7 @@ public: + weak_cld_cl = &scan_only_cld_cl; + } + +- pss.start_strong_roots(); ++ pss->start_strong_roots(); + + _root_processor->evacuate_roots(strong_root_cl, + weak_root_cl, +@@ -4875,31 +4919,31 @@ public: + trace_metadata, + worker_id); + +- G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, &pss); ++ G1ParPushHeapRSClosure push_heap_rs_cl(_g1h, pss); + _root_processor->scan_remembered_sets(&push_heap_rs_cl, + weak_root_cl, + worker_id); +- pss.end_strong_roots(); ++ pss->end_strong_roots(); + + { + double start = os::elapsedTime(); +- G1ParEvacuateFollowersClosure evac(_g1h, &pss, _queues, _terminator.terminator()); ++ G1ParEvacuateFollowersClosure evac(_g1h, pss, _queues, _terminator.terminator()); + evac.do_void(); + double elapsed_sec = os::elapsedTime() - start; +- double term_sec = pss.term_time(); ++ double term_sec = pss->term_time(); + _g1h->g1_policy()->phase_times()->add_time_secs(G1GCPhaseTimes::ObjCopy, worker_id, elapsed_sec - term_sec); + _g1h->g1_policy()->phase_times()->record_time_secs(G1GCPhaseTimes::Termination, worker_id, term_sec); +- _g1h->g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::Termination, worker_id, pss.term_attempts()); ++ _g1h->g1_policy()->phase_times()->record_thread_work_item(G1GCPhaseTimes::Termination, worker_id, pss->term_attempts()); + } +- _g1h->g1_policy()->record_thread_age_table(pss.age_table()); +- _g1h->update_surviving_young_words(pss.surviving_young_words()+1); ++ _g1h->g1_policy()->record_thread_age_table(pss->age_table()); ++ _g1h->update_surviving_young_words(pss->surviving_young_words()+1); + + if (ParallelGCVerbose) { + MutexLocker x(stats_lock()); +- pss.print_termination_stats(worker_id); ++ pss->print_termination_stats(worker_id); + } + +- assert(pss.queue_is_empty(), "should be empty"); ++ assert(pss->queue_is_empty(), "should be empty"); + + // Close the inner scope so that the ResourceMark and HandleMark + // destructors are executed here and are included as part of the +@@ -5890,8 +5934,9 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) { + double end_par_time_sec; + + { ++ G1ParScanThreadStateSet per_thread_states(this, workers()->active_workers()); + G1RootProcessor root_processor(this); +- G1ParTask g1_par_task(this, _task_queues, &root_processor); ++ G1ParTask g1_par_task(this, &per_thread_states, _task_queues, &root_processor); + // InitialMark needs claim bits to keep track of the marked-through CLDs. + if (g1_policy()->during_initial_mark_pause()) { + ClassLoaderDataGraph::clear_claimed_marks(); +@@ -5916,6 +5961,8 @@ void G1CollectedHeap::evacuate_collection_set(EvacuationInfo& evacuation_info) { + // elapsed time before closing the scope so that time + // taken for the destructor is NOT included in the + // reported parallel time. 
++ ++ per_thread_states.flush(); + } + + G1GCPhaseTimes* phase_times = g1_policy()->phase_times(); +@@ -6325,7 +6372,6 @@ void G1CollectedHeap::free_collection_set(HeapRegion* cs_head, EvacuationInfo& e + // all we need to do to clear the young list is clear its + // head and length, and unlink any young regions in the code below + _young_list->clear(); +- + G1CollectorPolicy* policy = g1_policy(); + + double start_sec = os::elapsedTime(); +@@ -6803,7 +6849,8 @@ bool G1CollectedHeap::is_in_closed_subset(const void* p) const { + // Methods for the mutator alloc region + + HeapRegion* G1CollectedHeap::new_mutator_alloc_region(size_t word_size, +- bool force) { ++ bool force, ++ uint node_index) { + assert_heap_locked_or_at_safepoint(true /* should_be_vm_thread */); + assert(!force || g1_policy()->can_expand_young_list(), + "if force is true we should be able to expand the young list"); +@@ -6811,7 +6858,8 @@ HeapRegion* G1CollectedHeap::new_mutator_alloc_region(size_t word_size, + if (force || !young_list_full) { + HeapRegion* new_alloc_region = new_region(word_size, + false /* is_old */, +- false /* do_expand */); ++ false /* do_expand */, ++ node_index); + if (new_alloc_region != NULL) { + set_region_short_lived_locked(new_alloc_region); + _hr_printer.alloc(new_alloc_region, G1HRPrinter::Eden, young_list_full); +@@ -6856,14 +6904,16 @@ void G1CollectedHeap::set_par_threads() { + + HeapRegion* G1CollectedHeap::new_gc_alloc_region(size_t word_size, + uint count, +- InCSetState dest) { ++ InCSetState dest, ++ uint node_index) { + assert(FreeList_lock->owned_by_self(), "pre-condition"); + + if (count < g1_policy()->max_regions(dest)) { + const bool is_survivor = (dest.is_young()); + HeapRegion* new_alloc_region = new_region(word_size, + !is_survivor, +- true /* do_expand */); ++ true /* do_expand */, ++ node_index); + if (new_alloc_region != NULL) { + // We really only need to do this for old regions given that we + // should never scan survivors. But it doesn't hurt to do it +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp +index f8c52e681..61d5aad2d 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.hpp +@@ -268,6 +268,9 @@ private: + // Callback for region mapping changed events. + G1RegionMappingChangedListener _listener; + ++ // Handle G1 NUMA support. ++ G1NUMA* _numa; ++ + // The sequence of all heap regions in the heap. + HeapRegionManager _hrm; + +@@ -468,14 +471,14 @@ protected: + // check whether there's anything available on the + // secondary_free_list and/or wait for more regions to appear on + // that list, if _free_regions_coming is set. +- HeapRegion* new_region_try_secondary_free_list(bool is_old); ++ HeapRegion* new_region_try_secondary_free_list(bool is_old, uint node_index); + + // Try to allocate a single non-humongous HeapRegion sufficient for + // an allocation of the given word_size. If do_expand is true, + // attempt to expand the heap if necessary to satisfy the allocation + // request. If the region is to be used as an old region or for a + // humongous object, set is_old to true. If not, to false. 
+- HeapRegion* new_region(size_t word_size, bool is_old, bool do_expand); ++ HeapRegion* new_region(size_t word_size, bool is_old, bool do_expand, uint node_index = G1NUMA::AnyNodeIndex); + + // Initialize a contiguous set of free regions of length num_regions + // and starting at index first so that they appear as a single +@@ -573,14 +576,16 @@ protected: + // may not be a humongous - it must fit into a single heap region. + inline HeapWord* par_allocate_during_gc(InCSetState dest, + size_t word_size, +- AllocationContext_t context); ++ AllocationContext_t context, ++ uint node_index); + // Ensure that no further allocations can happen in "r", bearing in mind + // that parallel threads might be attempting allocations. + void par_allocate_remaining_space(HeapRegion* r); + + // Allocation attempt during GC for a survivor object / PLAB. + inline HeapWord* survivor_attempt_allocation(size_t word_size, +- AllocationContext_t context); ++ AllocationContext_t context, ++ uint node_index); + + // Allocation attempt during GC for an old object / PLAB. + inline HeapWord* old_attempt_allocation(size_t word_size, +@@ -589,13 +594,13 @@ protected: + // These methods are the "callbacks" from the G1AllocRegion class. + + // For mutator alloc regions. +- HeapRegion* new_mutator_alloc_region(size_t word_size, bool force); ++ HeapRegion* new_mutator_alloc_region(size_t word_size, bool force, uint node_index); + void retire_mutator_alloc_region(HeapRegion* alloc_region, + size_t allocated_bytes); + + // For GC alloc regions. + HeapRegion* new_gc_alloc_region(size_t word_size, uint count, +- InCSetState dest); ++ InCSetState dest, uint node_index); + void retire_gc_alloc_region(HeapRegion* alloc_region, + size_t allocated_bytes, InCSetState dest); + +@@ -641,6 +646,8 @@ protected: + // after processing. + void enqueue_discovered_references(uint no_of_gc_workers); + ++ void verify_numa_regions(const char* desc); ++ + public: + + G1Allocator* allocator() { +@@ -654,11 +661,13 @@ public: + return _g1mm; + } + ++ G1NUMA* numa() const { return _numa; } + // Expand the garbage-first heap by at least the given size (in bytes!). + // Returns true if the heap was expanded by the requested amount; + // false otherwise. + // (Rounds up to a HeapRegion boundary.) + bool expand(size_t expand_bytes); ++ bool expand_single_region(uint node_index); + + // Returns the PLAB statistics for a given destination. 
+ inline PLABStats* alloc_buffer_stats(InCSetState dest); +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp +index c8b270aa3..9350c7bac 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1CollectedHeap.inline.hpp +@@ -58,10 +58,11 @@ size_t G1CollectedHeap::desired_plab_sz(InCSetState dest) { + + HeapWord* G1CollectedHeap::par_allocate_during_gc(InCSetState dest, + size_t word_size, +- AllocationContext_t context) { ++ AllocationContext_t context, ++ uint node_index) { + switch (dest.value()) { + case InCSetState::Young: +- return survivor_attempt_allocation(word_size, context); ++ return survivor_attempt_allocation(word_size, context, node_index); + case InCSetState::Old: + return old_attempt_allocation(word_size, context); + default: +@@ -138,7 +139,7 @@ inline HeapWord* G1CollectedHeap::attempt_allocation(size_t word_size, + "be called for humongous allocation requests"); + + AllocationContext_t context = AllocationContext::current(); +- HeapWord* result = _allocator->mutator_alloc_region(context)->attempt_allocation(word_size, ++ HeapWord* result = _allocator->mutator_alloc_region()->attempt_allocation(word_size, + false /* bot_updates */); + if (result == NULL) { + result = attempt_allocation_slow(word_size, +@@ -154,15 +155,16 @@ inline HeapWord* G1CollectedHeap::attempt_allocation(size_t word_size, + } + + inline HeapWord* G1CollectedHeap::survivor_attempt_allocation(size_t word_size, +- AllocationContext_t context) { ++ AllocationContext_t context, ++ uint node_index) { + assert(!isHumongous(word_size), + "we should not be seeing humongous-size allocations in this path"); + +- HeapWord* result = _allocator->survivor_gc_alloc_region(context)->attempt_allocation(word_size, ++ HeapWord* result = _allocator->survivor_gc_alloc_region(node_index)->attempt_allocation(word_size, + false /* bot_updates */); + if (result == NULL) { + MutexLockerEx x(FreeList_lock, Mutex::_no_safepoint_check_flag); +- result = _allocator->survivor_gc_alloc_region(context)->attempt_allocation_locked(word_size, ++ result = _allocator->survivor_gc_alloc_region(node_index)->attempt_allocation_locked(word_size, + false /* bot_updates */); + } + if (result != NULL) { +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1InCSetState.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1InCSetState.hpp +index 50639c330..cbeb93f34 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/g1InCSetState.hpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1InCSetState.hpp +@@ -58,10 +58,10 @@ struct InCSetState { + // or not, which is encoded by values < 0. + // The other values are simply encoded in increasing generation order, which + // makes getting the next generation fast by a simple increment. +- Humongous = -1, // The region is humongous - note that actually any value < 0 would be possible here. +- NotInCSet = 0, // The region is not in the collection set. +- Young = 1, // The region is in the collection set and a young region. +- Old = 2, // The region is in the collection set and an old region. ++ Humongous = -2, // The region is humongous - note that actually any value < 0 would be possible here. ++ NotInCSet = -1, // The region is not in the collection set. ++ Young = 0, // The region is in the collection set and a young region. ++ Old = 1, // The region is in the collection set and an old region. 
+     Num
+   };
+ 
+diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1NUMA.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1NUMA.cpp
+new file mode 100644
+index 000000000..05b4d8989
+--- /dev/null
++++ b/hotspot/src/share/vm/gc_implementation/g1/g1NUMA.cpp
+@@ -0,0 +1,311 @@
++/*
++ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
++ *
++ * This code is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License version 2 only, as
++ * published by the Free Software Foundation.
++ *
++ * This code is distributed in the hope that it will be useful, but WITHOUT
++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
++ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#include "precompiled.hpp"
++#include "gc_implementation/g1/g1Log.hpp"
++#include "gc_implementation/g1/g1NUMA.hpp"
++#include "memory/allocation.hpp"
++#include "runtime/globals.hpp"
++#include "runtime/os.hpp"
++#include "utilities/align.hpp"
++
++G1NUMA* G1NUMA::_inst = NULL;
++
++size_t G1NUMA::region_size() const {
++  assert(_region_size > 0, "Heap region size is not yet set");
++  return _region_size;
++}
++
++size_t G1NUMA::page_size() const {
++  assert(_page_size > 0, "Page size is not yet set");
++  return _page_size;
++}
++
++bool G1NUMA::is_enabled() const { return num_active_nodes() > 1; }
++
++G1NUMA* G1NUMA::create() {
++  guarantee(_inst == NULL, "Should be called once.");
++  _inst = new G1NUMA();
++
++  // NUMA only supported on Linux.
++#ifdef LINUX
++  _inst->initialize(UseNUMA);
++#else
++  _inst->initialize(false);
++#endif /* LINUX */
++
++  return _inst;
++}
++
++ // Returns memory node ids
++const int* G1NUMA::node_ids() const {
++  return _node_ids;
++}
++
++uint G1NUMA::index_of_node_id(int node_id) const {
++  assert(node_id >= 0, err_msg("invalid node id %d", node_id));
++  assert(node_id < _len_node_id_to_index_map, err_msg("invalid node id %d", node_id));
++  uint node_index = _node_id_to_index_map[node_id];
++  assert(node_index != G1NUMA::UnknownNodeIndex,
++         err_msg("invalid node id %d", node_id));
++  return node_index;
++}
++
++G1NUMA::G1NUMA() :
++  _node_id_to_index_map(NULL), _len_node_id_to_index_map(0),
++  _node_ids(NULL), _num_active_node_ids(0),
++  _region_size(0), _page_size(0), _stats(NULL) {
++}
++
++void G1NUMA::initialize_without_numa() {
++  // If NUMA is not enabled or supported, initialize as having a single node.
++ _num_active_node_ids = 1; ++ _node_ids = NEW_C_HEAP_ARRAY(int, _num_active_node_ids, mtGC); ++ _node_ids[0] = 0; ++ // Map index 0 to node 0 ++ _len_node_id_to_index_map = 1; ++ _node_id_to_index_map = NEW_C_HEAP_ARRAY(uint, _len_node_id_to_index_map, mtGC); ++ _node_id_to_index_map[0] = 0; ++} ++ ++void G1NUMA::initialize(bool use_numa) { ++ if (!use_numa) { ++ initialize_without_numa(); ++ return; ++ } ++ ++ assert(UseNUMA, "Invariant"); ++ size_t num_node_ids = os::numa_get_groups_num(); ++ ++ // Create an array of active node ids. ++ _node_ids = NEW_C_HEAP_ARRAY(int, num_node_ids, mtGC); ++ _num_active_node_ids = (uint)os::numa_get_leaf_groups(_node_ids, num_node_ids); ++ ++ int max_node_id = 0; ++ for (uint i = 0; i < _num_active_node_ids; i++) { ++ max_node_id = MAX2(max_node_id, _node_ids[i]); ++ } ++ ++ // Create a mapping between node_id and index. ++ _len_node_id_to_index_map = max_node_id + 1; ++ _node_id_to_index_map = NEW_C_HEAP_ARRAY(uint, _len_node_id_to_index_map, mtGC); ++ ++ // Set all indices with unknown node id. ++ for (int i = 0; i < _len_node_id_to_index_map; i++) { ++ _node_id_to_index_map[i] = G1NUMA::UnknownNodeIndex; ++ } ++ ++ // Set the indices for the actually retrieved node ids. ++ for (uint i = 0; i < _num_active_node_ids; i++) { ++ _node_id_to_index_map[_node_ids[i]] = i; ++ } ++ ++ _stats = new G1NUMAStats(_node_ids, _num_active_node_ids); ++} ++ ++G1NUMA::~G1NUMA() { ++ delete _stats; ++ FREE_C_HEAP_ARRAY(int, _node_id_to_index_map, mtGC); ++ FREE_C_HEAP_ARRAY(int, _node_ids, mtGC); ++} ++ ++void G1NUMA::set_region_info(size_t region_size, size_t page_size) { ++ _region_size = region_size; ++ _page_size = page_size; ++} ++ ++uint G1NUMA::num_active_nodes() const { ++ assert(_num_active_node_ids > 0, "just checking"); ++ return _num_active_node_ids; ++} ++ ++uint G1NUMA::index_of_current_thread() const { ++ if (!is_enabled()) { ++ return 0; ++ } ++ return index_of_node_id(os::numa_get_group_id()); ++} ++ ++uint G1NUMA::preferred_node_index_for_index(uint region_index) const { ++ if (region_size() >= page_size()) { ++ // Simple case, pages are smaller than the region so we ++ // can just alternate over the nodes. ++ return region_index % _num_active_node_ids; ++ } else { ++ // Multiple regions in one page, so we need to make sure the ++ // regions within a page is preferred on the same node. ++ size_t regions_per_page = page_size() / region_size(); ++ return (region_index / regions_per_page) % _num_active_node_ids; ++ } ++} ++ ++int G1NUMA::numa_id(int index) const { ++ assert(index < _len_node_id_to_index_map, err_msg("Index %d out of range: [0,%d)", ++ index, _len_node_id_to_index_map)); ++ return _node_ids[index]; ++} ++ ++uint G1NUMA::index_of_address(HeapWord *address) const { ++ int numa_id = os::numa_get_group_id_for_address((const void*)address); ++ if (numa_id == -1) { ++ return UnknownNodeIndex; ++ } else { ++ return index_of_node_id(numa_id); ++ } ++} ++ ++uint G1NUMA::index_for_region(HeapRegion* hr) const { ++ if (!is_enabled()) { ++ return 0; ++ } ++ ++ if (AlwaysPreTouch) { ++ // If we already pretouched, we can check actual node index here. ++ // However, if node index is still unknown, use preferred node index. ++ uint node_index = index_of_address(hr->bottom()); ++ if (node_index != UnknownNodeIndex) { ++ return node_index; ++ } ++ } ++ ++ return preferred_node_index_for_index(hr->hrm_index()); ++} ++ ++// Request to spread the given memory evenly across the available NUMA ++// nodes. 
Which node to request for a given address is determined by the
++// region size and the page size. Below are two examples on a 4 NUMA node system:
++//   1. G1HeapRegionSize(_region_size) is larger than or equal to page size.
++//      * Page #:       |-0--||-1--||-2--||-3--||-4--||-5--||-6--||-7--||-8--||-9--||-10-||-11-||-12-||-13-||-14-||-15-|
++//      * HeapRegion #:  |----#0----||----#1----||----#2----||----#3----||----#4----||----#5----||----#6----||----#7----|
++//      * NUMA node #:   |----#0----||----#1----||----#2----||----#3----||----#0----||----#1----||----#2----||----#3----|
++//   2. G1HeapRegionSize(_region_size) is smaller than page size.
++//      Memory will be touched one page at a time because G1RegionToSpaceMapper commits
++//      pages one by one.
++//      * Page #:       |-----0----||-----1----||-----2----||-----3----||-----4----||-----5----||-----6----||-----7----|
++//      * HeapRegion #:  |-#0-||-#1-||-#2-||-#3-||-#4-||-#5-||-#6-||-#7-||-#8-||-#9-||#10-||#11-||#12-||#13-||#14-||#15-|
++//      * NUMA node #:   |----#0----||----#1----||----#2----||----#3----||----#0----||----#1----||----#2----||----#3----|
++void G1NUMA::request_memory_on_node(void* aligned_address, size_t size_in_bytes, uint region_index) {
++  if (!is_enabled()) {
++    return;
++  }
++
++  if (size_in_bytes == 0) {
++    return;
++  }
++
++  uint node_index = preferred_node_index_for_index(region_index);
++
++  assert(is_aligned(aligned_address, page_size()), err_msg("Given address (" PTR_FORMAT ") should be aligned.", p2i(aligned_address)));
++  assert(is_aligned(size_in_bytes, page_size()), err_msg("Given size (" SIZE_FORMAT ") should be aligned.", size_in_bytes));
++
++  if (G1Log::finer()) {
++    gclog_or_tty->print_cr("Request memory [" PTR_FORMAT ", " PTR_FORMAT ") to be NUMA id (%d)",
++                           p2i(aligned_address), p2i((char*)aligned_address + size_in_bytes), _node_ids[node_index]);
++  }
++  os::numa_make_local((char*)aligned_address, size_in_bytes, _node_ids[node_index]);
++}
++
++uint G1NUMA::max_search_depth() const {
++  // Multiple of 3 is just a random number to limit iterations.
++  // There may be cases where one page consists of multiple HeapRegions.
++ return 3 * MAX2((uint)(page_size() / region_size()), (uint)1) * num_active_nodes(); ++} ++ ++void G1NUMA::update_statistics(G1NUMAStats::NodeDataItems phase, ++ uint requested_node_index, ++ uint allocated_node_index) { ++ if (_stats == NULL) { ++ return; ++ } ++ ++ uint converted_req_index; ++ if(requested_node_index < _num_active_node_ids) { ++ converted_req_index = requested_node_index; ++ } else { ++ assert(requested_node_index == AnyNodeIndex, ++ err_msg("Requested node index %u should be AnyNodeIndex.", requested_node_index)); ++ converted_req_index = _num_active_node_ids; ++ } ++ _stats->update(phase, converted_req_index, allocated_node_index); ++} ++ ++void G1NUMA::copy_statistics(G1NUMAStats::NodeDataItems phase, ++ uint requested_node_index, ++ size_t* allocated_stat) { ++ if (_stats == NULL) { ++ return; ++ } ++ ++ _stats->copy(phase, requested_node_index, allocated_stat); ++} ++ ++void G1NUMA::print_statistics() const { ++ if (_stats == NULL) { ++ return; ++ } ++ ++ _stats->print_statistics(); ++} ++ ++G1NodeIndexCheckClosure::G1NodeIndexCheckClosure(const char* desc, G1NUMA* numa) : ++ _desc(desc), _numa(numa) { ++ ++ uint num_nodes = _numa->num_active_nodes(); ++ _matched = NEW_C_HEAP_ARRAY(uint, num_nodes, mtGC); ++ _mismatched = NEW_C_HEAP_ARRAY(uint, num_nodes, mtGC); ++ _total = NEW_C_HEAP_ARRAY(uint, num_nodes, mtGC); ++ memset(_matched, 0, sizeof(uint) * num_nodes); ++ memset(_mismatched, 0, sizeof(uint) * num_nodes); ++ memset(_total, 0, sizeof(uint) * num_nodes); ++} ++ ++G1NodeIndexCheckClosure::~G1NodeIndexCheckClosure() { ++ if (G1Log::finer()) { ++ gclog_or_tty->print("%s: NUMA region verification (id: matched/mismatched/total): ", _desc); ++ const int* numa_ids = _numa->node_ids(); ++ for (uint i = 0; i < _numa->num_active_nodes(); i++) { ++ gclog_or_tty->print("%d: %u/%u/%u ", numa_ids[i], _matched[i], _mismatched[i], _total[i]); ++ } ++ gclog_or_tty->print_cr(" "); ++ } ++ FREE_C_HEAP_ARRAY(uint, _matched, mtGC); ++ FREE_C_HEAP_ARRAY(uint, _mismatched, mtGC); ++ FREE_C_HEAP_ARRAY(uint, _total, mtGC); ++} ++ ++bool G1NodeIndexCheckClosure::doHeapRegion(HeapRegion* hr) { ++ // Preferred node index will only have valid node index. ++ uint preferred_node_index = _numa->preferred_node_index_for_index(hr->hrm_index()); ++ // Active node index may have UnknownNodeIndex. ++ uint active_node_index = _numa->index_of_address(hr->bottom()); ++ ++ if (preferred_node_index == active_node_index) { ++ _matched[preferred_node_index]++; ++ } else if (active_node_index != G1NUMA::UnknownNodeIndex) { ++ _mismatched[preferred_node_index]++; ++ } ++ _total[preferred_node_index]++; ++ ++ return false; ++} +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1NUMA.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1NUMA.hpp +new file mode 100644 +index 000000000..30a03dd6d +--- /dev/null ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1NUMA.hpp +@@ -0,0 +1,149 @@ ++/* ++ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License
++ * version 2 for more details (a copy is included in the LICENSE file that
++ * accompanied this code).
++ *
++ * You should have received a copy of the GNU General Public License version
++ * 2 along with this work; if not, write to the Free Software Foundation,
++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
++ *
++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
++ * or visit www.oracle.com if you need additional information or have any
++ * questions.
++ *
++ */
++
++#ifndef SHARE_VM_GC_G1_NUMA_HPP
++#define SHARE_VM_GC_G1_NUMA_HPP
++
++#include "gc_implementation/g1/g1NUMAStats.hpp"
++#include "gc_implementation/g1/heapRegion.hpp"
++#include "memory/allocation.hpp"
++#include "runtime/os.hpp"
++
++class G1NUMA: public CHeapObj<mtGC> {
++  // Mapping of available node ids to 0-based index which can be used for
++  // fast resource management. I.e. for every node id provides a unique value in
++  // the range from [0, {# of nodes-1}].
++  // For invalid node id, return UnknownNodeIndex.
++  uint* _node_id_to_index_map;
++  // Length of _node_id_to_index_map.
++  int _len_node_id_to_index_map;
++
++  // Current active node ids.
++  int* _node_ids;
++  // Total number of node ids.
++  uint _num_active_node_ids;
++
++  // HeapRegion size
++  size_t _region_size;
++  // Necessary when touching memory.
++  size_t _page_size;
++
++  // Stores statistic data.
++  G1NUMAStats* _stats;
++
++  size_t region_size() const;
++  size_t page_size() const;
++
++  // Returns node index of the given node id.
++  // Precondition: node_id is an active node id.
++  inline uint index_of_node_id(int node_id) const;
++
++  // Creates node id and node index mapping table of _node_id_to_index_map.
++  void init_node_id_to_index_map(const int* node_ids, uint num_node_ids);
++
++  static G1NUMA* _inst;
++
++  G1NUMA();
++  void initialize(bool use_numa);
++  void initialize_without_numa();
++
++public:
++  static const uint UnknownNodeIndex = UINT_MAX;
++  static const uint AnyNodeIndex = UnknownNodeIndex - 1;
++
++  static G1NUMA* numa() { return _inst; }
++
++  static G1NUMA* create();
++
++  ~G1NUMA();
++
++  // Sets heap region size and page size after those values
++  // are determined at G1CollectedHeap::initialize().
++  void set_region_info(size_t region_size, size_t page_size);
++
++  // Returns active memory node count.
++  uint num_active_nodes() const;
++
++  bool is_enabled() const;
++
++  int numa_id(int index) const;
++
++  // Returns memory node ids
++  const int* node_ids() const;
++
++  // Returns node index of current calling thread.
++  uint index_of_current_thread() const;
++
++  // Returns the preferred index for the given HeapRegion index.
++  // This assumes that HeapRegions are evenly split, so we can decide the preferred index
++  // with the given HeapRegion index.
++  // Result is less than num_active_nodes().
++  uint preferred_node_index_for_index(uint region_index) const;
++
++  // Retrieves node index of the given address.
++  // Result is less than num_active_nodes() or is UnknownNodeIndex.
++  // Precondition: address is in reserved range for heap.
++  uint index_of_address(HeapWord* address) const;
++
++  // If AlwaysPreTouch is enabled, return actual node index via system call.
++  // If disabled, return preferred node index of the given heap region.
++  uint index_for_region(HeapRegion* hr) const;
++
++  // Requests the given memory area to be located at the given node index.
++ void request_memory_on_node(void* aligned_address, size_t size_in_bytes, uint region_index); ++ ++ // Returns maximum search depth which is used to limit heap region search iterations. ++ // The number of active nodes, page size and heap region size are considered. ++ uint max_search_depth() const; ++ ++ // Update the given phase of requested and allocated node index. ++ void update_statistics(G1NUMAStats::NodeDataItems phase, uint requested_node_index, uint allocated_node_index); ++ ++ // Copy all allocated statistics of the given phase and requested node. ++ // Precondition: allocated_stat should have same length of active nodes. ++ void copy_statistics(G1NUMAStats::NodeDataItems phase, uint requested_node_index, size_t* allocated_stat); ++ ++ // Print all statistics. ++ void print_statistics() const; ++}; ++ ++class G1NodeIndexCheckClosure : public HeapRegionClosure { ++ const char* _desc; ++ G1NUMA* _numa; ++ // Records matched count of each node. ++ uint* _matched; ++ // Records mismatched count of each node. ++ uint* _mismatched; ++ // Records total count of each node. ++ // Total = matched + mismatched + unknown. ++ uint* _total; ++ ++public: ++ G1NodeIndexCheckClosure(const char* desc, G1NUMA* numa); ++ ~G1NodeIndexCheckClosure(); ++ ++ bool doHeapRegion(HeapRegion* hr); ++}; ++ ++#endif // SHARE_VM_GC_G1_NUMA_HPP +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1NUMAStats.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1NUMAStats.cpp +new file mode 100644 +index 000000000..cfc3633f8 +--- /dev/null ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1NUMAStats.cpp +@@ -0,0 +1,226 @@ ++/* ++ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ * ++ */ ++ ++#include "precompiled.hpp" ++#include "gc_implementation/g1/g1Log.hpp" ++#include "gc_implementation/g1/g1NUMAStats.hpp" ++#include "memory/allocation.inline.hpp" ++ ++double G1NUMAStats::Stat::rate() const { ++ return _requested == 0 ? 0 : (double)_hit / _requested * 100; ++} ++ ++G1NUMAStats::NodeDataArray::NodeDataArray(uint num_nodes) { ++ guarantee(num_nodes > 1, err_msg("Number of nodes (%u) should be set", num_nodes)); ++ ++ // The row represents the number of nodes. ++ _num_column = num_nodes; ++ // +1 for G1MemoryNodeManager::AnyNodeIndex. 
++ _num_row = num_nodes + 1; ++ ++ _data = NEW_C_HEAP_ARRAY(size_t*, _num_row, mtGC); ++ for (uint row = 0; row < _num_row; row++) { ++ _data[row] = NEW_C_HEAP_ARRAY(size_t, _num_column, mtGC); ++ } ++ ++ clear(); ++} ++ ++G1NUMAStats::NodeDataArray::~NodeDataArray() { ++ for (uint row = 0; row < _num_row; row++) { ++ FREE_C_HEAP_ARRAY(size_t, _data[row], mtGC); ++ } ++ FREE_C_HEAP_ARRAY(size_t*, _data, mtGC); ++} ++ ++void G1NUMAStats::NodeDataArray::create_hit_rate(Stat* result) const { ++ size_t requested = 0; ++ size_t hit = 0; ++ ++ for (size_t row = 0; row < _num_row; row++) { ++ for (size_t column = 0; column < _num_column; column++) { ++ requested += _data[row][column]; ++ if (row == column) { ++ hit += _data[row][column]; ++ } ++ } ++ } ++ ++ assert(result != NULL, "Invariant"); ++ result->_hit = hit; ++ result->_requested = requested; ++} ++ ++void G1NUMAStats::NodeDataArray::create_hit_rate(Stat* result, uint req_index) const { ++ size_t requested = 0; ++ size_t hit = _data[req_index][req_index]; ++ ++ for (size_t column = 0; column < _num_column; column++) { ++ requested += _data[req_index][column]; ++ } ++ ++ assert(result != NULL, "Invariant"); ++ result->_hit = hit; ++ result->_requested = requested; ++} ++ ++size_t G1NUMAStats::NodeDataArray::sum(uint req_index) const { ++ size_t sum = 0; ++ for (size_t column = 0; column < _num_column; column++) { ++ sum += _data[req_index][column]; ++ } ++ ++ return sum; ++} ++ ++void G1NUMAStats::NodeDataArray::increase(uint req_index, uint alloc_index) { ++ assert(req_index < _num_row, ++ err_msg("Requested index %u should be less than the row size %u", ++ req_index, _num_row)); ++ assert(alloc_index < _num_column, ++ err_msg("Allocated index %u should be less than the column size %u", ++ alloc_index, _num_column)); ++ _data[req_index][alloc_index] += 1; ++} ++ ++void G1NUMAStats::NodeDataArray::clear() { ++ for (uint row = 0; row < _num_row; row++) { ++ memset((void*)_data[row], 0, sizeof(size_t) * _num_column); ++ } ++} ++ ++size_t G1NUMAStats::NodeDataArray::get(uint req_index, uint alloc_index) { ++ return _data[req_index][alloc_index]; ++} ++ ++void G1NUMAStats::NodeDataArray::copy(uint req_index, size_t* stat) { ++ assert(stat != NULL, "Invariant"); ++ ++ for (uint column = 0; column < _num_column; column++) { ++ _data[req_index][column] += stat[column]; ++ } ++} ++ ++G1NUMAStats::G1NUMAStats(const int* node_ids, uint num_node_ids) : ++ _node_ids(node_ids), _num_node_ids(num_node_ids), _node_data() { ++ ++ assert(_num_node_ids > 1, err_msg("Should have more than one active memory nodes %u", _num_node_ids)); ++ ++ for (int i = 0; i < NodeDataItemsSentinel; i++) { ++ _node_data[i] = new NodeDataArray(_num_node_ids); ++ } ++} ++ ++G1NUMAStats::~G1NUMAStats() { ++ for (int i = 0; i < NodeDataItemsSentinel; i++) { ++ delete _node_data[i]; ++ } ++} ++ ++void G1NUMAStats::clear(G1NUMAStats::NodeDataItems phase) { ++ _node_data[phase]->clear(); ++} ++ ++void G1NUMAStats::update(G1NUMAStats::NodeDataItems phase, ++ uint requested_node_index, ++ uint allocated_node_index) { ++ _node_data[phase]->increase(requested_node_index, allocated_node_index); ++} ++ ++void G1NUMAStats::copy(G1NUMAStats::NodeDataItems phase, ++ uint requested_node_index, ++ size_t* allocated_stat) { ++ _node_data[phase]->copy(requested_node_index, allocated_stat); ++} ++ ++static const char* phase_to_explanatory_string(G1NUMAStats::NodeDataItems phase) { ++ switch(phase) { ++ case G1NUMAStats::NewRegionAlloc: ++ return "Placement match ratio"; ++ case 
G1NUMAStats::LocalObjProcessAtCopyToSurv: ++ return "Worker task locality match ratio"; ++ default: ++ return ""; ++ } ++} ++ ++#define RATE_TOTAL_FORMAT "%0.0f%% " SIZE_FORMAT "/" SIZE_FORMAT ++ ++void G1NUMAStats::print_info(G1NUMAStats::NodeDataItems phase) { ++ if (G1Log::finer()) { ++ Stat result; ++ size_t array_width = _num_node_ids; ++ ++ _node_data[phase]->create_hit_rate(&result); ++ gclog_or_tty->print("%s: " RATE_TOTAL_FORMAT " (", ++ phase_to_explanatory_string(phase), result.rate(), result._hit, result._requested); ++ ++ for (uint i = 0; i < array_width; i++) { ++ if (i != 0) { ++ gclog_or_tty->print(", "); ++ } ++ _node_data[phase]->create_hit_rate(&result, i); ++ gclog_or_tty->print("%d: " RATE_TOTAL_FORMAT, ++ _node_ids[i], result.rate(), result._hit, result._requested); ++ } ++ gclog_or_tty->print_cr(")"); ++ } ++} ++ ++void G1NUMAStats::print_mutator_alloc_stat_debug() { ++ uint array_width = _num_node_ids; ++ ++ if (G1Log::finer()) { ++ gclog_or_tty->print("Allocated NUMA ids "); ++ for (uint i = 0; i < array_width; i++) { ++ gclog_or_tty->print("%8d", _node_ids[i]); ++ } ++ gclog_or_tty->print_cr(" Total"); ++ ++ gclog_or_tty->print("Requested NUMA id "); ++ for (uint req = 0; req < array_width; req++) { ++ gclog_or_tty->print("%3d ", _node_ids[req]); ++ for (uint alloc = 0; alloc < array_width; alloc++) { ++ gclog_or_tty->print(SIZE_FORMAT_W(8), _node_data[NewRegionAlloc]->get(req, alloc)); ++ } ++ gclog_or_tty->print(SIZE_FORMAT_W(8), _node_data[NewRegionAlloc]->sum(req)); ++ gclog_or_tty->print_cr(" "); ++ // Add padding to align with the string 'Requested NUMA id'. ++ gclog_or_tty->print(" "); ++ } ++ gclog_or_tty->print("Any "); ++ for (uint alloc = 0; alloc < array_width; alloc++) { ++ gclog_or_tty->print(SIZE_FORMAT_W(8), _node_data[NewRegionAlloc]->get(array_width, alloc)); ++ } ++ gclog_or_tty->print(SIZE_FORMAT_W(8), _node_data[NewRegionAlloc]->sum(array_width)); ++ gclog_or_tty->print_cr(" "); ++ } ++} ++ ++void G1NUMAStats::print_statistics() { ++ print_info(NewRegionAlloc); ++ print_mutator_alloc_stat_debug(); ++ ++ print_info(LocalObjProcessAtCopyToSurv); ++} +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1NUMAStats.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1NUMAStats.hpp +new file mode 100644 +index 000000000..fba9442c8 +--- /dev/null ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1NUMAStats.hpp +@@ -0,0 +1,119 @@ ++/* ++ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. 
++ * ++ */ ++ ++#ifndef SHARE_VM_GC_G1_NODE_TIMES_HPP ++#define SHARE_VM_GC_G1_NODE_TIMES_HPP ++ ++#include "memory/allocation.hpp" ++ ++// Manages statistics of multi nodes. ++class G1NUMAStats : public CHeapObj { ++ struct Stat { ++ // Hit count: if requested id equals to returned id. ++ size_t _hit; ++ // Total request count ++ size_t _requested; ++ ++ // Hit count / total request count ++ double rate() const; ++ }; ++ ++ // Holds data array which has a size of (node count) * (node count + 1) to ++ // represent request node * allocated node. The request node includes any node case. ++ // All operations are NOT thread-safe. ++ // The row index indicates a requested node index while the column node index ++ // indicates an allocated node index. The last row is for any node index request. ++ // E.g. (req, alloc) = (0,0) (1,0) (2,0) (0,1) (Any, 3) (0,2) (0,3) (0,3) (3,3) ++ // Allocated node index 0 1 2 3 Total ++ // Requested node index 0 1 1 1 2 5 ++ // 1 1 0 0 0 1 ++ // 2 1 0 0 0 1 ++ // 3 0 0 0 1 1 ++ // Any 0 0 0 1 1 ++ class NodeDataArray : public CHeapObj { ++ // The number of nodes. ++ uint _num_column; ++ // The number of nodes + 1 (for any node request) ++ uint _num_row; ++ // 2-dimension array that holds count of allocated / requested node index. ++ size_t** _data; ++ ++ public: ++ NodeDataArray(uint num_nodes); ++ ~NodeDataArray(); ++ ++ // Create Stat result of hit count, requested count and hit rate. ++ // The result is copied to the given result parameter. ++ void create_hit_rate(Stat* result) const; ++ // Create Stat result of hit count, requested count and hit rate of the given index. ++ // The result is copied to the given result parameter. ++ void create_hit_rate(Stat* result, uint req_index) const; ++ // Return sum of the given index. ++ size_t sum(uint req_index) const; ++ // Increase at the request / allocated index. ++ void increase(uint req_index, uint alloc_index); ++ // Clear all data. ++ void clear(); ++ // Return current value of the given request / allocated index. ++ size_t get(uint req_index, uint alloc_index); ++ // Copy values of the given request index. ++ void copy(uint req_index, size_t* stat); ++ }; ++ ++public: ++ enum NodeDataItems { ++ // Statistics of a new region allocation. ++ NewRegionAlloc, ++ // Statistics of object processing during copy to survivor region. ++ LocalObjProcessAtCopyToSurv, ++ NodeDataItemsSentinel ++ }; ++ ++private: ++ const int* _node_ids; ++ uint _num_node_ids; ++ ++ NodeDataArray* _node_data[NodeDataItemsSentinel]; ++ ++ void print_info(G1NUMAStats::NodeDataItems phase); ++ ++ void print_mutator_alloc_stat_debug(); ++ ++public: ++ G1NUMAStats(const int* node_ids, uint num_node_ids); ++ ~G1NUMAStats(); ++ ++ void clear(G1NUMAStats::NodeDataItems phase); ++ ++ // Update the given phase of requested and allocated node index. ++ void update(G1NUMAStats::NodeDataItems phase, uint requested_node_index, uint allocated_node_index); ++ ++ // Copy all allocated statistics of the given phase and requested node. ++ // Precondition: allocated_stat should have same length of active nodes. 
++ void copy(G1NUMAStats::NodeDataItems phase, uint requested_node_index, size_t* allocated_stat); ++ ++ void print_statistics(); ++}; ++ ++#endif // SHARE_VM_GC_G1_NODE_TIMES_HPP +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1PageBasedVirtualSpace.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1PageBasedVirtualSpace.cpp +index 075217d60..7bc84bfe8 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/g1PageBasedVirtualSpace.cpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1PageBasedVirtualSpace.cpp +@@ -135,6 +135,11 @@ char* G1PageBasedVirtualSpace::page_start(size_t index) const { + return _low_boundary + index * _page_size; + } + ++size_t G1PageBasedVirtualSpace::page_size() const { ++ assert(_page_size > 0, "Page size is not yet initialized."); ++ return _page_size; ++} ++ + bool G1PageBasedVirtualSpace::is_after_last_page(size_t index) const { + guarantee(index <= _committed.size(), + err_msg("Given boundary page " SIZE_FORMAT " is beyond managed page count " SIZE_FORMAT, index, _committed.size())); +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1PageBasedVirtualSpace.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1PageBasedVirtualSpace.hpp +index 4d0b7b21b..f171bfcf1 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/g1PageBasedVirtualSpace.hpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1PageBasedVirtualSpace.hpp +@@ -90,8 +90,6 @@ class G1PageBasedVirtualSpace VALUE_OBJ_CLASS_SPEC { + + // Returns the index of the page which contains the given address. + uintptr_t addr_to_page_index(char* addr) const; +- // Returns the address of the given page index. +- char* page_start(size_t index) const; + + // Is the given page index the last page? + bool is_last_page(size_t index) const { return index == (_committed.size() - 1); } +@@ -143,6 +141,10 @@ class G1PageBasedVirtualSpace VALUE_OBJ_CLASS_SPEC { + + void check_for_contiguity() PRODUCT_RETURN; + ++ // Returns the address of the given page index. ++ char* page_start(size_t index) const; ++ size_t page_size() const; ++ + // Debugging + void print_on(outputStream* out) PRODUCT_RETURN; + void print(); +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp +index 394f20e82..a095abaf6 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.cpp +@@ -40,6 +40,8 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, + _term_attempts(0), + _tenuring_threshold(g1h->g1_policy()->tenuring_threshold()), + _age_table(false), _scanner(g1h, rp), ++ _numa(g1h->numa()), ++ _obj_alloc_stat(NULL), + _strong_roots_time(0), _term_time(0) { + _scanner.set_par_scan_thread_state(this); + // we allocate G1YoungSurvRateNumRegions plus one entries, since +@@ -60,19 +62,20 @@ G1ParScanThreadState::G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, + + _g1_par_allocator = G1ParGCAllocator::create_allocator(_g1h); + +- _dest[InCSetState::NotInCSet] = InCSetState::NotInCSet; + // The dest for Young is used when the objects are aged enough to + // need to be moved to the next space. 
+ _dest[InCSetState::Young] = InCSetState::Old; + _dest[InCSetState::Old] = InCSetState::Old; + + _start = os::elapsedTime(); ++ initialize_numa_stats(); + } + + G1ParScanThreadState::~G1ParScanThreadState() { + _g1_par_allocator->retire_alloc_buffers(); + delete _g1_par_allocator; + FREE_C_HEAP_ARRAY(size_t, _surviving_young_words_base, mtGC); ++ FREE_C_HEAP_ARRAY(size_t, _obj_alloc_stat, mtGC); + } + + void +@@ -162,7 +165,8 @@ void G1ParScanThreadState::trim_queue() { + HeapWord* G1ParScanThreadState::allocate_in_next_plab(InCSetState const state, + InCSetState* dest, + size_t word_sz, +- AllocationContext_t const context) { ++ AllocationContext_t const context, ++ uint node_index) { + assert(state.is_in_cset_or_humongous(), err_msg("Unexpected state: " CSETSTATE_FORMAT, state.value())); + assert(dest->is_in_cset_or_humongous(), err_msg("Unexpected dest: " CSETSTATE_FORMAT, dest->value())); + +@@ -170,7 +174,7 @@ HeapWord* G1ParScanThreadState::allocate_in_next_plab(InCSetState const state, + // let's keep the logic here simple. We can generalize it when necessary. + if (dest->is_young()) { + HeapWord* const obj_ptr = _g1_par_allocator->allocate(InCSetState::Old, +- word_sz, context); ++ word_sz, context, node_index); + if (obj_ptr == NULL) { + return NULL; + } +@@ -190,8 +194,8 @@ HeapWord* G1ParScanThreadState::allocate_in_next_plab(InCSetState const state, + void G1ParScanThreadState::report_promotion_event(InCSetState const dest_state, + oop const old, size_t word_sz, uint age, + HeapWord * const obj_ptr, +- AllocationContext_t context) const { +- ParGCAllocBuffer* alloc_buf = _g1_par_allocator->alloc_buffer(dest_state, context); ++ AllocationContext_t context, uint node_index) const { ++ ParGCAllocBuffer* alloc_buf = _g1_par_allocator->alloc_buffer(dest_state, context, node_index); + if (alloc_buf->contains(obj_ptr)) { + _g1h->_gc_tracer_stw->report_promotion_in_new_plab_event(old->klass(), word_sz, age, + dest_state.value() == InCSetState::Old, +@@ -226,23 +230,25 @@ oop G1ParScanThreadState::copy_to_survivor_space(InCSetState const state, + + uint age = 0; + InCSetState dest_state = next_state(state, old_mark, age); +- HeapWord* obj_ptr = _g1_par_allocator->plab_allocate(dest_state, word_sz, context); ++ uint node_index = from_region->node_index(); ++ HeapWord* obj_ptr = _g1_par_allocator->plab_allocate(dest_state, word_sz, context, node_index); + + // PLAB allocations should succeed most of the time, so we'll + // normally check against NULL once and that's it. + if (obj_ptr == NULL) { +- obj_ptr = _g1_par_allocator->allocate_direct_or_new_plab(dest_state, word_sz, context); ++ obj_ptr = _g1_par_allocator->allocate_direct_or_new_plab(dest_state, word_sz, context, node_index); + if (obj_ptr == NULL) { +- obj_ptr = allocate_in_next_plab(state, &dest_state, word_sz, context); ++ obj_ptr = allocate_in_next_plab(state, &dest_state, word_sz, context, node_index); + if (obj_ptr == NULL) { + // This will either forward-to-self, or detect that someone else has + // installed a forwarding pointer. 
+ return _g1h->handle_evacuation_failure_par(this, old); + } + } ++ update_numa_stats(node_index); + if (_g1h->_gc_tracer_stw->should_report_promotion_events()) { + // The events are checked individually as part of the actual commit +- report_promotion_event(dest_state, old, word_sz, age, obj_ptr, context); ++ report_promotion_event(dest_state, old, word_sz, age, obj_ptr, context, node_index); + } + } + +@@ -252,7 +258,7 @@ oop G1ParScanThreadState::copy_to_survivor_space(InCSetState const state, + if (_g1h->evacuation_should_fail()) { + // Doing this after all the allocation attempts also tests the + // undo_allocation() method too. +- _g1_par_allocator->undo_allocation(dest_state, obj_ptr, word_sz, context); ++ _g1_par_allocator->undo_allocation(dest_state, obj_ptr, word_sz, context, node_index); + return _g1h->handle_evacuation_failure_par(this, old); + } + #endif // !PRODUCT +@@ -314,7 +320,49 @@ oop G1ParScanThreadState::copy_to_survivor_space(InCSetState const state, + } + return obj; + } else { +- _g1_par_allocator->undo_allocation(dest_state, obj_ptr, word_sz, context); ++ _g1_par_allocator->undo_allocation(dest_state, obj_ptr, word_sz, context, node_index); + return forward_ptr; + } + } ++ ++G1ParScanThreadState* G1ParScanThreadStateSet::state_for_worker(uint worker_id, ReferenceProcessor* rp) { ++ assert(worker_id < _n_workers, "out of bounds access"); ++ if (_states[worker_id] == NULL) { ++ _states[worker_id] = ++ new G1ParScanThreadState(_g1h, worker_id, rp); ++ } ++ return _states[worker_id]; ++} ++ ++void G1ParScanThreadStateSet::flush() { ++ assert(!_flushed, "thread local state from the per thread states should be flushed once"); ++ ++ for (uint worker_index = 0; worker_index < _n_workers; ++worker_index) { ++ G1ParScanThreadState* pss = _states[worker_index]; ++ ++ if (pss == NULL) { ++ continue; ++ } ++ ++ pss->flush_numa_stats(); ++ delete pss; ++ _states[worker_index] = NULL; ++ } ++ _flushed = true; ++} ++ ++G1ParScanThreadStateSet::G1ParScanThreadStateSet(G1CollectedHeap* g1h, ++ uint n_workers) : ++ _g1h(g1h), ++ _states(NEW_C_HEAP_ARRAY(G1ParScanThreadState*, n_workers, mtGC)), ++ _n_workers(n_workers), ++ _flushed(false) { ++ for (uint i = 0; i < n_workers; ++i) { ++ _states[i] = NULL; ++ } ++} ++ ++G1ParScanThreadStateSet::~G1ParScanThreadStateSet() { ++ assert(_flushed, "thread local state from the per thread states should have been flushed"); ++ FREE_C_HEAP_ARRAY(G1ParScanThreadState*, _states, mtGC); ++} +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp +index 990b71d31..60c00b178 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.hpp +@@ -38,7 +38,7 @@ + class HeapRegion; + class outputStream; + +-class G1ParScanThreadState : public StackObj { ++class G1ParScanThreadState : public CHeapObj { + private: + G1CollectedHeap* _g1h; + RefToScanQueue* _refs; +@@ -91,6 +91,13 @@ class G1ParScanThreadState : public StackObj { + return _dest[original.value()]; + } + ++ G1NUMA* _numa; ++ ++ // Records how many object allocations happened at each node during copy to survivor. ++ // Only starts recording when log of gc+heap+numa is enabled and its data is ++ // transferred when flushed. 
++ size_t* _obj_alloc_stat; ++ + public: + G1ParScanThreadState(G1CollectedHeap* g1h, uint queue_num, ReferenceProcessor* rp); + ~G1ParScanThreadState(); +@@ -208,13 +215,19 @@ class G1ParScanThreadState : public StackObj { + HeapWord* allocate_in_next_plab(InCSetState const state, + InCSetState* dest, + size_t word_sz, +- AllocationContext_t const context); ++ AllocationContext_t const context, ++ uint node_index); + + void report_promotion_event(InCSetState const dest_state, + oop const old, size_t word_sz, uint age, +- HeapWord * const obj_ptr, AllocationContext_t context) const; ++ HeapWord * const obj_ptr, AllocationContext_t context, uint node_index) const; + + inline InCSetState next_state(InCSetState const state, markOop const m, uint& age); ++ ++ // NUMA statistics related methods. ++ inline void initialize_numa_stats(); ++ inline void update_numa_stats(uint node_index); ++ + public: + + oop copy_to_survivor_space(InCSetState const state, oop const obj, markOop const old_mark); +@@ -222,6 +235,22 @@ class G1ParScanThreadState : public StackObj { + void trim_queue(); + + inline void steal_and_trim_queue(RefToScanQueueSet *task_queues); ++ inline void flush_numa_stats(); ++}; ++ ++class G1ParScanThreadStateSet : public StackObj { ++ G1CollectedHeap* _g1h; ++ G1ParScanThreadState** _states; ++ uint _n_workers; ++ bool _flushed; ++ ++ public: ++ G1ParScanThreadStateSet(G1CollectedHeap* g1h, ++ uint n_workers); ++ ~G1ParScanThreadStateSet(); ++ ++ void flush(); ++ G1ParScanThreadState* state_for_worker(uint worker_id, ReferenceProcessor* rp); + }; + + #endif // SHARE_VM_GC_IMPLEMENTATION_G1_G1PARSCANTHREADSTATE_HPP +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp +index 7dedb1517..b3dc22b30 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1ParScanThreadState.inline.hpp +@@ -142,5 +142,27 @@ void G1ParScanThreadState::steal_and_trim_queue(RefToScanQueueSet *task_queues) + } + } + ++void G1ParScanThreadState::initialize_numa_stats() { ++ if (_numa->is_enabled()) { ++ uint num_nodes = _numa->num_active_nodes(); ++ // Record only if there are multiple active nodes. 
++ _obj_alloc_stat = NEW_C_HEAP_ARRAY(size_t, num_nodes, mtGC); ++ memset(_obj_alloc_stat, 0, sizeof(size_t) * num_nodes); ++ } ++} ++ ++void G1ParScanThreadState::flush_numa_stats() { ++ if (_obj_alloc_stat != NULL) { ++ uint node_index = _numa->index_of_current_thread(); ++ _numa->copy_statistics(G1NUMAStats::LocalObjProcessAtCopyToSurv, node_index, _obj_alloc_stat); ++ } ++} ++ ++void G1ParScanThreadState::update_numa_stats(uint node_index) { ++ if (_obj_alloc_stat != NULL) { ++ _obj_alloc_stat[node_index]++; ++ } ++} ++ + #endif /* SHARE_VM_GC_IMPLEMENTATION_G1_G1PARSCANTHREADSTATE_INLINE_HPP */ + +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.cpp b/hotspot/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.cpp +index f07c27107..27ea0d7a1 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.cpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.cpp +@@ -24,6 +24,7 @@ + + #include "precompiled.hpp" + #include "gc_implementation/g1/g1BiasedArray.hpp" ++#include "gc_implementation/g1/g1NUMA.hpp" + #include "gc_implementation/g1/g1RegionToSpaceMapper.hpp" + #include "memory/allocation.inline.hpp" + #include "runtime/mutex.hpp" +@@ -40,6 +41,7 @@ G1RegionToSpaceMapper::G1RegionToSpaceMapper(ReservedSpace rs, + _storage(rs, used_size, page_size), + _region_granularity(region_granularity), + _listener(NULL), ++ _memory_type(type), + _commit_map() { + guarantee(is_power_of_2(page_size), "must be"); + guarantee(is_power_of_2(region_granularity), "must be"); +@@ -71,6 +73,14 @@ class G1RegionsLargerThanCommitSizeMapper : public G1RegionToSpaceMapper { + virtual void commit_regions(uint start_idx, size_t num_regions) { + bool zero_filled = _storage.commit((size_t)start_idx * _pages_per_region, num_regions * _pages_per_region); + _commit_map.par_set_range(start_idx, start_idx + num_regions, BitMap::unknown_range); ++ if (_memory_type == mtJavaHeap) { ++ for (uint region_index = start_idx; region_index < start_idx + num_regions; region_index++ ) { ++ void* address = _storage.page_start(region_index * _pages_per_region); ++ size_t size_in_bytes = _storage.page_size() * _pages_per_region; ++ G1NUMA::numa()->request_memory_on_node(address, size_in_bytes, region_index); ++ } ++ } ++ + fire_on_commit(start_idx, num_regions, zero_filled); + } + +@@ -106,7 +116,7 @@ class G1RegionsSmallerThanCommitSizeMapper : public G1RegionToSpaceMapper { + size_t commit_factor, + MemoryType type) : + G1RegionToSpaceMapper(rs, actual_size, page_size, alloc_granularity, type), +- _par_lock(Mutex::leaf, "G1RegionsSmallerThanCommitSizeMapper par lock"), ++ _par_lock(Mutex::leaf, "G1RegionsSmallerThanCommitSizeMapper par lock", true), + _regions_per_page((page_size * commit_factor) / alloc_granularity), _refcounts() { + + guarantee((page_size * commit_factor) >= alloc_granularity, "allocation granularity smaller than commit granularity"); +@@ -123,6 +133,11 @@ class G1RegionsSmallerThanCommitSizeMapper : public G1RegionToSpaceMapper { + bool zero_filled = false; + if (old_refcount == 0) { + zero_filled = _storage.commit(idx, 1); ++ if (_memory_type == mtJavaHeap) { ++ void* address = _storage.page_start(idx); ++ size_t size_in_bytes = _storage.page_size(); ++ G1NUMA::numa()->request_memory_on_node(address, size_in_bytes, i); ++ } + } + _refcounts.set_by_index(idx, old_refcount + 1); + _commit_map.set_bit(i); +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.hpp 
b/hotspot/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.hpp +index 6623a37f9..6eee4d309 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.hpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1RegionToSpaceMapper.hpp +@@ -51,6 +51,8 @@ class G1RegionToSpaceMapper : public CHeapObj { + // Mapping management + BitMap _commit_map; + ++ MemoryType _memory_type; ++ + G1RegionToSpaceMapper(ReservedSpace rs, size_t used_size, size_t page_size, size_t region_granularity, MemoryType type); + + void fire_on_commit(uint start_idx, size_t num_regions, bool zero_filled); +diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp +index facd28948..131cdeacd 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.cpp +@@ -26,6 +26,7 @@ + #include "code/nmethod.hpp" + #include "gc_implementation/g1/g1BlockOffsetTable.inline.hpp" + #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" ++#include "gc_implementation/g1/g1NUMA.hpp" + #include "gc_implementation/g1/g1OopClosures.inline.hpp" + #include "gc_implementation/g1/heapRegion.inline.hpp" + #include "gc_implementation/g1/heapRegionBounds.inline.hpp" +@@ -313,7 +314,7 @@ HeapRegion::HeapRegion(uint hrm_index, + _in_uncommit_list(false), + _young_index_in_cset(-1), _surv_rate_group(NULL), _age_index(-1), + _rem_set(NULL), _recorded_rs_length(0), _predicted_elapsed_time_ms(0), +- _predicted_bytes_to_copy(0) ++ _predicted_bytes_to_copy(0), _node_index(G1NUMA::UnknownNodeIndex) + { + _rem_set = new HeapRegionRemSet(sharedOffsetArray, this); + assert(HeapRegionRemSet::num_par_rem_sets() > 0, "Invariant."); +@@ -704,6 +705,15 @@ void HeapRegion::print_on(outputStream* st) const { + st->print(" TS %5d", _gc_time_stamp); + st->print(" PTAMS " PTR_FORMAT " NTAMS " PTR_FORMAT, + prev_top_at_mark_start(), next_top_at_mark_start()); ++ if (UseNUMA) { ++ G1NUMA* numa = G1NUMA::numa(); ++ if (node_index() < numa->num_active_nodes()) { ++ st->print("|%d", numa->numa_id(node_index())); ++ } else { ++ st->print("|-"); ++ } ++ } ++ st->print_cr(" "); + G1OffsetTableContigSpace::print_on(st); + } + +diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp +index 656d605ef..bc9527a87 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegion.hpp +@@ -295,7 +295,7 @@ class HeapRegion: public G1OffsetTableContigSpace { + // The RSet length that was added to the total value + // for the collection set. + size_t _recorded_rs_length; +- ++ uint _node_index; + // The predicted elapsed time that was added to total value + // for the collection set. + double _predicted_elapsed_time_ms; +@@ -768,6 +768,9 @@ class HeapRegion: public G1OffsetTableContigSpace { + // the strong code roots list for this region + void strong_code_roots_do(CodeBlobClosure* blk) const; + ++ uint node_index() const { return _node_index; } ++ void set_node_index(uint node_index) { _node_index = node_index; } ++ + // Verify that the entries on the strong code root list for this + // region are live and include at least one pointer into this region. 
+ void verify_strong_code_roots(VerifyOption vo, bool* failures) const; +diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.cpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.cpp +index 842550d21..6ad85596d 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.cpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.cpp +@@ -27,6 +27,7 @@ + #include "gc_implementation/g1/heapRegionManager.inline.hpp" + #include "gc_implementation/g1/heapRegionSet.inline.hpp" + #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" ++#include "gc_implementation/g1/g1NUMA.hpp" + #include "gc_implementation/g1/concurrentG1Refine.hpp" + #include "memory/allocation.hpp" + +@@ -72,6 +73,34 @@ bool HeapRegionManager::can_expand(uint region) const { + return !_available_map.at(region); + } + ++HeapRegion* HeapRegionManager::allocate_free_region(bool is_old, uint requested_node_index) { ++ HeapRegion* hr = NULL; ++ bool from_head = is_old; ++ G1NUMA* numa = G1NUMA::numa(); ++ ++ if (requested_node_index != G1NUMA::AnyNodeIndex && numa->is_enabled()) { ++ // Try to allocate with requested node index. ++ hr = _free_list.remove_region_with_node_index(from_head, requested_node_index); ++ } ++ ++ if (hr == NULL) { ++ // If there's a single active node or we did not get a region from our requested node, ++ // try without requested node index. ++ hr = _free_list.remove_region(from_head); ++ } ++ ++ if (hr != NULL) { ++ assert(hr->next() == NULL, "Single region should not have next"); ++ assert(is_available(hr->hrm_index()), "Must be committed"); ++ ++ if (numa->is_enabled() && hr->node_index() < numa->num_active_nodes()) { ++ numa->update_statistics(G1NUMAStats::NewRegionAlloc, requested_node_index, hr->node_index()); ++ } ++ } ++ ++ return hr; ++} ++ + #ifdef ASSERT + bool HeapRegionManager::is_free(HeapRegion* hr) const { + return _free_list.contains(hr); +@@ -107,6 +136,10 @@ void HeapRegionManager::commit_regions(uint index, size_t num_regions) { + void HeapRegionManager::uncommit_regions(uint start, size_t num_regions) { + guarantee(num_regions >= 1, err_msg("Need to specify at least one region to uncommit, tried to uncommit zero regions at %u", start)); + guarantee(_num_committed >= num_regions, "pre-condition"); ++ // Reset node index to distinguish with committed regions. ++ for (uint i = start; i < start + num_regions; i++) { ++ at(i)->set_node_index(G1NUMA::UnknownNodeIndex); ++ } + + // Print before uncommitting. + if (G1CollectedHeap::heap()->hr_printer()->is_active()) { +@@ -155,6 +188,7 @@ void HeapRegionManager::make_regions_available(uint start, uint num_regions) { + MemRegion mr(bottom, bottom + HeapRegion::GrainWords); + + hr->initialize(mr); ++ hr->set_node_index(G1NUMA::numa()->index_for_region(hr)); + insert_into_free_list(at(i)); + } + } +@@ -204,6 +238,35 @@ uint HeapRegionManager::expand_at(uint start, uint num_regions) { + return expanded; + } + ++uint HeapRegionManager::expand_on_preferred_node(uint preferred_index) { ++ uint expand_candidate = UINT_MAX; ++ for (uint i = 0; i < max_length(); i++) { ++ if (is_available(i)) { ++ // Already in use continue ++ continue; ++ } ++ // Always save the candidate so we can expand later on. ++ expand_candidate = i; ++ if (is_on_preferred_index(expand_candidate, preferred_index)) { ++ // We have found a candidate on the preffered node, break. ++ break; ++ } ++ } ++ ++ if (expand_candidate == UINT_MAX) { ++ // No regions left, expand failed. 
++ return 0; ++ } ++ ++ make_regions_available(expand_candidate, 1); ++ return 1; ++} ++ ++bool HeapRegionManager::is_on_preferred_index(uint region_index, uint preferred_node_index) { ++ uint region_node_index = G1NUMA::numa()->preferred_node_index_for_index(region_index); ++ return region_node_index == preferred_node_index; ++} ++ + uint HeapRegionManager::find_contiguous(size_t num, bool empty_only) { + uint found = 0; + size_t length_found = 0; +diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.hpp +index 715122181..a06fa4f56 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.hpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionManager.hpp +@@ -120,6 +120,8 @@ class HeapRegionManager: public CHeapObj { + // the heap. Returns the length of the sequence found. If this value is zero, no + // sequence could be found, otherwise res_idx contains the start index of this range. + uint find_empty_from_idx_reverse(uint start_idx, uint* res_idx) const; ++ // Checks the G1MemoryNodeManager to see if this region is on the preferred node. ++ bool is_on_preferred_index(uint region_index, uint preferred_node_index); + // Allocate a new HeapRegion for the given index. + HeapRegion* new_heap_region(uint hrm_index); + #ifdef ASSERT +@@ -175,15 +177,7 @@ public: + _free_list.add_ordered(list); + } + +- HeapRegion* allocate_free_region(bool is_old) { +- HeapRegion* hr = _free_list.remove_region(is_old); +- +- if (hr != NULL) { +- assert(hr->next() == NULL, "Single region should not have next"); +- assert(is_available(hr->hrm_index()), "Must be committed"); +- } +- return hr; +- } ++ virtual HeapRegion* allocate_free_region(bool is_old, uint requested_node_index); + + inline void allocate_free_regions_starting_at(uint first, uint num_regions); + +@@ -197,6 +191,10 @@ public: + return _free_list.length(); + } + ++ uint num_free_regions(uint node_index) const { ++ return _free_list.length(node_index); ++ } ++ + size_t total_capacity_bytes() const { + return num_free_regions() * HeapRegion::GrainBytes; + } +@@ -225,6 +223,9 @@ public: + // this. + uint expand_at(uint start, uint num_regions); + ++ // Try to expand on the given node index. ++ virtual uint expand_on_preferred_node(uint node_index); ++ + // Find a contiguous set of empty regions of length num. Returns the start index of + // that set, or G1_NO_HRM_INDEX. + uint find_contiguous_only_empty(size_t num) { return find_contiguous(num, true); } +diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.cpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.cpp +index 09d12fd3f..881bab784 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.cpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.cpp +@@ -24,6 +24,7 @@ + + #include "precompiled.hpp" + #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" ++#include "gc_implementation/g1/g1NUMA.hpp" + #include "gc_implementation/g1/heapRegionRemSet.hpp" + #include "gc_implementation/g1/heapRegionSet.inline.hpp" + +@@ -100,6 +101,12 @@ HeapRegionSetBase::HeapRegionSetBase(const char* name, bool humongous, bool free + _count() + { } + ++FreeRegionList::FreeRegionList(const char* name, HRSMtSafeChecker* mt_safety_checker): ++ HeapRegionSetBase(name, false /* humongous */, true /* empty */, mt_safety_checker), ++ _node_info(G1NUMA::numa()->is_enabled() ? 
new NodeInfo() : NULL) { ++ clear(); ++} ++ + void FreeRegionList::set_unrealistically_long_length(uint len) { + guarantee(_unrealistically_long_length == 0, "should only be set once"); + _unrealistically_long_length = len; +@@ -127,6 +134,7 @@ void FreeRegionList::remove_all(bool uncommit) { + OrderAccess::storestore(); + curr->set_uncommit_list(false); + } ++ decrease_length(curr->node_index()); + curr = next; + } + clear(); +@@ -144,6 +152,9 @@ void FreeRegionList::add_ordered(FreeRegionList* from_list) { + if (from_list->is_empty()) { + return; + } ++ if (_node_info != NULL && from_list->_node_info != NULL) { ++ _node_info->add(from_list->_node_info); ++ } + + #ifdef ASSERT + FreeRegionListIterator iter(from_list); +@@ -246,6 +257,7 @@ void FreeRegionList::remove_starting_at(HeapRegion* first, uint num_regions) { + remove(curr); + + count++; ++ decrease_length(curr->node_index()); + curr = next; + } + +@@ -278,6 +290,9 @@ void FreeRegionList::clear() { + _head = NULL; + _tail = NULL; + _last = NULL; ++ if (_node_info!= NULL) { ++ _node_info->clear(); ++ } + } + + void FreeRegionList::print_on(outputStream* out, bool print_contents) { +@@ -454,6 +469,29 @@ void HumongousRegionSetMtSafeChecker::check() { + } + } + ++FreeRegionList::NodeInfo::NodeInfo() : _numa(G1NUMA::numa()), _length_of_node(NULL), ++ _num_nodes(_numa->num_active_nodes()) { ++ assert(UseNUMA, "Invariant"); ++ ++ _length_of_node = NEW_C_HEAP_ARRAY(uint, _num_nodes, mtGC); ++} ++ ++FreeRegionList::NodeInfo::~NodeInfo() { ++ FREE_C_HEAP_ARRAY(uint, _length_of_node, mtGC); ++} ++ ++void FreeRegionList::NodeInfo::clear() { ++ for (uint i = 0; i < _num_nodes; ++i) { ++ _length_of_node[i] = 0; ++ } ++} ++ ++void FreeRegionList::NodeInfo::add(NodeInfo* info) { ++ for (uint i = 0; i < _num_nodes; ++i) { ++ _length_of_node[i] += info->_length_of_node[i]; ++ } ++} ++ + void FreeRegionList_test() { + FreeRegionList l("test"); + +diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.hpp +index ede3136d5..42f0bd4d0 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.hpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.hpp +@@ -197,22 +197,45 @@ public: + // add / remove one region at a time or concatenate two lists. + + class FreeRegionListIterator; ++class G1NUMA; + + class FreeRegionList : public HeapRegionSetBase { + friend class FreeRegionListIterator; + + private: ++ // This class is only initialized if there are multiple active nodes. ++ class NodeInfo : public CHeapObj { ++ G1NUMA* _numa; ++ uint* _length_of_node; ++ uint _num_nodes; ++ ++ public: ++ NodeInfo(); ++ ~NodeInfo(); ++ ++ inline void increase_length(uint node_index); ++ inline void decrease_length(uint node_index); ++ ++ inline uint length(uint index) const; ++ ++ void clear(); ++ ++ void add(NodeInfo* info); ++ }; ++ + HeapRegion* _head; + HeapRegion* _tail; + + // _last is used to keep track of where we added an element the last + // time. It helps to improve performance when adding several ordered items in a row. 
+ HeapRegion* _last; +- ++ NodeInfo* _node_info; + static uint _unrealistically_long_length; + + inline HeapRegion* remove_from_head_impl(); + inline HeapRegion* remove_from_tail_impl(); ++ inline void increase_length(uint node_index); ++ inline void decrease_length(uint node_index); + + protected: + virtual void fill_in_ext_msg_extra(hrs_ext_msg* msg); +@@ -221,9 +244,12 @@ protected: + virtual void clear(); + + public: +- FreeRegionList(const char* name, HRSMtSafeChecker* mt_safety_checker = NULL): +- HeapRegionSetBase(name, false /* humongous */, true /* empty */, mt_safety_checker) { +- clear(); ++ FreeRegionList(const char* name, HRSMtSafeChecker* mt_safety_checker = NULL); ++ ++ ~FreeRegionList() { ++ if (_node_info != NULL) { ++ delete _node_info; ++ } + } + + void verify_list(); +@@ -244,6 +270,10 @@ public: + // Removes from head or tail based on the given argument. + HeapRegion* remove_region(bool from_head); + ++ HeapRegion* remove_region_with_node_index(bool from_head, ++ uint requested_node_index); ++ ++ + // Merge two ordered lists. The result is also ordered. The order is + // determined by hrm_index. + void add_ordered(FreeRegionList* from_list); +@@ -260,6 +290,9 @@ public: + + virtual void verify(); + ++ using HeapRegionSetBase::length; ++ uint length(uint node_index) const; ++ + virtual void print_on(outputStream* out, bool print_contents = false); + }; + +diff --git a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.inline.hpp b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.inline.hpp +index f1fce751a..5ce306288 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.inline.hpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/heapRegionSet.inline.hpp +@@ -25,6 +25,7 @@ + #ifndef SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONSET_INLINE_HPP + #define SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONSET_INLINE_HPP + ++#include "gc_implementation/g1/g1NUMA.hpp" + #include "gc_implementation/g1/heapRegionSet.hpp" + + inline void HeapRegionSetBase::add(HeapRegion* hr) { +@@ -94,6 +95,7 @@ inline void FreeRegionList::add_ordered(HeapRegion* hr) { + _head = hr; + } + _last = hr; ++ increase_length(hr->node_index()); + } + + inline HeapRegion* FreeRegionList::remove_from_head_impl() { +@@ -145,8 +147,106 @@ inline HeapRegion* FreeRegionList::remove_region(bool from_head) { + + // remove() will verify the region and check mt safety. + remove(hr); ++ decrease_length(hr->node_index()); + return hr; + } + ++inline HeapRegion* FreeRegionList::remove_region_with_node_index(bool from_head, ++ uint requested_node_index) { ++ assert(UseNUMA, "Invariant"); ++ ++ const uint max_search_depth = G1NUMA::numa()->max_search_depth(); ++ HeapRegion* cur; ++ ++ // Find the region to use, searching from _head or _tail as requested. ++ size_t cur_depth = 0; ++ if (from_head) { ++ for (cur = _head; ++ cur != NULL && cur_depth < max_search_depth; ++ cur = cur->next(), ++cur_depth) { ++ if (requested_node_index == cur->node_index()) { ++ break; ++ } ++ } ++ } else { ++ for (cur = _tail; ++ cur != NULL && cur_depth < max_search_depth; ++ cur = cur->prev(), ++cur_depth) { ++ if (requested_node_index == cur->node_index()) { ++ break; ++ } ++ } ++ } ++ ++ // Didn't find a region to use. ++ if (cur == NULL || cur_depth >= max_search_depth) { ++ return NULL; ++ } ++ ++ // Splice the region out of the list. 
++ HeapRegion* prev = cur->prev(); ++ HeapRegion* next = cur->next(); ++ if (prev == NULL) { ++ _head = next; ++ } else { ++ prev->set_next(next); ++ } ++ if (next == NULL) { ++ _tail = prev; ++ } else { ++ next->set_prev(prev); ++ } ++ cur->set_prev(NULL); ++ cur->set_next(NULL); ++ ++ if (_last == cur) { ++ _last = NULL; ++ } ++ ++ remove(cur); ++ decrease_length(cur->node_index()); ++ ++ return cur; ++} ++ ++inline void FreeRegionList::NodeInfo::increase_length(uint node_index) { ++ if (node_index < _num_nodes) { ++ _length_of_node[node_index] += 1; ++ } ++} ++ ++inline void FreeRegionList::NodeInfo::decrease_length(uint node_index) { ++ if (node_index < _num_nodes) { ++ assert(_length_of_node[node_index] > 0, ++ err_msg("Current length %u should be greater than zero for node %u", ++ _length_of_node[node_index], node_index)); ++ _length_of_node[node_index] -= 1; ++ } ++} ++ ++inline uint FreeRegionList::NodeInfo::length(uint node_index) const { ++ return _length_of_node[node_index]; ++} ++ ++inline void FreeRegionList::increase_length(uint node_index) { ++ if (_node_info != NULL) { ++ return _node_info->increase_length(node_index); ++ } ++} ++ ++inline void FreeRegionList::decrease_length(uint node_index) { ++ if (_node_info != NULL) { ++ return _node_info->decrease_length(node_index); ++ } ++} ++ ++inline uint FreeRegionList::length(uint node_index) const { ++ if (_node_info != NULL) { ++ return _node_info->length(node_index); ++ } else { ++ return 0; ++ } ++} ++ + #endif // SHARE_VM_GC_IMPLEMENTATION_G1_HEAPREGIONSET_INLINE_HPP + +diff --git a/hotspot/src/share/vm/memory/universe.cpp b/hotspot/src/share/vm/memory/universe.cpp +index 53f402172..1b66e0cb8 100644 +--- a/hotspot/src/share/vm/memory/universe.cpp ++++ b/hotspot/src/share/vm/memory/universe.cpp +@@ -78,6 +78,7 @@ + #include "gc_implementation/concurrentMarkSweep/cmsAdaptiveSizePolicy.hpp" + #include "gc_implementation/concurrentMarkSweep/cmsCollectorPolicy.hpp" + #include "gc_implementation/g1/g1CollectedHeap.inline.hpp" ++#include "gc_implementation/g1/g1NUMA.hpp" + #include "gc_implementation/g1/g1CollectorPolicy_ext.hpp" + #include "gc_implementation/parallelScavenge/parallelScavengeHeap.hpp" + #include "gc_implementation/shenandoah/shenandoahHeap.hpp" +@@ -811,6 +812,7 @@ jint Universe::initialize_heap() { + #if INCLUDE_ALL_GCS + G1CollectorPolicyExt* g1p = new G1CollectorPolicyExt(); + g1p->initialize_all(); ++ G1NUMA::create(); + G1CollectedHeap* g1h = new G1CollectedHeap(g1p); + Universe::_collectedHeap = g1h; + #else // INCLUDE_ALL_GCS +diff --git a/hotspot/src/share/vm/prims/whitebox.cpp b/hotspot/src/share/vm/prims/whitebox.cpp +index 2247b29f3..c44697f0d 100644 +--- a/hotspot/src/share/vm/prims/whitebox.cpp ++++ b/hotspot/src/share/vm/prims/whitebox.cpp +@@ -28,6 +28,7 @@ + #include "memory/metaspaceShared.hpp" + #include "memory/iterator.hpp" + #include "memory/universe.hpp" ++#include "memory/oopFactory.hpp" + #include "oops/oop.inline.hpp" + + #include "classfile/symbolTable.hpp" +@@ -354,6 +355,30 @@ WB_ENTRY(jobject, WB_G1AuxiliaryMemoryUsage(JNIEnv* env)) + Handle h = MemoryService::create_MemoryUsage_obj(usage, CHECK_NULL); + return JNIHandles::make_local(env, h()); + WB_END ++ ++WB_ENTRY(jint, WB_G1ActiveMemoryNodeCount(JNIEnv* env, jobject o)) ++ if (UseG1GC) { ++ G1NUMA* numa = G1NUMA::numa(); ++ return (jint)numa->num_active_nodes(); ++ } ++ THROW_MSG_0(vmSymbols::java_lang_UnsupportedOperationException(), "WB_G1ActiveMemoryNodeCount: G1 GC is not enabled"); ++WB_END ++ ++WB_ENTRY(jintArray, 
WB_G1MemoryNodeIds(JNIEnv* env, jobject o)) ++ if (UseG1GC) { ++ G1NUMA* numa = G1NUMA::numa(); ++ int num_node_ids = (int)numa->num_active_nodes(); ++ const int* node_ids = numa->node_ids(); ++ ++ typeArrayOop result = oopFactory::new_intArray(num_node_ids, CHECK_NULL); ++ for (int i = 0; i < num_node_ids; i++) { ++ result->int_at_put(i, (jint)node_ids[i]); ++ } ++ return (jintArray) JNIHandles::make_local(env, result); ++ } ++ THROW_MSG_NULL(vmSymbols::java_lang_UnsupportedOperationException(), "WB_G1MemoryNodeIds: G1 GC is not enabled"); ++WB_END ++ + #endif // INCLUDE_ALL_GCS + + #if INCLUDE_NMT +@@ -1246,6 +1271,9 @@ static JNINativeMethod methods[] = { + {CC"g1StartConcMarkCycle", CC"()Z", (void*)&WB_G1StartMarkCycle }, + {CC"g1AuxiliaryMemoryUsage", CC"()Ljava/lang/management/MemoryUsage;", + (void*)&WB_G1AuxiliaryMemoryUsage }, ++ {CC"g1ActiveMemoryNodeCount", CC"()I", (void*)&WB_G1ActiveMemoryNodeCount }, ++ {CC"g1MemoryNodeIds", CC"()[I", (void*)&WB_G1MemoryNodeIds }, ++ + #endif // INCLUDE_ALL_GCS + #if INCLUDE_NMT + {CC"NMTMalloc", CC"(J)J", (void*)&WB_NMTMalloc }, +diff --git a/hotspot/src/share/vm/runtime/os.hpp b/hotspot/src/share/vm/runtime/os.hpp +index cff2e9c3e..a60ef4206 100644 +--- a/hotspot/src/share/vm/runtime/os.hpp ++++ b/hotspot/src/share/vm/runtime/os.hpp +@@ -369,6 +369,7 @@ class os: AllStatic { + static size_t numa_get_leaf_groups(int *ids, size_t size); + static bool numa_topology_changed(); + static int numa_get_group_id(); ++ static int numa_get_group_id_for_address(const void* address); + + // Page manipulation + struct page_info { +diff --git a/hotspot/test/gc/g1/TestG1NUMATouchRegions.java b/hotspot/test/gc/g1/TestG1NUMATouchRegions.java +new file mode 100644 +index 000000000..c5322849e +--- /dev/null ++++ b/hotspot/test/gc/g1/TestG1NUMATouchRegions.java +@@ -0,0 +1,245 @@ ++/* ++ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved. ++ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. ++ * ++ * This code is free software; you can redistribute it and/or modify it ++ * under the terms of the GNU General Public License version 2 only, as ++ * published by the Free Software Foundation. ++ * ++ * This code is distributed in the hope that it will be useful, but WITHOUT ++ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or ++ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License ++ * version 2 for more details (a copy is included in the LICENSE file that ++ * accompanied this code). ++ * ++ * You should have received a copy of the GNU General Public License version ++ * 2 along with this work; if not, write to the Free Software Foundation, ++ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. ++ * ++ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA ++ * or visit www.oracle.com if you need additional information or have any ++ * questions. ++ */ ++ ++package gc.g1; ++ ++/** ++ * @test TestG1NUMATouchRegions ++ * @summary Ensure the bottom of the given heap regions are properly touched with requested NUMA id. ++ * @key gc ++ * @requires vm.gc.G1 ++ * @requires os.family == "linux" ++ * @library /test/lib ++ * @modules java.base/jdk.internal.misc ++ * java.management ++ * @build sun.hotspot.WhiteBox ++ * @run driver ClassFileInstaller sun.hotspot.WhiteBox ++ * @run main/othervm -XX:+UseG1GC -Xbootclasspath/a:. 
-XX:+UseNUMA -XX:+UnlockDiagnosticVMOptions -XX:+WhiteBoxAPI gc.g1.TestG1NUMATouchRegions ++ */ ++ ++import java.util.LinkedList; ++import jdk.test.lib.process.OutputAnalyzer; ++import jdk.test.lib.process.ProcessTools; ++import sun.hotspot.WhiteBox; ++ ++public class TestG1NUMATouchRegions { ++ enum NUMASupportStatus { ++ NOT_CHECKED, ++ SUPPORT, ++ NOT_SUPPORT ++ }; ++ ++ static int G1HeapRegionSize1MB = 1; ++ static int G1HeapRegionSize8MB = 8; ++ ++ static NUMASupportStatus status = NUMASupportStatus.NOT_CHECKED; ++ ++ public static void main(String[] args) throws Exception { ++ // 1. Page size < G1HeapRegionSize ++ // Test default page with 1MB heap region size ++ testMemoryTouch("-XX:-UseLargePages", G1HeapRegionSize1MB); ++ // 2. Page size > G1HeapRegionSize ++ // Test large page with 1MB heap region size. ++ testMemoryTouch("-XX:+UseLargePages", G1HeapRegionSize1MB); ++ // 3. Page size < G1HeapRegionSize ++ // Test large page with 8MB heap region size. ++ testMemoryTouch("-XX:+UseLargePages", G1HeapRegionSize8MB); ++ } ++ ++ // On Linux, always UseNUMA is enabled if there is multiple active numa nodes. ++ static NUMASupportStatus checkNUMAIsEnabled(OutputAnalyzer output) { ++ boolean supportNUMA = Boolean.parseBoolean(output.firstMatch("\\bUseNUMA\\b.*?=.*?([a-z]+)", 1)); ++ System.out.println("supportNUMA=" + supportNUMA); ++ return supportNUMA ? NUMASupportStatus.SUPPORT : NUMASupportStatus.NOT_SUPPORT; ++ } ++ ++ static long parseSizeString(String size) { ++ long multiplier = 1; ++ ++ if (size.endsWith("B")) { ++ multiplier = 1; ++ } else if (size.endsWith("K")) { ++ multiplier = 1024; ++ } else if (size.endsWith("M")) { ++ multiplier = 1024 * 1024; ++ } else if (size.endsWith("G")) { ++ multiplier = 1024 * 1024 * 1024; ++ } else { ++ throw new IllegalArgumentException("Expected memory string '" + size + "'to end with either of: B, K, M, G"); ++ } ++ ++ long longSize = Long.parseUnsignedLong(size.substring(0, size.length() - 1)); ++ ++ return longSize * multiplier; ++ } ++ ++ static long heapPageSize(OutputAnalyzer output) { ++ String HeapPageSizePattern = "Heap: .*page_size=([^ ]+)"; ++ String str = output.firstMatch(HeapPageSizePattern, 1); ++ ++ if (str == null) { ++ output.reportDiagnosticSummary(); ++ throw new RuntimeException("Match from '" + HeapPageSizePattern + "' got 'null'"); ++ } ++ ++ return parseSizeString(str); ++ } ++ ++ // 1. -UseLargePages: default page, page size < G1HeapRegionSize ++ // +UseLargePages: large page size <= G1HeapRegionSize ++ // ++ // Each 'int' represents a numa id of single HeapRegion (bottom page). ++ // e.g. 1MB heap region, 2MB page size and 2 NUMA nodes system ++ // Check the first set(2 regions) ++ // 0| ...omitted..| 0 ++ // 1| ...omitted..| 1 ++ static void checkCase1Pattern(OutputAnalyzer output, int index, long g1HeapRegionSize, long actualPageSize, int[] memoryNodeIds) throws Exception { ++ StringBuilder sb = new StringBuilder(); ++ ++ // Append index which means heap region index. ++ sb.append(String.format("%6d", index)); ++ sb.append("| .* | "); ++ ++ // Append page node id. ++ sb.append(memoryNodeIds[index]); ++ ++ output.shouldMatch(sb.toString()); ++ } ++ ++ // 3. +UseLargePages: large page size > G1HeapRegionSize ++ // ++ // As a OS page is consist of multiple heap regions, log also should be ++ // printed multiple times for same numa id. ++ // e.g. 
1MB heap region, 2MB page size and 2 NUMA nodes system ++ // Check the first set(4 regions) ++ // 0| ...omitted..| 0 ++ // 1| ...omitted..| 0 ++ // 2| ...omitted..| 1 ++ // 3| ...omitted..| 1 ++ static void checkCase2Pattern(OutputAnalyzer output, int index, long g1HeapRegionSize, long actualPageSize, int[] memoryNodeIds) throws Exception { ++ StringBuilder sb = new StringBuilder(); ++ ++ // Append page range. ++ int lines_to_print = (int)(actualPageSize / g1HeapRegionSize); ++ for (int i = 0; i < lines_to_print; i++) { ++ // Append index which means heap region index. ++ sb.append(String.format("%6d", index * lines_to_print + i)); ++ sb.append("| .* | "); ++ ++ // Append page node id. ++ sb.append(memoryNodeIds[index]); ++ ++ output.shouldMatch(sb.toString()); ++ sb.setLength(0); ++ } ++ } ++ ++ static void checkNUMALog(OutputAnalyzer output, int regionSizeInMB) throws Exception { ++ WhiteBox wb = WhiteBox.getWhiteBox(); ++ long g1HeapRegionSize = regionSizeInMB * 1024 * 1024; ++ long actualPageSize = heapPageSize(output); ++ long defaultPageSize = (long)wb.getVMPageSize(); ++ int memoryNodeCount = wb.g1ActiveMemoryNodeCount(); ++ int[] memoryNodeIds = wb.g1MemoryNodeIds(); ++ ++ System.out.println("node count=" + memoryNodeCount + ", actualPageSize=" + actualPageSize); ++ // Check for the first set of active numa nodes. ++ for (int index = 0; index < memoryNodeCount; index++) { ++ if (actualPageSize <= defaultPageSize) { ++ checkCase1Pattern(output, index, g1HeapRegionSize, actualPageSize, memoryNodeIds); ++ } else { ++ checkCase2Pattern(output, index, g1HeapRegionSize, actualPageSize, memoryNodeIds); ++ } ++ } ++ } ++ ++ static void testMemoryTouch(String largePagesSetting, int regionSizeInMB) throws Exception { ++ // Skip testing with message. ++ if (status == NUMASupportStatus.NOT_SUPPORT) { ++ System.out.println("NUMA is not supported"); ++ return; ++ } ++ ++ ProcessBuilder pb_enabled = ProcessTools.createJavaProcessBuilder( ++ "-Xbootclasspath/a:.", ++ "-Xlog:pagesize,gc+heap+region=trace", ++ "-XX:+UseG1GC", ++ "-Xmx128m", ++ "-Xms128m", ++ "-XX:+UnlockDiagnosticVMOptions", ++ "-XX:+WhiteBoxAPI", ++ "-XX:+PrintFlagsFinal", ++ "-XX:+UseNUMA", ++ "-XX:+AlwaysPreTouch", ++ largePagesSetting, ++ "-XX:G1HeapRegionSize=" + regionSizeInMB + "m", ++ GCTest.class.getName()); ++ OutputAnalyzer output = new OutputAnalyzer(pb_enabled.start()); ++ ++ // Check NUMA availability. ++ if (status == NUMASupportStatus.NOT_CHECKED) { ++ status = checkNUMAIsEnabled(output); ++ } ++ ++ if (status == NUMASupportStatus.SUPPORT) { ++ checkNUMALog(output, regionSizeInMB); ++ } else { ++ // Exit with message for the first test. ++ System.out.println("NUMA is not supported"); ++ } ++ } ++ ++ static class GCTest { ++ public static final int M = 1024*1024; ++ public static LinkedList garbageList = new LinkedList(); ++ // A large object referenced by a static. ++ static int[] filler = new int[10 * M]; ++ ++ public static void genGarbage() { ++ for (int i = 0; i < 32*1024; i++) { ++ garbageList.add(new int[100]); ++ } ++ garbageList.clear(); ++ } ++ ++ public static void main(String[] args) { ++ ++ int[] large = new int[M]; ++ Object ref = large; ++ ++ System.out.println("Creating garbage"); ++ for (int i = 0; i < 100; i++) { ++ // A large object that will be reclaimed eagerly. ++ large = new int[6*M]; ++ genGarbage(); ++ // Make sure that the compiler cannot completely remove ++ // the allocation of the large object until here. 
++ System.out.println(large); ++ } ++ ++ // Keep the reference to the first object alive. ++ System.out.println(ref); ++ System.out.println("Done"); ++ } ++ } ++} +diff --git a/jdk/test/lib/sun/hotspot/WhiteBox.java b/jdk/test/lib/sun/hotspot/WhiteBox.java +index 9497c9530..a6d773bc8 100644 +--- a/jdk/test/lib/sun/hotspot/WhiteBox.java ++++ b/jdk/test/lib/sun/hotspot/WhiteBox.java +@@ -141,6 +141,8 @@ public class WhiteBox { + public native int g1RegionSize(); + public native MemoryUsage g1AuxiliaryMemoryUsage(); + public native Object[] parseCommandLine(String commandline, DiagnosticCommand[] args); ++ public native int g1ActiveMemoryNodeCount(); ++ public native int[] g1MemoryNodeIds(); + + // Parallel GC + public native long psVirtualSpaceAlignment(); diff --git a/openjdk-1.8.0.spec b/openjdk-1.8.0.spec index 83df46b..c70b9a0 100644 --- a/openjdk-1.8.0.spec +++ b/openjdk-1.8.0.spec @@ -918,7 +918,7 @@ Provides: java-%{javaver}-%{origin}-accessibility%{?1} = %{epoch}:%{version}-%{r Name: java-%{javaver}-%{origin} Version: %{javaver}.%{updatever}.%{buildver} -Release: 10 +Release: 11 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides. This created a @@ -1106,6 +1106,7 @@ Patch192: add_kae_implementation_add_default_conf_file.patch Patch193: improve_algorithmConstraints_checkAlgorithm_performance.patch Patch194: modify_the_default_iteration_time_and_forks_in_the_JMH_of_KAEProvider.patch Patch195: support_CMS_parallel_inspection.patch +Patch196: g1gc-numa-aware-Implementation.patch ############################################# # @@ -1560,6 +1561,7 @@ pushd %{top_level_dir_name} %patch192 -p1 %patch194 -p1 %patch195 -p1 +%patch196 -p1 popd # System library fixes @@ -2176,6 +2178,9 @@ require "copy_jdk_configs.lua" %endif %changelog +* Sat Jun 12 2021 hu_bo_dao - 1:1.8.0.292-b10.11 +- add g1gc-numa-aware-Implementation.patch + * Wed Jun 10 2021 hu_bo_dao - 1:1.8.0.292-b10.10 - add support_CMS_parallel_inspection.patch -- Gitee From 6f9be5d77523265b7674ec27171d1c0489fb2496 Mon Sep 17 00:00:00 2001 From: kuenking111 Date: Sat, 12 Jun 2021 12:36:48 +0800 Subject: [PATCH 3/6] I3VFBA: Implementation of Blas hotspot function in Intrinsics --- ..._Blas_hotspot_function_in_Intrinsics.patch | 1638 +++++++++++++++++ openjdk-1.8.0.spec | 9 +- 2 files changed, 1645 insertions(+), 2 deletions(-) create mode 100755 implementation_of_Blas_hotspot_function_in_Intrinsics.patch diff --git a/implementation_of_Blas_hotspot_function_in_Intrinsics.patch b/implementation_of_Blas_hotspot_function_in_Intrinsics.patch new file mode 100755 index 0000000..39335db --- /dev/null +++ b/implementation_of_Blas_hotspot_function_in_Intrinsics.patch @@ -0,0 +1,1638 @@ +commit 9856171f660f6edb240bb4e7e95a87b60f4d2bc3 +Author: hubodao +Date: Tue Jun 8 08:07:38 2021 +0000 + + blas instrinsic + +diff --git a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp +index 7080ea10d..62a8ab7bd 100644 +--- a/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp ++++ b/hotspot/src/cpu/aarch64/vm/c1_LIRGenerator_aarch64.cpp +@@ -919,6 +919,126 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + } + } + ++void LIRGenerator::do_dgemm_dgemm(Intrinsic* x) { ++ assert(x->number_of_arguments() == 16, "wrong type"); ++ ++ LIRItem ta(x->argument_at(0), this); ++ LIRItem tb(x->argument_at(1), this); ++ LIRItem m(x->argument_at(2), this); ++ LIRItem 
n(x->argument_at(3), this); ++ LIRItem k(x->argument_at(4), this); ++ LIRItem alpha(x->argument_at(5), this); ++ LIRItem a(x->argument_at(6), this); ++ LIRItem a_offset(x->argument_at(7), this); ++ LIRItem lda(x->argument_at(8), this); ++ LIRItem b(x->argument_at(9), this); ++ LIRItem b_offset(x->argument_at(10), this); ++ LIRItem ldb(x->argument_at(11), this); ++ LIRItem beta(x->argument_at(12), this); ++ LIRItem c(x->argument_at(13), this); ++ LIRItem c_offset(x->argument_at(14), this); ++ LIRItem ldc(x->argument_at(15), this); ++ ++ ta.load_item(); ++ tb.load_item(); ++ m.load_item(); ++ n.load_item(); ++ k.load_item(); ++ alpha.load_item(); ++ a.load_item(); ++ a_offset.load_nonconstant(); ++ lda.load_item(); ++ b.load_item(); ++ b_offset.load_nonconstant(); ++ ldb.load_item(); ++ beta.load_item(); ++ c.load_item(); ++ c_offset.load_nonconstant(); ++ ldc.load_item(); ++ ++ LIR_Opr ta_base = ta.result(); ++ LIR_Opr tb_base = tb.result(); ++ LIR_Opr r_m = m.result(); ++ LIR_Opr r_n = n.result(); ++ LIR_Opr r_k = k.result(); ++ LIR_Opr r_alpha = alpha.result(); ++ LIR_Opr a_base = a.result(); ++ LIR_Opr r_a_offset = a_offset.result(); ++ LIR_Opr r_lda = lda.result(); ++ LIR_Opr b_base = b.result(); ++ LIR_Opr r_b_offset = b_offset.result(); ++ LIR_Opr r_ldb = ldb.result(); ++ LIR_Opr r_beta = beta.result(); ++ LIR_Opr c_base = c.result(); ++ LIR_Opr r_c_offset = c_offset.result(); ++ LIR_Opr r_ldc = ldc.result(); ++ ++ LIR_Opr ta_value = load_String_value(ta_base); ++ LIR_Opr ta_offset = load_String_offset(ta_base); ++ LIR_Opr tb_value = load_String_value(tb_base); ++ LIR_Opr tb_offset = load_String_offset(tb_base); ++ ++ LIR_Address* addr_ta = emit_array_address(ta_value, ta_offset, T_CHAR, false); ++ LIR_Address* addr_tb = emit_array_address(tb_value, tb_offset, T_CHAR, false); ++ LIR_Address* addr_a = emit_array_address(a_base, r_a_offset, T_DOUBLE, false); ++ LIR_Address* addr_b = emit_array_address(b_base, r_b_offset, T_DOUBLE, false); ++ LIR_Address* addr_c = emit_array_address(c_base, r_c_offset, T_DOUBLE, false); ++ ++ LIR_Opr tmp = new_pointer_register(); ++ LIR_Opr ta_addr = new_register(T_ADDRESS); ++ __ leal(LIR_OprFact::address(addr_ta), tmp); ++ __ move(tmp, ta_addr); ++ tmp = new_pointer_register(); ++ LIR_Opr tb_addr = new_register(T_ADDRESS); ++ __ leal(LIR_OprFact::address(addr_tb), tmp); ++ __ move(tmp, tb_addr); ++ tmp = new_pointer_register(); ++ LIR_Opr a_addr = new_register(T_ADDRESS); ++ __ leal(LIR_OprFact::address(addr_a), tmp); ++ __ move(tmp, a_addr); ++ tmp = new_pointer_register(); ++ LIR_Opr b_addr = new_register(T_ADDRESS); ++ __ leal(LIR_OprFact::address(addr_b), tmp); ++ __ move(tmp, b_addr); ++ tmp = new_pointer_register(); ++ LIR_Opr c_addr = new_register(T_ADDRESS); ++ __ leal(LIR_OprFact::address(addr_c), tmp); ++ __ move(tmp, c_addr); ++ ++ BasicTypeList signature(13); ++ signature.append(T_ADDRESS); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ signature.append(T_INT); ++ signature.append(T_INT); ++ signature.append(T_DOUBLE); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ signature.append(T_DOUBLE); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ ++ LIR_OprList* args = new LIR_OprList(); ++ args->append(ta_addr); ++ args->append(tb_addr); ++ args->append(r_m); ++ args->append(r_n); ++ args->append(r_k); ++ args->append(r_alpha); ++ args->append(a_addr); ++ args->append(r_lda); ++ args->append(b_addr); ++ args->append(r_ldb); ++ 
args->append(r_beta); ++ args->append(c_addr); ++ args->append(r_ldc); ++ ++ assert(StubRoutines::dgemmDgemm() != NULL, "invalid stub entry"); ++ call_runtime(&signature, args, StubRoutines::dgemmDgemm(), voidType, NULL); ++ set_no_result(x); ++} + + void LIRGenerator::do_ArrayCopy(Intrinsic* x) { + assert(x->number_of_arguments() == 5, "wrong type"); +@@ -1038,6 +1158,114 @@ void LIRGenerator::do_update_CRC32(Intrinsic* x) { + } + } + ++void LIRGenerator::do_dgemv_dgemv(Intrinsic* x) { ++ assert(x->number_of_arguments() == 14, "wrong type"); ++ ++ LIRItem trans(x->argument_at(0), this); ++ LIRItem m(x->argument_at(1), this); ++ LIRItem n(x->argument_at(2), this); ++ LIRItem alpha(x->argument_at(3), this); ++ LIRItem array_a(x->argument_at(4), this); ++ LIRItem array_a_offset(x->argument_at(5), this); ++ LIRItem lda(x->argument_at(6), this); ++ LIRItem array_x(x->argument_at(7), this); ++ LIRItem array_x_offset(x->argument_at(8), this); ++ LIRItem incx(x->argument_at(9), this); ++ LIRItem beta(x->argument_at(10), this); ++ LIRItem array_y(x->argument_at(11), this); ++ LIRItem array_y_offset(x->argument_at(12), this); ++ LIRItem incy(x->argument_at(13), this); ++ ++ trans.load_item(); ++ m.load_item(); ++ n.load_item(); ++ alpha.load_item(); ++ array_a.load_item(); ++ array_a_offset.load_nonconstant(); ++ lda.load_item(); ++ array_x.load_item(); ++ array_x_offset.load_nonconstant(); ++ incx.load_item(); ++ beta.load_item(); ++ array_y.load_item(); ++ array_y_offset.load_nonconstant(); ++ incy.load_item(); ++ ++ LIR_Opr res_trans_base = trans.result(); ++ LIR_Opr res_m = m.result(); ++ LIR_Opr res_n = n.result(); ++ LIR_Opr res_alpha = alpha.result(); ++ LIR_Opr res_a_base = array_a.result(); ++ LIR_Opr res_a_offset = array_a_offset.result(); ++ LIR_Opr res_lda = lda.result(); ++ LIR_Opr res_x_base = array_x.result(); ++ LIR_Opr res_x_offset = array_x_offset.result(); ++ LIR_Opr res_incx = incx.result(); ++ LIR_Opr res_beta = beta.result(); ++ LIR_Opr res_y_base = array_y.result(); ++ LIR_Opr res_y_offset = array_y_offset.result(); ++ LIR_Opr res_incy = incy.result(); ++ ++ LIR_Opr addr_trans_base = LIRGenerator::load_String_value(res_trans_base); ++ LIR_Opr addr_trans_offset = LIRGenerator::load_String_offset(res_trans_base); ++ LIR_Address* addr_trans = emit_array_address(addr_trans_base, addr_trans_offset, T_CHAR, false); ++ ++ LIR_Address* addr_a = emit_array_address(res_a_base, res_a_offset, T_DOUBLE, false); ++ LIR_Address* addr_x = emit_array_address(res_x_base, res_x_offset, T_DOUBLE, false); ++ LIR_Address* addr_y = emit_array_address(res_y_base, res_y_offset, T_DOUBLE, false); ++ ++ // load addr to register ++ LIR_Opr tmp = new_pointer_register(); ++ LIR_Opr trans_addr = new_register(T_ADDRESS); ++ __ leal(LIR_OprFact::address(addr_trans), tmp); ++ __ move(tmp, trans_addr); ++ ++ LIR_Opr tmp1 = new_pointer_register(); ++ LIR_Opr a_addr = new_register(T_ADDRESS); ++ __ leal(LIR_OprFact::address(addr_a), tmp1); ++ __ move(tmp1, a_addr); ++ ++ LIR_Opr tmp2 = new_pointer_register(); ++ LIR_Opr x_addr = new_register(T_ADDRESS); ++ __ leal(LIR_OprFact::address(addr_x), tmp2); ++ __ move(tmp2, x_addr); ++ ++ LIR_Opr tmp3 = new_pointer_register(); ++ LIR_Opr y_addr = new_register(T_ADDRESS); ++ __ leal(LIR_OprFact::address(addr_y), tmp3); ++ __ move(tmp3, y_addr); ++ ++ BasicTypeList signature(11); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ signature.append(T_INT); ++ signature.append(T_DOUBLE); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ 
signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ signature.append(T_DOUBLE); ++ signature.append(T_ADDRESS); ++ signature.append(T_INT); ++ ++ LIR_OprList* args = new LIR_OprList(); ++ args->append(trans_addr); ++ args->append(res_m); ++ args->append(res_n); ++ args->append(res_alpha); ++ args->append(a_addr); ++ args->append(res_lda); ++ args->append(x_addr); ++ args->append(res_incx); ++ args->append(res_beta); ++ args->append(y_addr); ++ args->append(res_incy); ++ ++ assert(StubRoutines::dgemvDgemv() != NULL, "invalid stub entry"); ++ call_runtime(&signature, args, StubRoutines::dgemvDgemv(), voidType, NULL); ++ set_no_result(x); ++} ++ + // _i2l, _i2f, _i2d, _l2i, _l2f, _l2d, _f2i, _f2l, _f2d, _d2i, _d2l, _d2f + // _i2b, _i2c, _i2s + void LIRGenerator::do_Convert(Convert* x) { +diff --git a/hotspot/src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp b/hotspot/src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp +index c0aaa1de4..a275a6a99 100644 +--- a/hotspot/src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp ++++ b/hotspot/src/cpu/aarch64/vm/interpreterGenerator_aarch64.hpp +@@ -50,6 +50,11 @@ void generate_transcendental_entry(AbstractInterpreter::MethodKind kind, int fpa + address generate_CRC32_updateBytes_entry(AbstractInterpreter::MethodKind kind); + void lock_method(void); + void generate_stack_overflow_check(void); ++ void load_String_value(Register src, Register dst); ++ void load_String_offset(Register src, Register dst); ++ void emit_array_address(Register src, Register idx, Register dst, BasicType type); ++ address generate_Dgemm_dgemm_entry(); ++ address generate_Dgemv_dgemv_entry(); + + void generate_counter_incr(Label* overflow, Label* profile_method, Label* profile_method_continue); + void generate_counter_overflow(Label* do_continue); +diff --git a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp +index c5ec637a1..125983179 100644 +--- a/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp ++++ b/hotspot/src/cpu/aarch64/vm/stubGenerator_aarch64.cpp +@@ -3221,6 +3221,44 @@ class StubGenerator: public StubCodeGenerator { + return start; + } + ++ address load_BLAS_library() { ++ // Try to load BLAS library. ++ const char library_name[] = "openblas"; ++ char err_buf[1024] = {0}; ++ char path[JVM_MAXPATHLEN] = {0}; ++ os::jvm_path(path, sizeof(path)); ++ int jvm_offset = -1; ++ ++ // Match "jvm[^/]*" in jvm_path. ++ const char* last_name = strrchr(path, '/'); ++ last_name = last_name ? last_name : path; ++ const char* last_lib_name = strstr(last_name, "jvm"); ++ if (last_lib_name != NULL) { ++ jvm_offset = last_lib_name - path; ++ } ++ ++ address library = NULL; ++ // Find the BLAS shared library. ++ // Search path: /jre/lib///libopenblas.so ++ if (jvm_offset >= 0) { ++ if (jvm_offset + strlen(library_name) + strlen(os::dll_file_extension()) < JVM_MAXPATHLEN) { ++ strncpy(&path[jvm_offset], library_name, strlen(library_name)); ++ strncat(&path[jvm_offset], os::dll_file_extension(), strlen(os::dll_file_extension())); ++ library = (address)os::dll_load(path, err_buf, sizeof(err_buf)); ++ } ++ } ++ return library; ++ } ++ ++ address get_BLAS_func_entry(address library, const char* func_name) { ++ if (library == NULL) { ++ return NULL; ++ } ++ ++ // Try to find BLAS function entry. 
++ return (address)os::dll_lookup((void*)library, func_name); ++ } ++ + /** + * Arguments: + * +@@ -3254,6 +3292,218 @@ class StubGenerator: public StubCodeGenerator { + return start; + } + ++ // Parameter conversion from JVM to native BLAS ++ // ++ // Register: ++ // r0: transa r0: transa ++ // r1: transb r1: transb ++ // r2: m r2: &m ++ // r3: n r3: &n ++ // r4: k =========> r4: &k ++ // r5: A r5: &alpha ++ // r6: lda r6: A ++ // r7: B r7: &lda ++ // v0: alpha ++ // v1: beta ++ // ++ // Stack: ++ // |-------| |-------| ++ // | ldc | | ldc | ++ // |-------| |-------| ++ // | C | | C | ++ // |-------| |-------| ++ // | ldb | | ldb | ++ // |-------| <-- sp |-------| ++ // | | | m | ++ // |-------| |-------| ++ // | | | n | ++ // |-------| |-------| ++ // | | | k | ++ // |-------| |-------| ++ // | | | lda | ++ // |-------| |-------| ++ // | | | alpha | ++ // |-------| |-------| ++ // | | | beta | ++ // |-------| =========> |-------| ++ // | | | lr | ++ // |-------| |-------| ++ // | | | rfp | ++ // |-------| |-------| <-- fp ++ // | ... | | ... | ++ // |-------| |-------| ++ // | | | &ldc | ++ // |-------| |-------| ++ // | | | C | ++ // |-------| |-------| ++ // | | | &bata | ++ // |-------| |-------| ++ // | | | &ldb | ++ // |-------| |-------| ++ // | | | B | ++ // |-------| |-------| <-- sp ++ address generate_dgemmDgemm(address library) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "dgemm_dgemm"); ++ ++ address fn = get_BLAS_func_entry(library, "dgemm_"); ++ if (fn == NULL) return NULL; ++ ++ address start = __ pc(); ++ ++ const Register transa = c_rarg0; ++ const Register transb = c_rarg1; ++ const Register m = c_rarg2; ++ const Register n = c_rarg3; ++ const Register k = c_rarg4; ++ const FloatRegister alpha = c_farg0; ++ const Register A = c_rarg5; ++ const Register lda = c_rarg6; ++ const Register B = c_rarg7; ++ const FloatRegister beta = c_farg1; ++ ++ BLOCK_COMMENT("Entry:"); ++ ++ // extend stack ++ __ sub(sp, sp, 0x60); ++ __ stp(rfp, lr, Address(sp, 48)); ++ __ add(rfp, sp, 0x30); ++ // load BLAS function entry ++ __ mov(rscratch1, fn); ++ // C ++ __ ldr(rscratch2, Address(rfp, 56)); ++ // store m / n to stack ++ __ stpw(n, m, Address(rfp, 40)); ++ // &beta ++ __ add(r2, rfp, 0x10); ++ // store k / lda to stack ++ __ stpw(lda, k, Address(rfp, 32)); ++ // load ldc ++ __ add(r3, rfp, 0x40); ++ // store C / &beta ++ __ stp(r2, rscratch2, Address(sp, 16)); ++ // &ldb ++ __ add(r2, rfp, 0x30); ++ // store B ++ __ str(B, Address(sp)); ++ // move A from r5 to r6 ++ __ mov(r6, A); ++ // store ldc ++ __ str(r3, Address(sp, 32)); ++ // &alpha ++ __ add(r5, rfp, 0x18); ++ // store &ldb ++ __ str(r2, Address(sp, 8)); ++ // &k ++ __ add(r4, rfp, 0x24); ++ // store alpha / beta ++ __ stpd(beta, alpha, Address(rfp, 16)); ++ // load &lda to r7 ++ __ add(r7, rfp, 0x20); ++ // load &n ++ __ add(r3, rfp, 0x28); ++ // load &m ++ __ add(r2, rfp, 0x2c); ++ // call dgemm ++ __ blr(rscratch1); ++ ++ // restore rfp and lr ++ __ ldp(rfp, lr, Address(sp, 48)); ++ // exit stack ++ __ add(sp, sp, 0x60); ++ __ ret(lr); ++ ++ return start; ++ } ++ ++ /** ++ * public void dgemv(String trans, int m, int n, ++ * double alpha, double[] a, int lda, ++ * double[] x, int incx, ++ * double beta, double[] y, int incy) ++ * ++ * Arguments: ++ * ++ * Inputs: ++ * c_rarg0 - char* trans ++ * c_rarg1 - int m ++ * c_rarg2 - int n ++ * d0/c_farg0 - double alpha ++ * c_rarg3 - double[] a ++ * c_rarg4 - int lda ++ * c_rarg5 - double[] x ++ * c_rarg6 - int incx ++ * d1/c_farg1 - double beta ++ 
* c_rarg7 - double[] y ++ * [sp] - int incy ++ * ++ * Output: ++ * null ++ * ++ */ ++ ++ address generate_dgemvDgemv(address library) { ++ __ align(CodeEntryAlignment); ++ StubCodeMark mark(this, "StubRoutines", "dgemv_dgemv"); ++ ++ address fn = get_BLAS_func_entry(library, "dgemv_"); ++ if (fn == NULL) return NULL; ++ ++ address start = __ pc(); ++ BLOCK_COMMENT("Entry: "); ++ ++ Register trans = c_rarg0; ++ Register m = c_rarg1; ++ Register n = c_rarg2; ++ Register a = c_rarg3; ++ Register lda = c_rarg4; ++ Register x = c_rarg5; ++ Register incx = c_rarg6; ++ Register y = c_rarg7; ++ ++ FloatRegister alpha = c_farg0; ++ FloatRegister beta = c_farg1; ++ ++ __ sub(sp, sp, 0x50); ++ __ stp(rfp, lr, Address(sp, 32)); ++ __ add(rfp, sp, 0x20); ++ ++ // no need for saving trans to tmp register, keep it in register x0 ++ __ strw(m, Address(rfp, 44)); ++ __ strw(n, Address(rfp, 40)); ++ __ strd(alpha, Address(rfp, 32)); ++ __ strw(lda, Address(rfp, 28)); ++ __ strw(incx, Address(rfp, 24)); ++ __ strd(beta, Address(rfp, 16)); ++ ++ // pre call ++ // load incy and push on stack, order incy --> y --> beta ++ __ add(r1, rfp, 0x30); ++ __ str(r1, Address(sp, 16)); ++ __ str(y, Address(sp, 8)); ++ __ add(r1, rfp, 0x10); ++ __ str(r1, Address(sp)); ++ ++ __ add(r7, rfp, 0x18); ++ __ mov(r6, x); ++ __ add(r5, rfp, 0x1c); ++ __ mov(r4, a); ++ __ add(r3, rfp, 0x20); ++ __ add(r2, rfp, 0x28); ++ __ add(r1, rfp, 0x2c); ++ ++ __ mov(rscratch1, fn); ++ __ blr(rscratch1); ++ ++ __ ldp(rfp, lr, Address(sp, 32)); ++ __ add(sp, sp, 0x50); ++ __ ret(lr); ++ ++ return start; ++ } ++ ++ ++ + /** + * Arguments: + * +@@ -4252,6 +4502,14 @@ class StubGenerator: public StubCodeGenerator { + StubRoutines::_crc_table_adr = (address)StubRoutines::aarch64::_crc_table; + StubRoutines::_updateBytesCRC32 = generate_updateBytesCRC32(); + } ++ ++ if (UseF2jBLASIntrinsics) { ++ StubRoutines::_BLAS_library = load_BLAS_library(); ++ // F2jBLAS intrinsics will use the implements in BLAS dynamic library ++ StubRoutines::_ddotF2jBLAS = generate_ddotF2jBLAS(); ++ StubRoutines::_dgemmDgemm = generate_dgemmDgemm(StubRoutines::_BLAS_library); ++ StubRoutines::_dgemvDgemv = generate_dgemvDgemv(StubRoutines::_BLAS_library); ++ } + } + + void generate_all() { +@@ -4296,10 +4554,6 @@ class StubGenerator: public StubCodeGenerator { + StubRoutines::_montgomerySquare = g.generate_multiply(); + } + +- if (UseF2jBLASIntrinsics) { +- StubRoutines::_ddotF2jBLAS = generate_ddotF2jBLAS(); +- } +- + if (UseAESIntrinsics) { + StubRoutines::_aescrypt_encryptBlock = generate_aescrypt_encryptBlock(); + StubRoutines::_aescrypt_decryptBlock = generate_aescrypt_decryptBlock(); +diff --git a/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp b/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp +index ae5cb3f32..924b6670f 100644 +--- a/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp ++++ b/hotspot/src/cpu/aarch64/vm/templateInterpreter_aarch64.cpp +@@ -856,6 +856,250 @@ address InterpreterGenerator::generate_CRC32_updateBytes_entry(AbstractInterpret + return generate_native_entry(false); + } + ++// Access the char-array of String ++void InterpreterGenerator::load_String_value(Register src, Register dst) { ++ // Need to cooperate with JDK-8243996 ++ int value_offset = java_lang_String::value_offset_in_bytes(); ++ ++ __ add(src, src, value_offset); ++ __ load_heap_oop(dst, Address(src)); ++} ++ ++void InterpreterGenerator::load_String_offset(Register src, Register dst) { ++ __ mov(dst, 0); ++ ++ // Get String value offset, because of 
order of initialization for Interpreter, ++ // we have to hardcode the offset for String value. (JDK-8243996) ++ if (java_lang_String::has_offset_field()) { ++ int offset_offset = java_lang_String::offset_offset_in_bytes(); ++ __ add(src, src, offset_offset); ++ __ ldrw(dst, Address(src)); ++ } ++} ++ ++void InterpreterGenerator::emit_array_address(Register src, Register idx, ++ Register dst, BasicType type) { ++ int offset_in_bytes = arrayOopDesc::base_offset_in_bytes(type); ++ int elem_size = type2aelembytes(type); ++ int shift = exact_log2(elem_size); ++ ++ __ lsl(idx, idx, shift); ++ __ add(idx, idx, offset_in_bytes); ++ __ add(dst, src, idx); ++} ++ ++/** ++ * Stub Arguments: ++ * ++ * c_rarg0 - char* transa ++ * c_rarg1 - char* transb ++ * c_rarg2 - int m ++ * c_rarg3 - int n ++ * c_rarg4 - int k ++ * d0 - double alpha ++ * c_rarg5 - double[] A ++ * c_rarg6 - int lda ++ * c_rarg7 - double[] B ++ * d1 - double beta ++ * [sp + 16] - int ldc ++ * [sp + 8] - double[] C ++ * [sp] - int ldb ++ * ++ */ ++address InterpreterGenerator::generate_Dgemm_dgemm_entry() { ++ if (!UseF2jBLASIntrinsics || (StubRoutines::dgemmDgemm() == NULL)) return NULL; ++ ++ address entry = __ pc(); ++ ++ // r13: senderSP must preserved for slow path ++ ++ // Arguments are reversed on java expression stack ++ const Register ta = c_rarg0; ++ const Register tb = c_rarg1; ++ const Register m = c_rarg2; ++ const Register n = c_rarg3; ++ const Register k = c_rarg4; ++ const FloatRegister alpha = c_farg0; ++ const Register A = c_rarg5; ++ const Register lda = c_rarg6; ++ const Register B = c_rarg7; ++ const FloatRegister beta = c_farg1; ++ const Register tmp1 = rscratch1; ++ const Register tmp2 = rscratch2; ++ ++ // trana ++ __ ldr(ta, Address(esp, 17 * wordSize)); ++ load_String_value(ta, tmp1); ++ load_String_offset(ta, tmp2); ++ emit_array_address(tmp1, tmp2, ta, T_CHAR); ++ // tranb ++ __ ldr(tb, Address(esp, 16 * wordSize)); ++ load_String_value(tb, tmp1); ++ load_String_offset(tb, tmp2); ++ emit_array_address(tmp1, tmp2, tb, T_CHAR); ++ // m, n, k ++ __ ldrw(m, Address(esp, 15 * wordSize)); ++ __ ldrw(n, Address(esp, 14 * wordSize)); ++ __ ldrw(k, Address(esp, 13 * wordSize)); ++ // alpha ++ __ ldrd(alpha, Address(esp, 11 * wordSize)); ++ // A ++ __ ldr(tmp1, Address(esp, 10 * wordSize)); ++ __ mov(tmp2, 0); ++ __ ldrw(tmp2, Address(esp, 9 * wordSize)); ++ emit_array_address(tmp1, tmp2, A, T_DOUBLE); ++ // lda ++ __ ldrw(lda, Address(esp, 8 * wordSize)); ++ // B ++ __ ldr(tmp1, Address(esp, 7 * wordSize)); ++ __ ldrw(tmp2, Address(esp, 6 * wordSize)); ++ emit_array_address(tmp1, tmp2, B, T_DOUBLE); ++ // beta ++ __ ldrd(beta, Address(esp, 3 * wordSize)); ++ // Start pushing arguments to machine stack. ++ // ++ // Remove the incoming args, peeling the machine SP back to where it ++ // was in the caller. This is not strictly necessary, but unless we ++ // do so the stack frame may have a garbage FP; this ensures a ++ // correct call stack that we can always unwind. The ANDR should be ++ // unnecessary because the sender SP in r13 is always aligned, but ++ // it doesn't hurt. 
++ __ andr(sp, r13, -16); ++ __ str(lr, Address(sp, -wordSize)); ++ // ldc ++ __ ldrw(tmp1, Address(esp, 0x0)); ++ __ strw(tmp1, Address(sp, 2 * -wordSize)); ++ // C ++ __ ldr(tmp1, Address(esp, 2 * wordSize)); ++ __ ldrw(tmp2, Address(esp, wordSize)); ++ emit_array_address(tmp1, tmp2, tmp1, T_DOUBLE); ++ __ str(tmp1, Address(sp, 3 * -wordSize)); ++ // ldb ++ __ ldrw(tmp2, Address(esp, 5 * wordSize)); ++ __ strw(tmp2, Address(sp, 4 * -wordSize)); ++ ++ // Call function ++ __ add(sp, sp, 4 * -wordSize); ++ address fn = CAST_FROM_FN_PTR(address, StubRoutines::dgemmDgemm()); ++ __ mov(tmp1, fn); ++ __ blr(tmp1); ++ ++ __ ldr(lr, Address(sp, 3 * wordSize)); ++ // For assert(Rd != sp || imm % 16 == 0) ++ __ add(sp, sp, 4 * wordSize); ++ __ br(lr); ++ ++ return entry; ++} ++ ++address InterpreterGenerator::generate_Dgemv_dgemv_entry() { ++ if (StubRoutines::dgemvDgemv() == NULL) return NULL; ++ address entry = __ pc(); ++ ++ const Register trans = c_rarg0; // trans ++ const Register m = c_rarg1; // m ++ const Register n = c_rarg2; // n ++ const Register a = c_rarg3; // array a addr ++ const Register lda = c_rarg4; // lda ++ const Register x = c_rarg5; // array x addr ++ const Register incx = c_rarg6; // incx ++ const Register y = c_rarg7; // array y addr ++ ++ const FloatRegister alpha = v0; // alpha ++ const FloatRegister beta = v1; // beta ++ ++ const Register tmp1 = rscratch1; ++ const Register tmp2 = rscratch2; ++ ++ // esp: expression stack of caller ++ // dgemv parameter ---> the position in stack ---> move to register ++ // | char* trans | | esp + 15 | | r0 | ++ // | int m | | esp + 14 | | r1 | ++ // | int n | | esp + 13 | | r2 | ++ // | double alpha | | esp + 11 | | v0 | ++ // ---------------- ------------ -------- ++ // | double* a | | esp + 10 | | | ++ // | | | | | r3 | ++ // | int a_offset | | esp + 9 | | | ++ // ---------------- ------------ -------- ++ // | int lda | | esp + 8 | | r4 | ++ // ---------------- ------------ -------- ++ // | double* x | | esp + 7 | | | ++ // | | | | | r5 | ++ // | int x_offset | | esp + 6 | | | ++ // ---------------- ------------ -------- ++ // | int incx | | esp + 5 | | r6 | ++ // | double beta | | esp + 3 | | v1 | ++ // ---------------- ------------ -------- ++ // | double* y | | esp + 2 | | | ++ // | | | | | r7 | ++ // | int y_offset | | esp + 1 | | | ++ // ---------------- ------------ -------- ++ // | int incy | | esp | | [sp] | ++ ++ ++ // trans ++ __ ldr(trans, Address(esp, 15 * wordSize)); ++ load_String_value(trans, tmp1); ++ load_String_offset(trans, tmp2); ++ emit_array_address(tmp1, tmp2, trans, T_CHAR); ++ // m, n ++ __ ldrw(m, Address(esp, 14 * wordSize)); ++ __ ldrw(n, Address(esp, 13 * wordSize)); ++ ++ // alpha ++ __ ldrd(alpha, Address(esp, 11 * wordSize)); ++ ++ // a ++ __ ldr(tmp1, Address(esp, 10 * wordSize)); ++ __ mov(tmp2, zr); ++ __ ldrw(tmp2, Address(esp, 9 * wordSize)); ++ emit_array_address(tmp1, tmp2, a, T_DOUBLE); ++ ++ // lda ++ __ ldrw(lda, Address(esp, 8 * wordSize)); ++ ++ // x ++ __ ldr(tmp1, Address(esp, 7 * wordSize)); ++ __ mov(tmp2, zr); ++ __ ldrw(tmp2, Address(esp, 6 * wordSize)); ++ emit_array_address(tmp1, tmp2, x, T_DOUBLE); ++ ++ // incx ++ __ ldrw(incx, Address(esp, 5 * wordSize)); ++ ++ // beta ++ __ ldrd(beta, Address(esp, 3 * wordSize)); ++ ++ // y ++ __ ldr(tmp1, Address(esp, 2 * wordSize)); ++ __ mov(tmp2, zr); ++ __ ldrw(tmp2, Address(esp, wordSize)); ++ emit_array_address(tmp1, tmp2, y, T_DOUBLE); ++ ++ // resume sp, restore lr ++ __ andr(sp, r13, -16); ++ __ str(lr, Address(sp, -wordSize)); ++ ++ // 
incy, push on stack ++ __ ldrw(tmp1, Address(esp, 0)); ++ __ strw(tmp1, Address(sp, 2 * -wordSize)); ++ ++ __ add(sp, sp, -2 * wordSize); ++ ++ // call function ++ address fn = CAST_FROM_FN_PTR(address, StubRoutines::dgemvDgemv()); ++ __ mov(tmp1, fn); ++ __ blr(tmp1); ++ ++ // resume lr ++ __ ldr(lr, Address(sp, wordSize)); ++ __ add(sp, sp, 2 * wordSize); ++ __ br(lr); ++ ++ return entry; ++} ++ + void InterpreterGenerator::bang_stack_shadow_pages(bool native_call) { + // Bang each page in the shadow zone. We can't assume it's been done for + // an interpreter frame with greater than a page of locals, so each page +@@ -1575,6 +1819,10 @@ address AbstractInterpreterGenerator::generate_method_entry( + : // fall thru + case Interpreter::java_util_zip_CRC32_updateByteBuffer + : entry_point = ((InterpreterGenerator*)this)->generate_CRC32_updateBytes_entry(kind); break; ++ case Interpreter::org_netlib_blas_Dgemm_dgemm ++ : entry_point = ((InterpreterGenerator*)this)->generate_Dgemm_dgemm_entry(); break; ++ case Interpreter::org_netlib_blas_Dgemv_dgemv ++ : entry_point = ((InterpreterGenerator*)this)->generate_Dgemv_dgemv_entry(); break; + default : ShouldNotReachHere(); break; + } + +diff --git a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp +index f1160792a..477c6e550 100644 +--- a/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp ++++ b/hotspot/src/cpu/sparc/vm/c1_LIRGenerator_sparc.cpp +@@ -754,6 +754,13 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + } + } + ++void LIRGenerator::do_dgemm_dgemm(Intrinsic* x) { ++ fatal("BLAS intrinsics are not implemented on this platform!"); ++} ++ ++void LIRGenerator::do_dgemv_dgemv(Intrinsic* x) { ++ fatal("BLAS intrinsics are not implemented on this platform!"); ++} + + void LIRGenerator::do_ArrayCopy(Intrinsic* x) { + assert(x->number_of_arguments() == 5, "wrong type"); +diff --git a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp +index dd23f005b..d1ecbaeb4 100644 +--- a/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp ++++ b/hotspot/src/cpu/x86/vm/c1_LIRGenerator_x86.cpp +@@ -896,6 +896,13 @@ void LIRGenerator::do_MathIntrinsic(Intrinsic* x) { + } + } + ++void LIRGenerator::do_dgemm_dgemm(Intrinsic* x) { ++ fatal("BLAS intrinsics are not implemented on this platform!"); ++} ++ ++void LIRGenerator::do_dgemv_dgemv(Intrinsic *x) { ++ fatal("Blas intrinsics are not implemented on this platform!"); ++} + + void LIRGenerator::do_ArrayCopy(Intrinsic* x) { + assert(x->number_of_arguments() == 5, "wrong type"); +diff --git a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp +index 459315cb7..79b2b2bb1 100644 +--- a/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp ++++ b/hotspot/src/share/vm/c1/c1_GraphBuilder.cpp +@@ -3672,6 +3672,20 @@ bool GraphBuilder::try_inline_intrinsics(ciMethod* callee) { + case vmIntrinsics::_fullFence : + break; + ++ case vmIntrinsics::_dgemm_dgemm: ++ if (!UseF2jBLASIntrinsics || (StubRoutines::dgemmDgemm() == NULL)) { ++ return false; ++ } ++ cantrap = false; ++ preserves_state = true; ++ break; ++ ++ case vmIntrinsics::_dgemv_dgemv: ++ if (!UseF2jBLASIntrinsics || (StubRoutines::dgemvDgemv() == NULL)) return false; ++ cantrap = false; ++ preserves_state = true; ++ break; ++ + default : return false; // do not inline + } + // create intrinsic node +diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp +index 65c04e3e5..070fd8052 100644 
+--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp ++++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.cpp +@@ -1208,7 +1208,7 @@ void LIRGenerator::do_Return(Return* x) { + set_no_result(x); + } + +-// Examble: ref.get() ++// Example: ref.get() + // Combination of LoadField and g1 pre-write barrier + void LIRGenerator::do_Reference_get(Intrinsic* x) { + +@@ -1220,7 +1220,7 @@ void LIRGenerator::do_Reference_get(Intrinsic* x) { + LIRItem reference(x->argument_at(0), this); + reference.load_item(); + +- // need to perform the null check on the reference objecy ++ // need to perform the null check on the reference object + CodeEmitInfo* info = NULL; + if (x->needs_null_check()) { + info = state_for(x); +@@ -1422,6 +1422,44 @@ LIR_Opr LIRGenerator::load_constant(LIR_Const* c) { + return result; + } + ++// Access the char-array of String ++LIR_Opr LIRGenerator::load_String_value(LIR_Opr str) { ++ int value_offset = java_lang_String::value_offset_in_bytes(); ++ LIR_Opr value = new_register(T_ARRAY); ++ LIR_Opr tmp = new_pointer_register(); ++ ++ __ add(str, LIR_OprFact::intConst(value_offset), tmp); ++ LIR_Address* array_addr = new LIR_Address(tmp, T_ARRAY); ++#if INCLUDE_ALL_GCS ++ if (UseShenandoahGC) { ++ LIR_Opr tmp = new_register(T_OBJECT); ++ LIR_Opr addr = ShenandoahBarrierSet::barrier_set()->bsc1()->resolve_address(this, array_addr, T_OBJECT, NULL); ++ __ load(addr->as_address_ptr(), tmp); ++ tmp = ShenandoahBarrierSet::barrier_set()->bsc1()->load_reference_barrier(this, tmp, addr); ++ __ move(tmp, value); ++ } else ++#endif ++ __ load(array_addr, value); ++ ++ return value; ++} ++ ++LIR_Opr LIRGenerator::load_String_offset(LIR_Opr str) { ++ LIR_Opr offset = new_register(T_INT); ++ ++ if (java_lang_String::has_offset_field()) { ++ LIR_Opr tmp = new_pointer_register(); ++ int offset_offset = java_lang_String::offset_offset_in_bytes(); ++ __ add(str, LIR_OprFact::intConst(offset_offset), tmp); ++ LIR_Address* addr = new LIR_Address(tmp, T_INT); ++ __ load(addr, offset); ++ } else { ++ offset = LIR_OprFact::intConst(0); ++ } ++ ++ return offset; ++} ++ + // Various barriers + + void LIRGenerator::pre_barrier(LIR_Opr addr_opr, LIR_Opr pre_val, +@@ -3290,6 +3328,14 @@ void LIRGenerator::do_Intrinsic(Intrinsic* x) { + do_update_CRC32(x); + break; + ++ case vmIntrinsics::_dgemm_dgemm: ++ do_dgemm_dgemm(x); ++ break; ++ ++ case vmIntrinsics::_dgemv_dgemv: ++ do_dgemv_dgemv(x); ++ break; ++ + default: ShouldNotReachHere(); break; + } + } +diff --git a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp +index 24d072b36..57d675c5b 100644 +--- a/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp ++++ b/hotspot/src/share/vm/c1/c1_LIRGenerator.hpp +@@ -210,6 +210,10 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { + // Given an immediate value, return an operand usable in logical ops. 
+ LIR_Opr load_immediate(int x, BasicType type); + ++ // Get String value and offset ++ LIR_Opr load_String_value(LIR_Opr str); ++ LIR_Opr load_String_offset(LIR_Opr str); ++ + void set_result(Value x, LIR_Opr opr) { + assert(opr->is_valid(), "must set to valid value"); + assert(x->operand()->is_illegal(), "operand should never change"); +@@ -251,6 +255,8 @@ class LIRGenerator: public InstructionVisitor, public BlockClosure { + void do_FPIntrinsics(Intrinsic* x); + void do_Reference_get(Intrinsic* x); + void do_update_CRC32(Intrinsic* x); ++ void do_dgemm_dgemm(Intrinsic* x); ++ void do_dgemv_dgemv(Intrinsic* x); + + void do_UnsafePrefetch(UnsafePrefetch* x, bool is_store); + +diff --git a/hotspot/src/share/vm/c1/c1_Runtime1.cpp b/hotspot/src/share/vm/c1/c1_Runtime1.cpp +index f379a0395..3ece7f6ea 100644 +--- a/hotspot/src/share/vm/c1/c1_Runtime1.cpp ++++ b/hotspot/src/share/vm/c1/c1_Runtime1.cpp +@@ -305,6 +305,8 @@ const char* Runtime1::name_for_address(address entry) { + FUNCTION_CASE(entry, JFR_TIME_FUNCTION); + #endif + FUNCTION_CASE(entry, StubRoutines::updateBytesCRC32()); ++ FUNCTION_CASE(entry, StubRoutines::dgemmDgemm()); ++ FUNCTION_CASE(entry, StubRoutines::dgemvDgemv()); + + #undef FUNCTION_CASE + +diff --git a/hotspot/src/share/vm/classfile/vmSymbols.cpp b/hotspot/src/share/vm/classfile/vmSymbols.cpp +index a5f89dbf8..34514022a 100644 +--- a/hotspot/src/share/vm/classfile/vmSymbols.cpp ++++ b/hotspot/src/share/vm/classfile/vmSymbols.cpp +@@ -333,6 +333,8 @@ bool vmIntrinsics::should_be_pinned(vmIntrinsics::ID id) { + #endif + case vmIntrinsics::_currentTimeMillis: + case vmIntrinsics::_nanoTime: ++ case vmIntrinsics::_dgemm_dgemm: ++ case vmIntrinsics::_dgemv_dgemv: + return true; + default: + return false; +diff --git a/hotspot/src/share/vm/classfile/vmSymbols.hpp b/hotspot/src/share/vm/classfile/vmSymbols.hpp +index 6bd8dbedd..942d172a1 100644 +--- a/hotspot/src/share/vm/classfile/vmSymbols.hpp ++++ b/hotspot/src/share/vm/classfile/vmSymbols.hpp +@@ -857,6 +857,14 @@ + do_intrinsic(_f2jblas_ddot, com_github_fommil_netlib_f2jblas, ddot_name, ddot_signature, F_R) \ + do_name( ddot_name, "ddot") \ + do_signature(ddot_signature, "(I[DI[DI)D") \ ++ do_class(org_netlib_blas_dgemm, "org/netlib/blas/Dgemm") \ ++ do_intrinsic(_dgemm_dgemm, org_netlib_blas_dgemm, dgemm_name, dgemm_signature, F_S) \ ++ do_name( dgemm_name, "dgemm") \ ++ do_signature(dgemm_signature, "(Ljava/lang/String;Ljava/lang/String;IIID[DII[DIID[DII)V") \ ++ do_class(org_netlib_blas_dgemv, "org/netlib/blas/Dgemv") \ ++ do_intrinsic(_dgemv_dgemv, org_netlib_blas_dgemv, dgemv_name, dgemv_signature, F_S) \ ++ do_name( dgemv_name, "dgemv") \ ++ do_signature(dgemv_signature, "(Ljava/lang/String;IID[DII[DIID[DII)V") \ + \ + /* support for sun.security.provider.SHA2 */ \ + do_class(sun_security_provider_sha2, "sun/security/provider/SHA2") \ +diff --git a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp +index e14c50bf0..293382b3c 100644 +--- a/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp ++++ b/hotspot/src/share/vm/interpreter/abstractInterpreter.hpp +@@ -100,6 +100,8 @@ class AbstractInterpreter: AllStatic { + java_util_zip_CRC32_update, // implementation of java.util.zip.CRC32.update() + java_util_zip_CRC32_updateBytes, // implementation of java.util.zip.CRC32.updateBytes() + java_util_zip_CRC32_updateByteBuffer, // implementation of java.util.zip.CRC32.updateByteBuffer() ++ org_netlib_blas_Dgemm_dgemm, // implementation of 
org.netlib.blas.Dgemm.dgemm() ++ org_netlib_blas_Dgemv_dgemv, // implementation of org.netlib.blas.Dgemv.dgemv() + number_of_method_entries, + invalid = -1 + }; +diff --git a/hotspot/src/share/vm/interpreter/cppInterpreter.cpp b/hotspot/src/share/vm/interpreter/cppInterpreter.cpp +index 0007aa8be..9e48a1d94 100644 +--- a/hotspot/src/share/vm/interpreter/cppInterpreter.cpp ++++ b/hotspot/src/share/vm/interpreter/cppInterpreter.cpp +@@ -31,17 +31,20 @@ + #ifdef CC_INTERP + # define __ _masm-> + +-void CppInterpreter::initialize() { ++void CppInterpreter::initialize_stub() { + if (_code != NULL) return; ++ int code_size = InterpreterCodeSize; ++ NOT_PRODUCT(code_size *= 4;) // debug uses extra interpreter code space ++ _code = new StubQueue(new InterpreterCodeletInterface, code_size, NULL, ++ "Interpreter"); ++} ++ ++void CppInterpreter::initialize_code() { + AbstractInterpreter::initialize(); + + // generate interpreter + { ResourceMark rm; + TraceTime timer("Interpreter generation", TraceStartupTime); +- int code_size = InterpreterCodeSize; +- NOT_PRODUCT(code_size *= 4;) // debug uses extra interpreter code space +- _code = new StubQueue(new InterpreterCodeletInterface, code_size, NULL, +- "Interpreter"); + InterpreterGenerator g(_code); + if (PrintInterpreter) print(); + } +diff --git a/hotspot/src/share/vm/interpreter/cppInterpreter.hpp b/hotspot/src/share/vm/interpreter/cppInterpreter.hpp +index 6a6447503..58efcfaf2 100644 +--- a/hotspot/src/share/vm/interpreter/cppInterpreter.hpp ++++ b/hotspot/src/share/vm/interpreter/cppInterpreter.hpp +@@ -54,7 +54,8 @@ class CppInterpreter: public AbstractInterpreter { + + public: + // Initialization/debugging +- static void initialize(); ++ static void initialize_stub(); ++ static void initialize_code(); + // this only returns whether a pc is within generated code for the interpreter. + + // This is a moderately dubious interface for the c++ interpreter. Only +diff --git a/hotspot/src/share/vm/interpreter/interpreter.cpp b/hotspot/src/share/vm/interpreter/interpreter.cpp +index 7ce4bdbb3..a313f2e63 100644 +--- a/hotspot/src/share/vm/interpreter/interpreter.cpp ++++ b/hotspot/src/share/vm/interpreter/interpreter.cpp +@@ -85,8 +85,6 @@ void InterpreterCodelet::print_on(outputStream* st) const { + // Implementation of platform independent aspects of Interpreter + + void AbstractInterpreter::initialize() { +- if (_code != NULL) return; +- + // make sure 'imported' classes are initialized + if (CountBytecodes || TraceBytecodes || StopInterpreterAt) BytecodeCounter::reset(); + if (PrintBytecodeHistogram) BytecodeHistogram::reset(); +@@ -114,8 +112,22 @@ void AbstractInterpreter::print() { + } + + +-void interpreter_init() { +- Interpreter::initialize(); ++// The reason that interpreter initialization is split into two parts is that the first part ++// needs to run before methods are loaded (which with CDS implies linked also), and the other ++// part needs to run after. The reason is that when methods are loaded (with CDS) or linked ++// (without CDS), the i2c adapters are generated that assert we are currently in the interpreter. ++// Asserting that requires knowledge about where the interpreter is in memory. Therefore, ++// establishing the interpreter address must be done before methods are loaded. However, ++// we would like to actually generate the interpreter after methods are loaded. That allows ++// us to remove otherwise hardcoded offsets regarding fields that are needed in the interpreter ++// code. This leads to a split if 1. 
reserving the memory for the interpreter, 2. loading methods ++// and 3. generating the interpreter. ++void interpreter_init_stub() { ++ Interpreter::initialize_stub(); ++} ++ ++void interpreter_init_code() { ++ Interpreter::initialize_code(); + #ifndef PRODUCT + if (TraceBytecodes) BytecodeTracer::set_closure(BytecodeTracer::std_closure()); + #endif // PRODUCT +@@ -251,6 +263,13 @@ AbstractInterpreter::MethodKind AbstractInterpreter::method_kind(methodHandle m) + return java_lang_ref_reference_get; + } + ++ if (UseF2jBLASIntrinsics) { ++ switch (m->intrinsic_id()) { ++ case vmIntrinsics::_dgemm_dgemm: return org_netlib_blas_Dgemm_dgemm; ++ case vmIntrinsics::_dgemv_dgemv: return org_netlib_blas_Dgemv_dgemv; ++ } ++ } ++ + // Accessor method? + if (m->is_accessor()) { + assert(m->size_of_parameters() == 1, "fast code for accessors assumes parameter size = 1"); +@@ -311,6 +330,8 @@ void AbstractInterpreter::print_method_kind(MethodKind kind) { + case java_util_zip_CRC32_update : tty->print("java_util_zip_CRC32_update"); break; + case java_util_zip_CRC32_updateBytes : tty->print("java_util_zip_CRC32_updateBytes"); break; + case java_util_zip_CRC32_updateByteBuffer : tty->print("java_util_zip_CRC32_updateByteBuffer"); break; ++ case org_netlib_blas_Dgemm_dgemm : tty->print("org_netlib_blas_Dgemm_dgemm"); break; ++ case org_netlib_blas_Dgemv_dgemv : tty->print("org_netlib_blas_Dgemv_dgemv"); break; + default: + if (kind >= method_handle_invoke_FIRST && + kind <= method_handle_invoke_LAST) { +diff --git a/hotspot/src/share/vm/interpreter/templateInterpreter.cpp b/hotspot/src/share/vm/interpreter/templateInterpreter.cpp +index 1520c7b1c..f38f05117 100644 +--- a/hotspot/src/share/vm/interpreter/templateInterpreter.cpp ++++ b/hotspot/src/share/vm/interpreter/templateInterpreter.cpp +@@ -32,12 +32,20 @@ + + # define __ _masm-> + +-void TemplateInterpreter::initialize() { ++void TemplateInterpreter::initialize_stub() { + if (_code != NULL) return; + // assertions + assert((int)Bytecodes::number_of_codes <= (int)DispatchTable::length, + "dispatch table too small"); + ++ // allocate interpreter ++ int code_size = InterpreterCodeSize; ++ NOT_PRODUCT(code_size *= 4;) // debug uses extra interpreter code space ++ _code = new StubQueue(new InterpreterCodeletInterface, code_size, NULL, ++ "Interpreter"); ++} ++ ++void TemplateInterpreter::initialize_code() { + AbstractInterpreter::initialize(); + + TemplateTable::initialize(); +@@ -45,10 +53,6 @@ void TemplateInterpreter::initialize() { + // generate interpreter + { ResourceMark rm; + TraceTime timer("Interpreter generation", TraceStartupTime); +- int code_size = InterpreterCodeSize; +- NOT_PRODUCT(code_size *= 4;) // debug uses extra interpreter code space +- _code = new StubQueue(new InterpreterCodeletInterface, code_size, NULL, +- "Interpreter"); + InterpreterGenerator g(_code); + if (PrintInterpreter) print(); + } +@@ -401,6 +405,11 @@ void TemplateInterpreterGenerator::generate_all() { + method_entry(java_util_zip_CRC32_updateByteBuffer) + } + ++ if (UseF2jBLASIntrinsics) { ++ method_entry(org_netlib_blas_Dgemm_dgemm) ++ method_entry(org_netlib_blas_Dgemv_dgemv) ++ } ++ + initialize_method_handle_entries(); + + // all native method kinds (must be one contiguous block) +diff --git a/hotspot/src/share/vm/interpreter/templateInterpreter.hpp b/hotspot/src/share/vm/interpreter/templateInterpreter.hpp +index 5f76dca8a..96da6353c 100644 +--- a/hotspot/src/share/vm/interpreter/templateInterpreter.hpp ++++ 
b/hotspot/src/share/vm/interpreter/templateInterpreter.hpp
+@@ -132,7 +132,8 @@ class TemplateInterpreter: public AbstractInterpreter {
+
+ public:
+ // Initialization/debugging
+- static void initialize();
++ static void initialize_stub();
++ static void initialize_code();
+ // this only returns whether a pc is within generated code for the interpreter.
+ static bool contains(address pc) { return _code != NULL && _code->contains(pc); }
+
+diff --git a/hotspot/src/share/vm/opto/escape.cpp b/hotspot/src/share/vm/opto/escape.cpp
+index 68631dbf2..0e0cc1028 100644
+--- a/hotspot/src/share/vm/opto/escape.cpp
++++ b/hotspot/src/share/vm/opto/escape.cpp
+@@ -979,7 +979,9 @@ void ConnectionGraph::process_call_arguments(CallNode *call) {
+ strcmp(call->as_CallLeaf()->_name, "mulAdd") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "montgomery_multiply") == 0 ||
+ strcmp(call->as_CallLeaf()->_name, "montgomery_square") == 0 ||
+- strcmp(call->as_CallLeaf()->_name, "f2jblas_ddot") == 0)
++ strcmp(call->as_CallLeaf()->_name, "f2jblas_ddot") == 0 ||
++ strcmp(call->as_CallLeaf()->_name, "dgemm_dgemm") == 0 ||
++ strcmp(call->as_CallLeaf()->_name, "dgemv_dgemv") == 0)
+ ))) {
+ call->dump();
+ fatal(err_msg_res("EA unexpected CallLeaf %s", call->as_CallLeaf()->_name));
+diff --git a/hotspot/src/share/vm/opto/graphKit.cpp b/hotspot/src/share/vm/opto/graphKit.cpp
+index 41a067ce2..1c3bc2e8c 100644
+--- a/hotspot/src/share/vm/opto/graphKit.cpp
++++ b/hotspot/src/share/vm/opto/graphKit.cpp
+@@ -2372,7 +2372,11 @@ Node* GraphKit::make_runtime_call(int flags,
+ Node* parm0, Node* parm1,
+ Node* parm2, Node* parm3,
+ Node* parm4, Node* parm5,
+- Node* parm6, Node* parm7) {
++ Node* parm6, Node* parm7,
++ Node* parm8, Node* parm9,
++ Node* parm10, Node* parm11,
++ Node* parm12, Node* parm13,
++ Node* parm14, Node* parm15) {
+ // Slow-path call
+ bool is_leaf = !(flags & RC_NO_LEAF);
+ bool has_io = (!is_leaf && !(flags & RC_NO_IO));
+@@ -2415,7 +2419,15 @@ Node* GraphKit::make_runtime_call(int flags,
+ if (parm5 != NULL) { call->init_req(TypeFunc::Parms+5, parm5);
+ if (parm6 != NULL) { call->init_req(TypeFunc::Parms+6, parm6);
+ if (parm7 != NULL) { call->init_req(TypeFunc::Parms+7, parm7);
+- /* close each nested if ===> */ } } } } } } } }
++ if (parm8 != NULL) { call->init_req(TypeFunc::Parms+8, parm8);
++ if (parm9 != NULL) { call->init_req(TypeFunc::Parms+9, parm9);
++ if (parm10 != NULL) { call->init_req(TypeFunc::Parms+10, parm10);
++ if (parm11 != NULL) { call->init_req(TypeFunc::Parms+11, parm11);
++ if (parm12 != NULL) { call->init_req(TypeFunc::Parms+12, parm12);
++ if (parm13 != NULL) { call->init_req(TypeFunc::Parms+13, parm13);
++ if (parm14 != NULL) { call->init_req(TypeFunc::Parms+14, parm14);
++ if (parm15 != NULL) { call->init_req(TypeFunc::Parms+15, parm15);
++ /* close each nested if ===> */ } } } } } } } } } } } } } } } }
+ assert(call->in(call->req()-1) != NULL, "must initialize all parms");
+
+ if (!is_leaf) {
+diff --git a/hotspot/src/share/vm/opto/graphKit.hpp b/hotspot/src/share/vm/opto/graphKit.hpp
+index 7a363fd33..e9a061acf 100644
+--- a/hotspot/src/share/vm/opto/graphKit.hpp
++++ b/hotspot/src/share/vm/opto/graphKit.hpp
+@@ -818,7 +818,11 @@ class GraphKit : public Phase {
+ Node* parm0 = NULL, Node* parm1 = NULL,
+ Node* parm2 = NULL, Node* parm3 = NULL,
+ Node* parm4 = NULL, Node* parm5 = NULL,
+- Node* parm6 = NULL, Node* parm7 = NULL);
++ Node* parm6 = NULL, Node* parm7 = NULL,
++ Node* parm8 = NULL, Node* parm9 = NULL,
++ Node* parm10 = NULL, Node* parm11 = NULL,
++ Node* parm12 
= NULL, Node* parm13 = NULL, ++ Node* parm14 = NULL, Node* parm15 = NULL); + enum { // flag values for make_runtime_call + RC_NO_FP = 1, // CallLeafNoFPNode + RC_NO_IO = 2, // do not hook IO edges +diff --git a/hotspot/src/share/vm/opto/library_call.cpp b/hotspot/src/share/vm/opto/library_call.cpp +index 5cbc0f012..10eeea217 100644 +--- a/hotspot/src/share/vm/opto/library_call.cpp ++++ b/hotspot/src/share/vm/opto/library_call.cpp +@@ -336,6 +336,8 @@ class LibraryCallKit : public GraphKit { + bool inline_montgomeryMultiply(); + bool inline_montgomerySquare(); + bool inline_ddotF2jBLAS(); ++ bool inline_dgemmDgemm(); ++ bool inline_dgemvDgemv(); + + bool inline_profileBoolean(); + }; +@@ -589,6 +591,8 @@ CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) { + break; + + case vmIntrinsics::_f2jblas_ddot: ++ case vmIntrinsics::_dgemm_dgemm: ++ case vmIntrinsics::_dgemv_dgemv: + if (!UseF2jBLASIntrinsics) return NULL; + break; + +@@ -988,9 +992,13 @@ bool LibraryCallKit::try_to_inline(int predicate) { + + case vmIntrinsics::_profileBoolean: + return inline_profileBoolean(); ++ + case vmIntrinsics::_f2jblas_ddot: + return inline_ddotF2jBLAS(); +- ++ case vmIntrinsics::_dgemm_dgemm: ++ return inline_dgemmDgemm(); ++ case vmIntrinsics::_dgemv_dgemv: ++ return inline_dgemvDgemv(); + default: + // If you get here, it may be that someone has added a new intrinsic + // to the list in vmSymbols.hpp without implementing it here. +@@ -6353,6 +6361,144 @@ bool LibraryCallKit::inline_ddotF2jBLAS() { + return true; + } + ++/** ++ * double org.netlib.blas.Dgemm.dgemm(java.lang.String transa, ++ * java.lang.String transb, int m, int n, int k, ++ * double alpha, double[] a, int offset_a, int lda, ++ * double[] b, int offset_b, int ldb, double beta, ++ * double[] c, int offset_c, int Ldc) ++ */ ++bool LibraryCallKit::inline_dgemmDgemm() { ++ assert(callee()->signature()->count() == 16, "Dgemm.dgemm has 16 parameters"); ++ ++ address stubAddr = StubRoutines::dgemmDgemm(); ++ if (stubAddr == NULL) return false; ++ ++ Node* transa = argument(0); ++ Node* transb = argument(1); ++ Node* m = argument(2); ++ Node* n = argument(3); ++ Node* k = argument(4); ++ Node* alpha = round_double_node(argument(5)); ++ Node* a = argument(7); ++ Node* a_offset = argument(8); ++ Node* lda = argument(9); ++ Node* b = argument(10); ++ Node* b_offset = argument(11); ++ Node* ldb = argument(12); ++ Node* beta = round_double_node(argument(13)); ++ Node* c = argument(15); ++ Node* c_offset = argument(16); ++ Node* ldc = argument(17); ++ ++ const Type* a_type = a->Value(&_gvn); ++ const Type* b_type = b->Value(&_gvn); ++ const Type* c_type = c->Value(&_gvn); ++ const TypeAryPtr* a_base_type = a_type->isa_aryptr(); ++ const TypeAryPtr* b_base_type = b_type->isa_aryptr(); ++ const TypeAryPtr* c_base_type = c_type->isa_aryptr(); ++ if (a_base_type == NULL || b_base_type == NULL || c_base_type == NULL) return false; ++ ++ ciKlass* a_klass = a_base_type->klass(); ++ ciKlass* b_klass = b_base_type->klass(); ++ ciKlass* c_klass = c_base_type->klass(); ++ if (a_klass == NULL || b_klass == NULL || c_klass == NULL) return false; ++ ++ BasicType a_elem_type = a_klass->as_array_klass()->element_type()->basic_type(); ++ BasicType b_elem_type = b_klass->as_array_klass()->element_type()->basic_type(); ++ BasicType c_elem_type = a_klass->as_array_klass()->element_type()->basic_type(); ++ if (a_elem_type != T_DOUBLE || b_elem_type != T_DOUBLE || c_elem_type != T_DOUBLE) return false; ++ ++ // get array a/b/c's addr ++ Node* a_start = 
array_element_address(a, a_offset, a_elem_type); ++ Node* b_start = array_element_address(b, b_offset, b_elem_type); ++ Node* c_start = array_element_address(c, c_offset, c_elem_type); ++ ++ // Get start addr of string ++ Node* transa_value = load_String_value(NULL, transa); ++ Node* transa_offset = load_String_offset(NULL, transa); ++ Node* transa_start = array_element_address(transa_value, transa_offset, T_CHAR); ++ Node* transb_value = load_String_value(NULL, transb); ++ Node* transb_offset = load_String_offset(NULL, transb); ++ Node* transb_start = array_element_address(transb_value, transb_offset, T_CHAR); ++ ++ const char *stubName = "dgemm_dgemm"; ++ make_runtime_call(RC_LEAF, OptoRuntime::dgemmDgemm_Type(), ++ stubAddr, stubName, TypePtr::BOTTOM, ++ transa_start, transb_start, m, n, k, alpha, top(), ++ a_start, lda, b_start, ldb, beta, top(), c_start, ldc); ++ ++ return true; ++} ++ ++/** ++ * void org.netlib.blas.Dgemv.dgemv(string trans, int m, int n, double alpha, ++ * double[] a, int _a_offset, int lda, ++ * double[] x, int _x_offset, int incx, double beta, ++ * double[] y, int _y_offset, int incy) ++ */ ++bool LibraryCallKit::inline_dgemvDgemv() { ++ assert(callee()->signature()->count() == 14, "F2jBLAS.dgemv has 14 parameters"); ++ Node* trans = argument(0); ++ Node* m = argument(1); ++ Node* n = argument(2); ++ Node* alpha = round_double_node(argument(3)); ++ Node* a = argument(5); ++ Node* a_offset = argument(6); ++ Node* lda = argument(7); ++ Node* x = argument(8); ++ Node* x_offset = argument(9); ++ Node* incx = argument(10); ++ Node* beta = round_double_node(argument(11)); ++ Node* y = argument(13); ++ Node* y_offset = argument(14); ++ Node* incy = argument(15); ++ ++ const Type* a_type = a->Value(&_gvn); ++ const Type* x_type = x->Value(&_gvn); ++ const Type* y_type = y->Value(&_gvn); ++ const TypeAryPtr* a_base_type = a_type->isa_aryptr(); ++ const TypeAryPtr* x_base_type = x_type->isa_aryptr(); ++ const TypeAryPtr* y_base_type = y_type->isa_aryptr(); ++ if (a_base_type == NULL || x_base_type == NULL || y_base_type == NULL) return false; ++ ++ ciKlass* a_klass = a_base_type->klass(); ++ ciKlass* x_klass = x_base_type->klass(); ++ ciKlass* y_klass = y_base_type->klass(); ++ ++ if (a_klass == NULL || x_klass == NULL || y_klass == NULL) return false; ++ ++ BasicType a_elem_type = a_klass->as_array_klass()->element_type()->basic_type(); ++ BasicType x_elem_type = x_klass->as_array_klass()->element_type()->basic_type(); ++ BasicType y_elem_type = y_klass->as_array_klass()->element_type()->basic_type(); ++ ++ if (a_elem_type != T_DOUBLE || x_elem_type != T_DOUBLE || y_elem_type != T_DOUBLE) return false; ++ ++ ++ address stubAddr = StubRoutines::dgemvDgemv(); ++ if (stubAddr == NULL) return false; ++ ++ // 'a_start' points to array a + scaled offset ++ Node* a_start = array_element_address(a, a_offset, a_elem_type); ++ // 'x_start' points to array x + scaled offset ++ Node* x_start = array_element_address(x, x_offset, x_elem_type); ++ // 'y_start' points to array y + scaled offset ++ Node* y_start = array_element_address(y, y_offset, y_elem_type); ++ ++ Node* no_ctrl = NULL; ++ ++ // get start addr of string ++ Node* trans_value = load_String_value(no_ctrl, trans); ++ Node* trans_offset = load_String_offset(no_ctrl, trans); ++ Node* trans_start = array_element_address(trans_value, trans_offset, T_CHAR); ++ ++ const char *stubName = "dgemv_dgemv"; ++ Node* call = make_runtime_call(RC_LEAF, OptoRuntime::dgemvDgemv_Type(), stubAddr, stubName, ++ TypePtr::BOTTOM, trans_start, m, 
n, alpha, top(), a_start, ++ lda, x_start, incx, beta, top(), y_start, incy); ++ return true; ++} ++ + /** + * Calculate CRC32 for ByteBuffer. + * int java.util.zip.CRC32.updateByteBuffer(int crc, long buf, int off, int len) +diff --git a/hotspot/src/share/vm/opto/runtime.cpp b/hotspot/src/share/vm/opto/runtime.cpp +index f1fe4d666..dc8f0c774 100644 +--- a/hotspot/src/share/vm/opto/runtime.cpp ++++ b/hotspot/src/share/vm/opto/runtime.cpp +@@ -944,6 +944,81 @@ const TypeFunc* OptoRuntime::ddotF2jBLAS_Type() { + return TypeFunc::make(domain, range); + } + ++/** ++ * double org.netlib.blas.Dgemm.dgemm(java.lang.String transa, ++ * java.lang.String transb, int m, int n, int k, ++ * double alpha, double[] a, int offset_a, int lda, ++ * double[] b, int offset_b, int ldb, double beta, ++ * double[] c, int offset_c, int Ldc) ++ */ ++const TypeFunc* OptoRuntime::dgemmDgemm_Type() { ++ // create input type (domain) ++ int num_args = 15; ++ int argcnt = num_args; ++ const Type** fields = TypeTuple::fields(argcnt); ++ int argp = TypeFunc::Parms; ++ ++ fields[argp++] = TypeAryPtr::CHARS; // char[] ++ fields[argp++] = TypeAryPtr::CHARS; // char[] ++ fields[argp++] = TypeInt::INT; // int m ++ fields[argp++] = TypeInt::INT; // int n ++ fields[argp++] = TypeInt::INT; // int k ++ fields[argp++] = Type::DOUBLE; // double alpha ++ fields[argp++] = Type::HALF; ++ fields[argp++] = TypeAryPtr::DOUBLES; // double[] a ++ fields[argp++] = TypeInt::INT; // int lda ++ fields[argp++] = TypeAryPtr::DOUBLES; // double[] b ++ fields[argp++] = TypeInt::INT; // int ldb ++ fields[argp++] = Type::DOUBLE; // double beta ++ fields[argp++] = Type::HALF; ++ fields[argp++] = TypeAryPtr::DOUBLES; // double[] c ++ fields[argp++] = TypeInt::INT; // int ldc ++ assert(argp == TypeFunc::Parms + argcnt, "correct decoding"); ++ const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, fields); ++ ++ // no result type needed ++ fields = TypeTuple::fields(1); ++ fields[TypeFunc::Parms + 0] = NULL; // void ++ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); ++ return TypeFunc::make(domain, range); ++} ++ ++/** ++ * void dgemv(String trans, int m, int n, double alpha, ++ * double[] a, int _a_offset, int lda, ++ * double[] x, int _x_offset, int incx, double beta, ++ * double[] y, int _y_offset, int incy) ++ */ ++const TypeFunc* OptoRuntime::dgemvDgemv_Type() { ++ // create input type (domain) ++ int num_args = 13; ++ int argcnt = num_args; ++ const Type** fields = TypeTuple::fields(argcnt); ++ int argp = TypeFunc::Parms; ++ ++ fields[argp++] = TypeAryPtr::CHARS; // char[] ++ fields[argp++] = TypeInt::INT; // int m ++ fields[argp++] = TypeInt::INT; // int n ++ fields[argp++] = Type::DOUBLE; // double alpha ++ fields[argp++] = Type::HALF; ++ fields[argp++] = TypeAryPtr::DOUBLES; // double[] a ++ fields[argp++] = TypeInt::INT; // int lda ++ fields[argp++] = TypeAryPtr::DOUBLES; // double[] x ++ fields[argp++] = TypeInt::INT; // int incx ++ fields[argp++] = Type::DOUBLE; // double beta ++ fields[argp++] = Type::HALF; ++ fields[argp++] = TypeAryPtr::DOUBLES; // double[] y ++ fields[argp++] = TypeInt::INT; // int incy ++ assert(argp == TypeFunc::Parms + argcnt, "correct decoding"); ++ const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms + argcnt, fields); ++ ++ // no result type needed ++ fields = TypeTuple::fields(1); ++ fields[TypeFunc::Parms + 0] = NULL; // void ++ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms, fields); ++ return TypeFunc::make(domain, range); ++} ++ + // for cipherBlockChaining calls 
of aescrypt encrypt/decrypt, four pointers and a length, returning int + const TypeFunc* OptoRuntime::cipherBlockChaining_aescrypt_Type() { + // create input type (domain) +diff --git a/hotspot/src/share/vm/opto/runtime.hpp b/hotspot/src/share/vm/opto/runtime.hpp +index 66d393c5c..e07c34c15 100644 +--- a/hotspot/src/share/vm/opto/runtime.hpp ++++ b/hotspot/src/share/vm/opto/runtime.hpp +@@ -318,6 +318,8 @@ private: + static const TypeFunc* updateBytesCRC32_Type(); + + static const TypeFunc* ddotF2jBLAS_Type(); ++ static const TypeFunc* dgemmDgemm_Type(); ++ static const TypeFunc* dgemvDgemv_Type(); + + // leaf on stack replacement interpreter accessor types + static const TypeFunc* osr_end_Type(); +diff --git a/hotspot/src/share/vm/runtime/init.cpp b/hotspot/src/share/vm/runtime/init.cpp +index 1512ccc96..4c133bd4e 100644 +--- a/hotspot/src/share/vm/runtime/init.cpp ++++ b/hotspot/src/share/vm/runtime/init.cpp +@@ -54,7 +54,8 @@ void VM_Version_init(); + void os_init_globals(); // depends on VM_Version_init, before universe_init + void stubRoutines_init1(); + jint universe_init(); // depends on codeCache_init and stubRoutines_init +-void interpreter_init(); // before any methods loaded ++void interpreter_init_stub(); // before any methods loaded ++void interpreter_init_code(); // after methods loaded, but before they are linked + void invocationCounter_init(); // before any methods loaded + void marksweep_init(); + void accessFlags_init(); +@@ -106,7 +107,7 @@ jint init_globals() { + if (status != JNI_OK) + return status; + +- interpreter_init(); // before any methods loaded ++ interpreter_init_stub(); // before methods get loaded + invocationCounter_init(); // before any methods loaded + marksweep_init(); + accessFlags_init(); +@@ -114,6 +115,7 @@ jint init_globals() { + InterfaceSupport_init(); + SharedRuntime::generate_stubs(); + universe2_init(); // dependent on codeCache_init and stubRoutines_init1 ++ interpreter_init_code(); // after universe2_init and before any method gets linked + referenceProcessor_init(); + jni_handles_init(); + #if INCLUDE_VM_STRUCTS +diff --git a/hotspot/src/share/vm/runtime/stubRoutines.cpp b/hotspot/src/share/vm/runtime/stubRoutines.cpp +index 10f438bc5..f2106d13a 100644 +--- a/hotspot/src/share/vm/runtime/stubRoutines.cpp ++++ b/hotspot/src/share/vm/runtime/stubRoutines.cpp +@@ -136,7 +136,10 @@ address StubRoutines::_sha512_implCompressMB = NULL; + address StubRoutines::_updateBytesCRC32 = NULL; + address StubRoutines::_crc_table_adr = NULL; + ++address StubRoutines::_BLAS_library = NULL; + address StubRoutines::_ddotF2jBLAS = NULL; ++address StubRoutines::_dgemmDgemm = NULL; ++address StubRoutines::_dgemvDgemv = NULL; + + address StubRoutines::_multiplyToLen = NULL; + address StubRoutines::_squareToLen = NULL; +diff --git a/hotspot/src/share/vm/runtime/stubRoutines.hpp b/hotspot/src/share/vm/runtime/stubRoutines.hpp +index a4eeb910d..16075d9f4 100644 +--- a/hotspot/src/share/vm/runtime/stubRoutines.hpp ++++ b/hotspot/src/share/vm/runtime/stubRoutines.hpp +@@ -214,7 +214,10 @@ class StubRoutines: AllStatic { + static address _updateBytesCRC32; + static address _crc_table_adr; + ++ static address _BLAS_library; + static address _ddotF2jBLAS; ++ static address _dgemmDgemm; ++ static address _dgemvDgemv; + + static address _multiplyToLen; + static address _squareToLen; +@@ -380,6 +383,8 @@ class StubRoutines: AllStatic { + static address crc_table_addr() { return _crc_table_adr; } + + static address ddotF2jBLAS() { return _ddotF2jBLAS; } ++ static address 
dgemmDgemm() { return _dgemmDgemm; } ++ static address dgemvDgemv() { return _dgemvDgemv; } + + static address multiplyToLen() {return _multiplyToLen; } + static address squareToLen() {return _squareToLen; } diff --git a/openjdk-1.8.0.spec b/openjdk-1.8.0.spec index c70b9a0..a818907 100644 --- a/openjdk-1.8.0.spec +++ b/openjdk-1.8.0.spec @@ -918,7 +918,7 @@ Provides: java-%{javaver}-%{origin}-accessibility%{?1} = %{epoch}:%{version}-%{r Name: java-%{javaver}-%{origin} Version: %{javaver}.%{updatever}.%{buildver} -Release: 11 +Release: 12 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides. This created a @@ -1107,6 +1107,7 @@ Patch193: improve_algorithmConstraints_checkAlgorithm_performance.patch Patch194: modify_the_default_iteration_time_and_forks_in_the_JMH_of_KAEProvider.patch Patch195: support_CMS_parallel_inspection.patch Patch196: g1gc-numa-aware-Implementation.patch +Patch197: implementation_of_Blas_hotspot_function_in_Intrinsics.patch ############################################# # @@ -1562,6 +1563,7 @@ pushd %{top_level_dir_name} %patch194 -p1 %patch195 -p1 %patch196 -p1 +%patch197 -p1 popd # System library fixes @@ -2178,7 +2180,10 @@ require "copy_jdk_configs.lua" %endif %changelog -* Sat Jun 12 2021 hu_bo_dao - 1:1.8.0.292-b10.11 +* Sat Jun 12 2021 kuenking111 - 1:1.8.0.292-b10.12 +- add implementation_of_Blas_hotspot_function_in_Intrinsics.patch + +* Sat Jun 12 2021 kuenking111 - 1:1.8.0.292-b10.11 - add g1gc-numa-aware-Implementation.patch * Wed Jun 10 2021 hu_bo_dao - 1:1.8.0.292-b10.10 -- Gitee From 68def87080d4a4d46548c7d8cbdad1c9fc0be364 Mon Sep 17 00:00:00 2001 From: kuenking111 Date: Wed, 16 Jun 2021 16:24:22 +0800 Subject: [PATCH 4/6] I3VT8V: fix G1GC memory leak in numa --- fix_G1GC_memory_leak_in_numa.patch | 38 ++++++++++++++++++++++++++++++ openjdk-1.8.0.spec | 7 +++++- 2 files changed, 44 insertions(+), 1 deletion(-) create mode 100755 fix_G1GC_memory_leak_in_numa.patch diff --git a/fix_G1GC_memory_leak_in_numa.patch b/fix_G1GC_memory_leak_in_numa.patch new file mode 100755 index 0000000..5bf5185 --- /dev/null +++ b/fix_G1GC_memory_leak_in_numa.patch @@ -0,0 +1,38 @@ +diff --git a/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.hpp b/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.hpp +index 9b26168a8..f6a80bf8d 100644 +--- a/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.hpp ++++ b/hotspot/src/share/vm/gc_implementation/g1/g1Allocator.hpp +@@ -45,6 +45,7 @@ protected: + public: + G1Allocator(G1CollectedHeap* heap) : + _g1h(heap), _summary_bytes_used(0) { } ++ virtual ~G1Allocator() { } + + // Node index of current thread. 
+ virtual uint current_node_index() const = 0; +@@ -126,7 +127,7 @@ protected: + + public: + G1DefaultAllocator(G1CollectedHeap* heap); +- ~G1DefaultAllocator(); ++ virtual ~G1DefaultAllocator(); + + uint current_node_index() const; + uint num_nodes() { return (uint)_num_alloc_regions; } +@@ -253,6 +254,7 @@ protected: + + public: + G1ParGCAllocator(G1CollectedHeap* g1h); ++ virtual ~G1ParGCAllocator() { } + + static G1ParGCAllocator* create_allocator(G1CollectedHeap* g1h); + +@@ -308,7 +310,7 @@ class G1DefaultParGCAllocator : public G1ParGCAllocator { + + public: + G1DefaultParGCAllocator(G1CollectedHeap* g1h); +- ~G1DefaultParGCAllocator(); ++ virtual ~G1DefaultParGCAllocator(); + + virtual G1ParGCAllocBuffer* alloc_buffer(InCSetState dest, AllocationContext_t context, uint node_index) { + assert(dest.is_valid(), diff --git a/openjdk-1.8.0.spec b/openjdk-1.8.0.spec index a818907..1770944 100644 --- a/openjdk-1.8.0.spec +++ b/openjdk-1.8.0.spec @@ -918,7 +918,7 @@ Provides: java-%{javaver}-%{origin}-accessibility%{?1} = %{epoch}:%{version}-%{r Name: java-%{javaver}-%{origin} Version: %{javaver}.%{updatever}.%{buildver} -Release: 12 +Release: 13 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides. This created a @@ -1108,6 +1108,7 @@ Patch194: modify_the_default_iteration_time_and_forks_in_the_JMH_of_KAEProvider. Patch195: support_CMS_parallel_inspection.patch Patch196: g1gc-numa-aware-Implementation.patch Patch197: implementation_of_Blas_hotspot_function_in_Intrinsics.patch +Patch198: fix_G1GC_memory_leak_in_numa.patch ############################################# # @@ -1564,6 +1565,7 @@ pushd %{top_level_dir_name} %patch195 -p1 %patch196 -p1 %patch197 -p1 +%patch198 -p1 popd # System library fixes @@ -2180,6 +2182,9 @@ require "copy_jdk_configs.lua" %endif %changelog +* Wed Jun 16 2021 kuenking111 - 1:1.8.0.292-b10.13 +- add fix_G1GC_memory_leak_in_numa.patch + * Sat Jun 12 2021 kuenking111 - 1:1.8.0.292-b10.12 - add implementation_of_Blas_hotspot_function_in_Intrinsics.patch -- Gitee From 20b163193944e373e4e0e9cb4927d373bdfd46d8 Mon Sep 17 00:00:00 2001 From: kuenking111 Date: Thu, 17 Jun 2021 19:02:54 +0800 Subject: [PATCH 5/6] I3W1BL: systemDictionary reslove class parser miss resourceMark --- openjdk-1.8.0.spec | 5 ++++- update-to-keep-same-with-master.patch | 3 ++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/openjdk-1.8.0.spec b/openjdk-1.8.0.spec index 1770944..f63e9a9 100644 --- a/openjdk-1.8.0.spec +++ b/openjdk-1.8.0.spec @@ -918,7 +918,7 @@ Provides: java-%{javaver}-%{origin}-accessibility%{?1} = %{epoch}:%{version}-%{r Name: java-%{javaver}-%{origin} Version: %{javaver}.%{updatever}.%{buildver} -Release: 13 +Release: 14 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides. 
This created a @@ -2182,6 +2182,9 @@ require "copy_jdk_configs.lua" %endif %changelog +* Thu Jun 17 2021 kuenking111 - 1:1.8.0.292-b10.14 +- fix systemDictionary resolve_from_stream ResourceMark + * Wed Jun 16 2021 kuenking111 - 1:1.8.0.292-b10.13 - add fix_G1GC_memory_leak_in_numa.patch diff --git a/update-to-keep-same-with-master.patch b/update-to-keep-same-with-master.patch index aeae53f..032e067 100644 --- a/update-to-keep-same-with-master.patch +++ b/update-to-keep-same-with-master.patch @@ -23,7 +23,7 @@ index c3dec0a30..201dd9594 100644 *index = '\0'; // chop to just the package name while ((index = strchr(name, '/')) != NULL) { *index = '.'; // replace '/' with '.' in package name -@@ -1170,29 +1170,31 @@ Klass* SystemDictionary::resolve_from_stream(Symbol* class_name, +@@ -1170,29 +1170,32 @@ Klass* SystemDictionary::resolve_from_stream(Symbol* class_name, !class_loader.is_null() && parsed_name != NULL && parsed_name->utf8_length() >= (int)pkglen) { @@ -50,6 +50,7 @@ index c3dec0a30..201dd9594 100644 - Exceptions::_throw_msg(THREAD_AND_LOCATION, - vmSymbols::java_lang_SecurityException(), message); - } ++ ResourceMark rm(THREAD); + bool prohibited; + const jbyte* base = parsed_name->base(); + if ((base[0] | base[1] | base[2] | base[3] | base[4]) & 0x80) { -- Gitee From 1f4bec334afa64f094c5b4a12bb02043b0bb9454 Mon Sep 17 00:00:00 2001 From: kuenking111 Date: Mon, 28 Jun 2021 14:43:29 +0800 Subject: [PATCH 6/6] I3Y4ON: delete untrustworthy cacert soneraclass2ca --- ..._untrustworthy_cacert_soneraclass2ca.patch | 74 +++++++++++++++++++ openjdk-1.8.0.spec | 9 ++- 2 files changed, 81 insertions(+), 2 deletions(-) create mode 100755 delete_untrustworthy_cacert_soneraclass2ca.patch diff --git a/delete_untrustworthy_cacert_soneraclass2ca.patch b/delete_untrustworthy_cacert_soneraclass2ca.patch new file mode 100755 index 0000000..fe7f5c8 --- /dev/null +++ b/delete_untrustworthy_cacert_soneraclass2ca.patch @@ -0,0 +1,74 @@ +diff --git a/jdk/make/data/cacerts/soneraclass2ca b/jdk/make/data/cacerts/soneraclass2ca +deleted file mode 100644 +index 43faa5e2..00000000 +--- a/jdk/make/data/cacerts/soneraclass2ca ++++ /dev/null +@@ -1,26 +0,0 @@ +-Owner: CN=Sonera Class2 CA, O=Sonera, C=FI +-Issuer: CN=Sonera Class2 CA, O=Sonera, C=FI +-Serial number: 1d +-Valid from: Fri Apr 06 07:29:40 GMT 2001 until: Tue Apr 06 07:29:40 GMT 2021 +-Signature algorithm name: SHA1withRSA +-Subject Public Key Algorithm: 2048-bit RSA key +-Version: 3 +------BEGIN CERTIFICATE----- +-MIIDIDCCAgigAwIBAgIBHTANBgkqhkiG9w0BAQUFADA5MQswCQYDVQQGEwJGSTEP +-MA0GA1UEChMGU29uZXJhMRkwFwYDVQQDExBTb25lcmEgQ2xhc3MyIENBMB4XDTAx +-MDQwNjA3Mjk0MFoXDTIxMDQwNjA3Mjk0MFowOTELMAkGA1UEBhMCRkkxDzANBgNV +-BAoTBlNvbmVyYTEZMBcGA1UEAxMQU29uZXJhIENsYXNzMiBDQTCCASIwDQYJKoZI +-hvcNAQEBBQADggEPADCCAQoCggEBAJAXSjWdyvANlsdE+hY3/Ei9vX+ALTU74W+o +-Z6m/AxxNjG8yR9VBaKQTBME1DJqEQ/xcHf+Js+gXGM2RX/uJ4+q/Tl18GybTdXnt +-5oTjV+WtKcT0OijnpXuENmmz/V52vaMtmdOQTiMofRhj8VQ7Jp12W5dCsv+u8E7s +-3TmVToMGf+dJQMjFAbJUWmYdPfz56TwKnoG4cPABi+QjVHzIrviQHgCWctRUz2Ej +-vOr7nQKV0ba5cTppCD8PtOFCx4j1P5iop7oc4HFx71hXgVB6XGt0Rg6DA5jDjqhu +-8nYybieDwnPz3BjotJPqdURrBGAgcVeHnfO+oJAjPYok4doh28MCAwEAAaMzMDEw +-DwYDVR0TAQH/BAUwAwEB/zARBgNVHQ4ECgQISqCqWITTXjwwCwYDVR0PBAQDAgEG +-MA0GCSqGSIb3DQEBBQUAA4IBAQBazof5FnIVV0sd2ZvnoiYw7JNn39Yt0jSv9zil +-zqsWuasvfDXLrNAPtEwr/IDva4yRXzZ299uzGxnq9LIR/WFxRL8oszodv7ND6J+/ +-3DEIcbCdjdY0RzKQxmUk96BKfARzjzlvF4xytb1LyHr4e4PDKE6cCepnP7JnBBvD +-FNr450kkkdAdavphOe9r5yF1BgfYErQhIHBCcYHaPJo2vqZbDWpsmh+Re/n570K6 
+-Tk6ezAyNlNzZRZxe7EJQY670XcSxEtzKO6gunRRaBXW37Ndj4ro1tgQIkejanZz2 +-ZrUYrAqmVCY0M9IbwdR/GjqOC6oybtv8TyWf2TLHllpwrN9M +------END CERTIFICATE----- +diff --git a/jdk/test/sun/security/lib/cacerts/VerifyCACerts.java b/jdk/test/sun/security/lib/cacerts/VerifyCACerts.java +index 9053b796..d1a7879d 100644 +--- a/jdk/test/sun/security/lib/cacerts/VerifyCACerts.java ++++ b/jdk/test/sun/security/lib/cacerts/VerifyCACerts.java +@@ -53,12 +53,12 @@ public class VerifyCACerts { + + File.separator + "security" + File.separator + "cacerts"; + + // The numbers of certs now. +- private static final int COUNT = 90; ++ private static final int COUNT = 89; + + // SHA-256 of cacerts, can be generated with + // shasum -a 256 cacerts | sed -e 's/../&:/g' | tr '[:lower:]' '[:upper:]' | cut -c1-95 + private static final String CHECKSUM +- = "DC:22:7E:D7:F3:46:1F:8B:A8:4E:EE:C2:A8:4B:8E:26:89:4F:95:5C:71:A3:1B:5A:6E:A6:48:FD:CB:C9:F2:95"; ++ = "E6:F5:ED:92:CE:E2:35:5C:84:56:78:C7:72:29:29:A9:83:99:19:D9:54:F4:FF:7F:F7:D4:DB:2D:34:36:20:B5"; + + // map of cert alias to SHA-256 fingerprint + @SuppressWarnings("serial") +@@ -167,8 +167,6 @@ public class VerifyCACerts { + "3B:22:2E:56:67:11:E9:92:30:0D:C0:B1:5A:B9:47:3D:AF:DE:F8:C8:4D:0C:EF:7D:33:17:B4:C1:82:1D:14:36"); + put("swisssignsilverg2ca [jdk]", + "BE:6C:4D:A2:BB:B9:BA:59:B6:F3:93:97:68:37:42:46:C3:C0:05:99:3F:A9:8F:02:0D:1D:ED:BE:D4:8A:81:D5"); +- put("soneraclass2ca [jdk]", +- "79:08:B4:03:14:C1:38:10:0B:51:8D:07:35:80:7F:FB:FC:F8:51:8A:00:95:33:71:05:BA:38:6B:15:3D:D9:27"); + put("securetrustca [jdk]", + "F1:C1:B5:0A:E5:A2:0D:D8:03:0E:C9:F6:BC:24:82:3D:D3:67:B5:25:57:59:B4:E7:1B:61:FC:E9:F7:37:5D:73"); + put("xrampglobalca [jdk]", +@@ -245,12 +243,7 @@ public class VerifyCACerts { + // Exception list to 90 days expiry policy + // No error will be reported if certificate in this list expires + @SuppressWarnings("serial") +- private static final HashSet EXPIRY_EXC_ENTRIES = new HashSet() { +- { +- // Valid until: Tue Apr 06 15:29:40 HKT 2021 +- add("soneraclass2ca [jdk]"); +- } +- }; ++ private static final HashSet EXPIRY_EXC_ENTRIES = new HashSet(); + + // Ninety days in milliseconds + private static final long NINETY_DAYS = 7776000000L; diff --git a/openjdk-1.8.0.spec b/openjdk-1.8.0.spec index f63e9a9..d83a699 100644 --- a/openjdk-1.8.0.spec +++ b/openjdk-1.8.0.spec @@ -918,7 +918,7 @@ Provides: java-%{javaver}-%{origin}-accessibility%{?1} = %{epoch}:%{version}-%{r Name: java-%{javaver}-%{origin} Version: %{javaver}.%{updatever}.%{buildver} -Release: 14 +Release: 15 # java-1.5.0-ibm from jpackage.org set Epoch to 1 for unknown reasons # and this change was brought into RHEL-4. java-1.5.0-ibm packages # also included the epoch in their virtual provides. This created a @@ -1109,6 +1109,7 @@ Patch195: support_CMS_parallel_inspection.patch Patch196: g1gc-numa-aware-Implementation.patch Patch197: implementation_of_Blas_hotspot_function_in_Intrinsics.patch Patch198: fix_G1GC_memory_leak_in_numa.patch +Patch199: delete_untrustworthy_cacert_soneraclass2ca.patch ############################################# # @@ -1566,6 +1567,7 @@ pushd %{top_level_dir_name} %patch196 -p1 %patch197 -p1 %patch198 -p1 +%patch199 -p1 popd # System library fixes @@ -1639,7 +1641,7 @@ export ARCH_DATA_MODEL=64 # We use ourcppflags because the OpenJDK build seems to # pass EXTRA_CFLAGS to the HotSpot C++ compiler... 
-EXTRA_CFLAGS="%ourcppflags -Wno-error -fcommon" +EXTRA_CFLAGS="%ourcppflags -Wno-error -fcommon -fsigned-char" EXTRA_CPP_FLAGS="%ourcppflags -Wno-error" EXTRA_ASFLAGS="${EXTRA_CFLAGS} -Wa,--generate-missing-build-notes=yes" @@ -2182,6 +2184,9 @@ require "copy_jdk_configs.lua" %endif %changelog +* Mon Jun 28 2021 kuenking111 - 1:1.8.0.292-b10.15 +- fix delete_untrustworthy_cacert_soneraclass2ca.patch + * Thu Jun 17 2021 kuenking111 - 1:1.8.0.292-b10.14 - fix systemDictionary resolve_from_stream ResourceMark -- Gitee