From 05158efe97e72345e243af6fc6045453cee86883 Mon Sep 17 00:00:00 2001 From: Artem Udovichenko Date: Tue, 6 Sep 2022 11:53:41 +0300 Subject: [PATCH] Allocate whole region for TLAB in case of single-threaded VMs Signed-off-by: Artem Udovichenko --- runtime/entrypoints/entrypoints.cpp | 2 +- runtime/include/mem/allocator.h | 3 +- runtime/mem/allocator.cpp | 17 +++-- runtime/mem/gc/g1/g1-allocator.cpp | 15 +++- .../gc/hybrid-gc/hybrid_object_allocator.cpp | 4 +- runtime/mem/heap_manager.cpp | 2 +- runtime/mem/region_allocator-inl.h | 76 +++++++++++-------- runtime/mem/region_allocator.h | 12 ++- 8 files changed, 83 insertions(+), 48 deletions(-) diff --git a/runtime/entrypoints/entrypoints.cpp b/runtime/entrypoints/entrypoints.cpp index 86dc5bc07..03c909226 100644 --- a/runtime/entrypoints/entrypoints.cpp +++ b/runtime/entrypoints/entrypoints.cpp @@ -151,7 +151,7 @@ extern "C" void WriteTlabStatsEntrypoint(size_t size) { LOG_ENTRYPOINT(); - ASSERT(size <= mem::PANDA_TLAB_SIZE); + ASSERT(size <= Thread::GetCurrent()->GetVM()->GetHeapManager()->GetTLABMaxAllocSize()); if (mem::PANDA_TRACK_TLAB_ALLOCATIONS) { auto mem_stats = Thread::GetCurrent()->GetVM()->GetHeapManager()->GetMemStats(); if (mem_stats == nullptr) { diff --git a/runtime/include/mem/allocator.h b/runtime/include/mem/allocator.h index 6e2d5288f..ec8f1e111 100644 --- a/runtime/include/mem/allocator.h +++ b/runtime/include/mem/allocator.h @@ -766,7 +766,7 @@ private: template class ObjectAllocatorGen final : public ObjectAllocatorGenBase { // TODO(dtrubenkov): create a command line argument for this - static constexpr size_t YOUNG_TLAB_SIZE = 4_KB; // TLAB size for young gen + static constexpr size_t DEFAULT_YOUNG_TLAB_SIZE = 4_KB; // TLAB size for young gen using YoungGenAllocator = BumpPointerAllocator, true>; @@ -886,6 +886,7 @@ private: MemStatsType *mem_stats_ = nullptr; ObjectAllocator *non_movable_object_allocator_ = nullptr; LargeObjectAllocator *large_non_movable_object_allocator_ = nullptr; + 
size_t tlab_size_ = DEFAULT_YOUNG_TLAB_SIZE; template void *AllocateTenuredImpl(size_t size); diff --git a/runtime/mem/allocator.cpp b/runtime/mem/allocator.cpp index 473a01ea4..09b61de18 100644 --- a/runtime/mem/allocator.cpp +++ b/runtime/mem/allocator.cpp @@ -425,10 +425,17 @@ ObjectAllocatorGen::ObjectAllocatorGen(MemStatsType *mem_stats, bool cr size_t young_space_size = heap_spaces_.GetCurrentMaxYoungSize(); auto young_shared_space_size = Runtime::GetOptions().GetYoungSharedSpaceSize(); ASSERT(young_space_size >= young_shared_space_size); - auto tlabs_count_in_young_gen = (young_space_size - young_shared_space_size) / YOUNG_TLAB_SIZE; - ASSERT(((young_space_size - young_shared_space_size) % YOUNG_TLAB_SIZE) == 0); - ASSERT(YOUNG_ALLOC_MAX_SIZE <= YOUNG_TLAB_SIZE); - ASSERT(tlabs_count_in_young_gen * YOUNG_TLAB_SIZE <= young_space_size); + size_t tlabs_count_in_young_gen; + if constexpr (MT_MODE == MT_MODE_SINGLE) { + // For single-threaded VMs allocate whole private young space for TLAB + tlab_size_ = young_space_size - young_shared_space_size; + tlabs_count_in_young_gen = 1; + } else { + tlabs_count_in_young_gen = (young_space_size - young_shared_space_size) / DEFAULT_YOUNG_TLAB_SIZE; + ASSERT(((young_space_size - young_shared_space_size) % DEFAULT_YOUNG_TLAB_SIZE) == 0); + } + ASSERT(YOUNG_ALLOC_MAX_SIZE <= tlab_size_); + ASSERT(tlabs_count_in_young_gen * tlab_size_ <= young_space_size); // TODO(aemelenko): Missed an allocator pointer // because we construct BumpPointer Allocator after calling AllocArena method @@ -533,7 +540,7 @@ void ObjectAllocatorGen::ResetYoungAllocator() template TLAB *ObjectAllocatorGen::CreateNewTLAB([[maybe_unused]] panda::ManagedThread *thread) { - return young_gen_allocator_->CreateNewTLAB(YOUNG_TLAB_SIZE); + return young_gen_allocator_->CreateNewTLAB(tlab_size_); } template diff --git a/runtime/mem/gc/g1/g1-allocator.cpp b/runtime/mem/gc/g1/g1-allocator.cpp index 81802a527..691474da0 100644 --- 
a/runtime/mem/gc/g1/g1-allocator.cpp +++ b/runtime/mem/gc/g1/g1-allocator.cpp @@ -95,14 +95,23 @@ std::vector &ObjectAllocatorG1::GetYoungSpaceBitmaps() template TLAB *ObjectAllocatorG1::CreateNewTLAB([[maybe_unused]] panda::ManagedThread *thread) { - // TODO(dtrubenkov): fix this - return object_allocator_->CreateNewTLAB(thread, TLAB_SIZE); + if constexpr (MT_MODE == MT_MODE_SINGLE) { + // For single-threaded VMs allocate a whole region for TLAB + return object_allocator_->CreateRegionSizeTLAB(); + } else { + return object_allocator_->CreateTLAB(TLAB_SIZE); + } } template size_t ObjectAllocatorG1::GetTLABMaxAllocSize() { - return PANDA_TLAB_MAX_ALLOC_SIZE; + if constexpr (MT_MODE == MT_MODE_SINGLE) { + // For single-threaded VMs we can allocate objects of size up to region size in TLABs. + return GetYoungAllocMaxSize(); + } else { + return PANDA_TLAB_MAX_ALLOC_SIZE; + } } template diff --git a/runtime/mem/gc/hybrid-gc/hybrid_object_allocator.cpp b/runtime/mem/gc/hybrid-gc/hybrid_object_allocator.cpp index e1f254d43..5e27fbc75 100644 --- a/runtime/mem/gc/hybrid-gc/hybrid_object_allocator.cpp +++ b/runtime/mem/gc/hybrid-gc/hybrid_object_allocator.cpp @@ -124,9 +124,9 @@ size_t HybridObjectAllocator::VerifyAllocatorStatus() return 0; } -TLAB *HybridObjectAllocator::CreateNewTLAB(ManagedThread *thread) +TLAB *HybridObjectAllocator::CreateNewTLAB([[maybe_unused]] ManagedThread *thread) { - return object_allocator_->CreateNewTLAB(thread); + return object_allocator_->CreateTLAB(object_allocator_->GetMaxRegularObjectSize()); } size_t HybridObjectAllocator::GetTLABMaxAllocSize() diff --git a/runtime/mem/heap_manager.cpp b/runtime/mem/heap_manager.cpp index 46de7f85c..a1c0d1cb3 100644 --- a/runtime/mem/heap_manager.cpp +++ b/runtime/mem/heap_manager.cpp @@ -61,7 +61,7 @@ bool HeapManager::Initialize(GCType gc_type, bool single_threaded, bool use_tlab break; } #undef FWD_GC_INIT - if (!object_allocator_.AsObjectAllocator()->IsTLABSupported() || single_threaded) { + if 
(!object_allocator_.AsObjectAllocator()->IsTLABSupported()) { use_tlab = false; } use_tlab_for_allocations_ = use_tlab; diff --git a/runtime/mem/region_allocator-inl.h b/runtime/mem/region_allocator-inl.h index b6593d748..6c6f3c262 100644 --- a/runtime/mem/region_allocator-inl.h +++ b/runtime/mem/region_allocator-inl.h @@ -216,8 +216,7 @@ void *RegionAllocator::Alloc(size_t size, Alignment a } template -TLAB *RegionAllocator::CreateNewTLAB([[maybe_unused]] panda::ManagedThread *thread, - size_t size) +TLAB *RegionAllocator::CreateTLAB(size_t size) { ASSERT(size <= GetMaxRegularObjectSize()); ASSERT(AlignUp(size, GetAlignmentInBytes(DEFAULT_ALIGNMENT)) == size); @@ -227,15 +226,12 @@ TLAB *RegionAllocator::CreateNewTLAB([[maybe_unused]] os::memory::LockHolder lock(this->region_lock_); Region *region = nullptr; // first search in partial tlab map - // NOLINTNEXTLINE(readability-braces-around-statements, bugprone-suspicious-semicolon) - if constexpr (USE_PARTIAL_TLAB) { - auto largest_tlab = retained_tlabs_.begin(); - if (largest_tlab != retained_tlabs_.end() && largest_tlab->first >= size) { - LOG(DEBUG, ALLOC) << "Use retained tlabs region " << region; - region = largest_tlab->second; - retained_tlabs_.erase(largest_tlab); - ASSERT(region->HasFlag(RegionFlag::IS_EDEN)); - } + auto largest_tlab = retained_tlabs_.begin(); + if (largest_tlab != retained_tlabs_.end() && largest_tlab->first >= size) { + LOG(DEBUG, ALLOC) << "Use retained tlabs region " << region; + region = largest_tlab->second; + retained_tlabs_.erase(largest_tlab); + ASSERT(region->HasFlag(RegionFlag::IS_EDEN)); } // allocate a free region if none partial tlab has enough space @@ -246,27 +242,12 @@ TLAB *RegionAllocator::CreateNewTLAB([[maybe_unused]] } } if (region != nullptr) { - // NOLINTNEXTLINE(readability-braces-around-statements, bugprone-suspicious-semicolon) - if constexpr (!USE_PARTIAL_TLAB) { - // We don't reuse the same region for different TLABs. 
- // Therefore, update the size - size = region->GetRemainingSizeForTLABs(); - } - tlab = region->CreateTLAB(size); - ASSERT(tlab != nullptr); - ASAN_UNPOISON_MEMORY_REGION(tlab->GetStartAddr(), tlab->GetSize()); - AllocConfigT::MemoryInit(tlab->GetStartAddr(), tlab->GetSize()); - ASAN_POISON_MEMORY_REGION(tlab->GetStartAddr(), tlab->GetSize()); - LOG(DEBUG, ALLOC) << "Found a region " << region << " and create tlab " << tlab << " with memory starts at " - << tlab->GetStartAddr() << " and with size " << tlab->GetSize(); - // NOLINTNEXTLINE(readability-braces-around-statements, bugprone-suspicious-semicolon) - if constexpr (USE_PARTIAL_TLAB) { - auto remaining_size = region->GetRemainingSizeForTLABs(); - if (remaining_size >= size) { - LOG(DEBUG, ALLOC) << "Add a region " << region << " with remained size " << remaining_size - << " to retained_tlabs"; - retained_tlabs_.insert(std::make_pair(remaining_size, region)); - } + tlab = CreateTLABInRegion(region, size); + auto remaining_size = region->GetRemainingSizeForTLABs(); + if (remaining_size >= size) { + LOG(DEBUG, ALLOC) << "Add a region " << region << " with remained size " << remaining_size + << " to retained_tlabs"; + retained_tlabs_.insert(std::make_pair(remaining_size, region)); } } } @@ -274,6 +255,37 @@ TLAB *RegionAllocator::CreateNewTLAB([[maybe_unused]] return tlab; } +template +TLAB *RegionAllocator::CreateRegionSizeTLAB() +{ + TLAB *tlab = nullptr; + + os::memory::LockHolder lock(this->region_lock_); + Region *region = this->template CreateAndSetUpNewRegion(REGION_SIZE, RegionFlag::IS_EDEN); + if (LIKELY(region != nullptr)) { + region->CreateTLABSupport(); + size_t size = region->GetRemainingSizeForTLABs(); + tlab = CreateTLABInRegion(region, size); + } + + return tlab; +} + +template +TLAB *RegionAllocator::CreateTLABInRegion(Region *region, size_t size) +{ + // We don't reuse the same region for different TLABs. 
+ // Therefore, update the size + TLAB *tlab = region->CreateTLAB(size); + ASSERT(tlab != nullptr); + ASAN_UNPOISON_MEMORY_REGION(tlab->GetStartAddr(), tlab->GetSize()); + AllocConfigT::MemoryInit(tlab->GetStartAddr(), tlab->GetSize()); + ASAN_POISON_MEMORY_REGION(tlab->GetStartAddr(), tlab->GetSize()); + LOG(DEBUG, ALLOC) << "Found a region " << region << " and create tlab " << tlab << " with memory starts at " + << tlab->GetStartAddr() << " and with size " << tlab->GetSize(); + return tlab; +} + template // TODO(agrebenkin) add set of flags from which to pick the regions and make it compile time template diff --git a/runtime/mem/region_allocator.h b/runtime/mem/region_allocator.h index b772d210b..4c505a738 100644 --- a/runtime/mem/region_allocator.h +++ b/runtime/mem/region_allocator.h @@ -191,12 +191,17 @@ public: void Free([[maybe_unused]] void *mem) {} /** - * \brief Create new region allocator as thread local allocator buffer. - * @param thread - pointer to thread + * \brief Create a TLAB of the specified size * @param size - required size of tlab * @return newly allocated TLAB, TLAB is set to Empty is allocation failed. */ - TLAB *CreateNewTLAB(panda::ManagedThread *thread, size_t size = GetMaxRegularObjectSize()); + TLAB *CreateTLAB(size_t size); + + /** + * \brief Create a TLAB in a new region. TLAB will occupy the whole region. + * @return newly allocated TLAB, TLAB is set to Empty if allocation failed. + */ + TLAB *CreateRegionSizeTLAB(); /** * \brief Iterates over all objects allocated by this allocator. @@ -486,6 +491,7 @@ private: template void *AllocRegular(size_t align_size); + TLAB *CreateTLABInRegion(Region *region, size_t size); Region full_region_; Region *eden_current_region_; -- Gitee