From 822e744210a07e20d006d42811586efa16b0c4ae Mon Sep 17 00:00:00 2001 From: rjgask Date: Wed, 27 Sep 2023 17:01:41 +0000 Subject: [PATCH 1/7] prepare --- runtime/mem/gc/epsilon-g1/epsilon-g1.cpp | 7 +- runtime/mem/gc/g1/g1-gc.cpp | 158 ++++++++++++--------- runtime/mem/gc/g1/g1-gc.h | 15 +- runtime/mem/gc/g1/ref_updater.h | 52 +++++-- runtime/mem/gc/g1/update_remset_thread.cpp | 20 +-- runtime/mem/gc/g1/update_remset_thread.h | 20 +-- runtime/mem/gc/gc_barrier_set.cpp | 23 ++- runtime/mem/gc/gc_barrier_set.h | 17 +-- 8 files changed, 185 insertions(+), 127 deletions(-) diff --git a/runtime/mem/gc/epsilon-g1/epsilon-g1.cpp b/runtime/mem/gc/epsilon-g1/epsilon-g1.cpp index 0964dc314..607e10fad 100644 --- a/runtime/mem/gc/epsilon-g1/epsilon-g1.cpp +++ b/runtime/mem/gc/epsilon-g1/epsilon-g1.cpp @@ -49,10 +49,9 @@ void EpsilonG1GC::InitializeImpl() this->CreateCardTable(allocator, PoolManager::GetMmapMemPool()->GetMinObjectAddress(), PoolManager::GetMmapMemPool()->GetTotalObjectSize()); - auto barrier_set = - allocator->New(allocator, &PreWrbFuncEntrypoint, &PostWrbUpdateCardFuncEntrypoint, - panda::helpers::math::GetIntLog2(this->GetG1ObjectAllocator()->GetRegionSize()), - this->GetCardTable(), this->updated_refs_queue_, &this->queue_lock_); + auto barrier_set = allocator->New( + allocator, &PreWrbFuncEntrypoint, &PostWrbUpdateCardFuncEntrypoint, + panda::helpers::math::GetIntLog2(this->GetG1ObjectAllocator()->GetRegionSize()), this->GetCardTable()); ASSERT(barrier_set != nullptr); this->SetGCBarrierSet(barrier_set); diff --git a/runtime/mem/gc/g1/g1-gc.cpp b/runtime/mem/gc/g1/g1-gc.cpp index 5d79ca0a2..f5e096e17 100644 --- a/runtime/mem/gc/g1/g1-gc.cpp +++ b/runtime/mem/gc/g1/g1-gc.cpp @@ -144,7 +144,6 @@ G1GC::G1GC(ObjectAllocatorBase *object_allocator, const GCSettin InternalAllocatorPtr allocator = this->GetInternalAllocator(); this->SetType(GCType::G1_GC); this->SetTLABsSupported(); - updated_refs_queue_ = allocator->New(); auto *first_ref_vector = 
allocator->New(); first_ref_vector->reserve(MAX_REFS); unique_refs_from_remsets_.push_back(first_ref_vector); @@ -160,7 +159,6 @@ G1GC::~G1GC() allocator->Delete(obj_vector); } } - allocator->Delete(updated_refs_queue_); ASSERT(unique_refs_from_remsets_.size() == 1); allocator->Delete(unique_refs_from_remsets_.front()); unique_refs_from_remsets_.clear(); @@ -642,15 +640,17 @@ void G1GC::WorkerTaskProcessing(GCWorkersTask *task, [[maybe_unu case GCWorkersTaskTypes::TASK_ENQUEUE_REMSET_REFS: { auto *moved_objects_range = task->Cast>()->GetMovedObjectsRange(); auto *task_updated_refs_queue = - this->GetInternalAllocator()->template New(); + this->GetInternalAllocator()->template New>(); EnqueueRemsetRefUpdater ref_updater(this->GetCardTable(), task_updated_refs_queue, region_size_bits_); DoUpdateReferencesToMovedObjectsRange(moved_objects_range, ref_updater); { os::memory::LockHolder lock(gc_worker_queue_lock_); - updated_refs_queue_->insert(updated_refs_queue_->end(), task_updated_refs_queue->begin(), - task_updated_refs_queue->end()); + auto *barriers = GetG1BarrierSet(); + for (auto card : *task_updated_refs_queue) { + barriers->EnqueueToShared(card); + } } this->GetInternalAllocator()->Delete(moved_objects_range); this->GetInternalAllocator()->Delete(task_updated_refs_queue); @@ -933,10 +933,9 @@ void G1GC::InitializeImpl() PoolManager::GetMmapMemPool()->GetTotalObjectSize()); // TODO(dtrubenkov): initialize barriers - auto barrier_set = - allocator->New(allocator, &PreWrbFuncEntrypoint, &PostWrbUpdateCardFuncEntrypoint, - panda::helpers::math::GetIntLog2(this->GetG1ObjectAllocator()->GetRegionSize()), - this->GetCardTable(), updated_refs_queue_, &queue_lock_); + auto barrier_set = allocator->New( + allocator, &PreWrbFuncEntrypoint, &PostWrbUpdateCardFuncEntrypoint, + panda::helpers::math::GetIntLog2(this->GetG1ObjectAllocator()->GetRegionSize()), this->GetCardTable()); ASSERT(barrier_set != nullptr); this->SetGCBarrierSet(barrier_set); @@ -952,14 +951,12 @@ void 
G1GC::InitializeImpl() ASSERT(gc_task_pool != nullptr); this->SetWorkersPool(gc_task_pool); } - { - // to make TSAN happy because we access updated_refs_queue_ inside constructor of UpdateRemsetThread - os::memory::LockHolder lock(queue_lock_); - update_remset_thread_ = allocator->template New>( - this, this->GetPandaVm(), updated_refs_queue_, &queue_lock_, this->GetG1ObjectAllocator()->GetRegionSize(), - this->GetSettings()->G1EnableConcurrentUpdateRemset(), this->GetSettings()->G1MinConcurrentCardsToProcess(), - this->GetCardTable()); - } + + update_remset_thread_ = allocator->template New>( + this, this->GetPandaVm(), barrier_set, this->GetG1ObjectAllocator()->GetRegionSize(), + this->GetSettings()->G1EnableConcurrentUpdateRemset(), this->GetSettings()->G1MinConcurrentCardsToProcess(), + this->GetCardTable()); + ASSERT(update_remset_thread_ != nullptr); LOG_DEBUG_GC << "G1GC initialized"; } @@ -1248,17 +1245,14 @@ bool G1GC::CollectAndMove(const CollectionSet &collection_set) moved_objects_container = &mixed_marked_objects_; } - { - os::memory::LockHolder lock(queue_lock_); - analytics_.ReportUpdateRefsStart(panda::time::GetCurrentTimeInNanos()); - if (this->GetSettings()->ParallelRefUpdatingEnabled()) { - UpdateRefsToMovedObjects(moved_objects_container); - } else { - UpdateRefsToMovedObjects(moved_objects_container); - } - analytics_.ReportUpdateRefsEnd(panda::time::GetCurrentTimeInNanos()); - ActualizeRemSets(); + analytics_.ReportUpdateRefsStart(panda::time::GetCurrentTimeInNanos()); + if (this->GetSettings()->ParallelRefUpdatingEnabled()) { + UpdateRefsToMovedObjects(moved_objects_container); + } else { + UpdateRefsToMovedObjects(moved_objects_container); } + analytics_.ReportUpdateRefsEnd(panda::time::GetCurrentTimeInNanos()); + ActualizeRemSets(); VerifyCollectAndMove(std::move(collect_verifier), collection_set); SweepRegularVmRefs(); @@ -1302,14 +1296,28 @@ bool G1GC::CollectAndMove(const CollectionSet &collection_set) } template -template 
-std::conditional_t, EnqueueRemsetRefUpdater> -G1GC::CreateRefUpdater([[maybe_unused]] GCG1BarrierSet::ThreadLocalCardQueues *updated_ref_queue) const +template +std::conditional_t, EnqueueRemsetRefUpdater> +G1GC::CreateConcurrentRefUpdater(PandaVector *queue) const +{ + if constexpr (FULL_GC) { + return UpdateRemsetRefUpdater(region_size_bits_); + } else { + return EnqueueRemsetRefUpdater(this->GetCardTable(), queue, region_size_bits_); + } +} + +template +template +std::conditional_t, + SharedEnqueueRemsetRefUpdater> +G1GC::CreateRefUpdater() const { if constexpr (FULL_GC) { - return UpdateRemsetRefUpdater(region_size_bits_); + return UpdateRemsetRefUpdater(region_size_bits_); } else { - return EnqueueRemsetRefUpdater(this->GetCardTable(), updated_ref_queue, region_size_bits_); + return SharedEnqueueRemsetRefUpdater(this->GetCardTable(), GetG1BarrierSet(), + region_size_bits_); } } @@ -1320,17 +1328,14 @@ void G1GC::UpdateRefsToMovedObjects(MovedObjectsContainer scope(__FUNCTION__, this); // Currently lock for RemSet too much influences for pause, so don't use workers on FULL-GC constexpr bool ENABLE_WORKERS = USE_WORKERS && !FULL_GC; - auto internal_allocator = this->GetInternalAllocator(); - auto *updated_ref_queue = (ENABLE_WORKERS) - ? internal_allocator->template New() - : updated_refs_queue_; - auto ref_updater = this->CreateRefUpdater(updated_ref_queue); - // update reference from objects which were moved while garbage collection - LOG_DEBUG_GC << "=== Update ex-cset -> ex-cset references. START. ==="; - { - ScopedTiming t("UpdateMovedObjectsReferences", *this->GetTiming()); - for (auto *moved_objects : *moved_objects_container) { - if constexpr (ENABLE_WORKERS) { + + if constexpr (ENABLE_WORKERS) { + auto internal_allocator = this->GetInternalAllocator(); + // update reference from objects which were moved while garbage collection + LOG_DEBUG_GC << "=== Update ex-cset -> ex-cset references. START. 
==="; + { + ScopedTiming t("UpdateMovedObjectsReferences", *this->GetTiming()); + for (auto *moved_objects : *moved_objects_container) { auto range_begin = moved_objects->begin(); auto range_end = range_begin; while (range_begin != moved_objects->end()) { @@ -1350,33 +1355,57 @@ void G1GC::UpdateRefsToMovedObjects(MovedObjectsContainerWorkerTaskProcessing(&gc_worker_task, nullptr); } - } else { // GC workers are not used - typename GCUpdateRefsWorkersTask::MovedObjectsRange moved_objects_range(moved_objects->begin(), - moved_objects->end()); - DoUpdateReferencesToMovedObjectsRange( - &moved_objects_range, ref_updater); } } - } - LOG_DEBUG_GC << "=== Update ex-cset -> ex-cset references. END. ==="; + LOG_DEBUG_GC << "=== Update ex-cset -> ex-cset references. END. ==="; + + // update references from objects which are not part of collection set + LOG_DEBUG_GC << "=== Update non ex-cset -> ex-cset references. START. ==="; + + auto *updated_ref_queue = internal_allocator->template New>(); + auto ref_updater = this->CreateConcurrentRefUpdater(updated_ref_queue); + + if constexpr (FULL_GC) { + UpdateRefsFromRemSets(ref_updater); + } else { + VisitRemSets(ref_updater); + } + LOG_DEBUG_GC << "=== Update non ex-cset -> ex-cset references. END. ==="; - // update references from objects which are not part of collection set - LOG_DEBUG_GC << "=== Update non ex-cset -> ex-cset references. START. ==="; - if constexpr (FULL_GC) { - UpdateRefsFromRemSets(ref_updater); - } else { - VisitRemSets(ref_updater); - } - LOG_DEBUG_GC << "=== Update non ex-cset -> ex-cset references. END. 
==="; - if constexpr (ENABLE_WORKERS) { { os::memory::LockHolder lock(gc_worker_queue_lock_); - updated_refs_queue_->insert(updated_refs_queue_->end(), updated_ref_queue->begin(), - updated_ref_queue->end()); + auto *barriers = GetG1BarrierSet(); + for (auto *card : *updated_ref_queue) { + barriers->EnqueueToShared(card); + } this->GetInternalAllocator()->Delete(updated_ref_queue); } this->GetWorkersPool()->WaitUntilTasksEnd(); + } else { + auto ref_updater = this->CreateRefUpdater(); + // update reference from objects which were moved while garbage collection + LOG_DEBUG_GC << "=== Update ex-cset -> ex-cset references. START. ==="; + { + ScopedTiming t("UpdateMovedObjectsReferences", *this->GetTiming()); + for (auto *moved_objects : *moved_objects_container) { + typename GCUpdateRefsWorkersTask::MovedObjectsRange moved_objects_range(moved_objects->begin(), + moved_objects->end()); + DoUpdateReferencesToMovedObjectsRange( + &moved_objects_range, ref_updater); + } + } + LOG_DEBUG_GC << "=== Update ex-cset -> ex-cset references. END. ==="; + + // update references from objects which are not part of collection set + LOG_DEBUG_GC << "=== Update non ex-cset -> ex-cset references. START. ==="; + if constexpr (FULL_GC) { + UpdateRefsFromRemSets(ref_updater); + } else { + VisitRemSets(ref_updater); + } + LOG_DEBUG_GC << "=== Update non ex-cset -> ex-cset references. END. 
==="; } + this->CommonUpdateRefsToMovedObjects(); } @@ -1758,7 +1787,6 @@ CollectionSet G1GC::GetFullCollectionSet() { ASSERT(this->IsFullGC()); // FillRemSet should be always finished before GetCollectibleRegions - ASSERT(update_remset_thread_->GetQueueSize() == 0); auto g1_allocator = this->GetG1ObjectAllocator(); g1_allocator->ClearCurrentTenuredRegion(); CollectionSet collection_set(g1_allocator->GetYoungRegions()); @@ -1951,10 +1979,10 @@ template void G1GC::ReenqueueDirtyCards() { ScopedTiming t(__FUNCTION__, *this->GetTiming()); - os::memory::LockHolder lock(queue_lock_); - std::for_each(dirty_cards_.cbegin(), dirty_cards_.cend(), [this](auto card) { + auto *barriers = GetG1BarrierSet(); + std::for_each(dirty_cards_.cbegin(), dirty_cards_.cend(), [barriers](auto *card) { card->Mark(); - updated_refs_queue_->push_back(card); + barriers->EnqueueToShared(card); }); dirty_cards_.clear(); } diff --git a/runtime/mem/gc/g1/g1-gc.h b/runtime/mem/gc/g1/g1-gc.h index fa0a2649f..c0a9686c9 100644 --- a/runtime/mem/gc/g1/g1-gc.h +++ b/runtime/mem/gc/g1/g1-gc.h @@ -154,9 +154,6 @@ protected: } // NOLINTBEGIN(misc-non-private-member-variables-in-classes) - /// Queue with updated refs info - GCG1BarrierSet::ThreadLocalCardQueues *updated_refs_queue_ {nullptr}; - os::memory::Mutex queue_lock_; os::memory::Mutex gc_worker_queue_lock_; // NOLINTEND(misc-non-private-member-variables-in-classes) @@ -293,10 +290,14 @@ private: void VerifyCollectAndMove(HeapVerifierIntoGC &&collect_verifier, const CollectionSet &collection_set); - template - std::conditional_t, - EnqueueRemsetRefUpdater> - CreateRefUpdater(GCG1BarrierSet::ThreadLocalCardQueues *updated_ref_queue) const; + template + std::conditional_t, EnqueueRemsetRefUpdater> + CreateConcurrentRefUpdater(PandaVector *queue) const; + + template + std::conditional_t, + SharedEnqueueRemsetRefUpdater> + CreateRefUpdater() const; /// Update all refs to moved objects template diff --git a/runtime/mem/gc/g1/ref_updater.h 
b/runtime/mem/gc/g1/ref_updater.h index e2f879719..c22fc04dd 100644 --- a/runtime/mem/gc/g1/ref_updater.h +++ b/runtime/mem/gc/g1/ref_updater.h @@ -62,31 +62,65 @@ protected: }; template -class EnqueueRemsetRefUpdater : public BaseRefUpdater { +class BaseEnqueueRemsetRefUpdater : public BaseRefUpdater { public: - EnqueueRemsetRefUpdater(CardTable *card_table, GCG1BarrierSet::ThreadLocalCardQueues *updated_refs_queue, - uint32_t region_size_bits) - : BaseRefUpdater(region_size_bits), - card_table_(card_table), - updated_refs_queue_(updated_refs_queue) + BaseEnqueueRemsetRefUpdater(CardTable *card_table, uint32_t region_size_bits) + : BaseRefUpdater(region_size_bits), card_table_(card_table) { } protected: - void Process(ObjectHeader *object, size_t offset, ObjectHeader *ref) const override + void Process(ObjectHeader *object, size_t offset, ObjectHeader *ref) const override final { if (!this->IsSameRegion(object, ref)) { auto *card = card_table_->GetCardPtr(ToUintPtr(object) + offset); if (card->IsClear()) { card->Mark(); - updated_refs_queue_->push_back(card); + Enqueue(card); } } } + virtual void Enqueue(CardTable::CardPtr card) const = 0; + private: CardTable *card_table_; - GCG1BarrierSet::ThreadLocalCardQueues *updated_refs_queue_; +}; + +template +class EnqueueRemsetRefUpdater : public BaseEnqueueRemsetRefUpdater { +public: + EnqueueRemsetRefUpdater(CardTable *card_table, PandaVector *queue, uint32_t region_size_bits) + : BaseEnqueueRemsetRefUpdater(card_table, region_size_bits), queue_(queue) + { + } + +protected: + void Enqueue(CardTable::CardPtr card) const override + { + queue_->push_back(card); + } + +private: + PandaVector *queue_; +}; + +template +class SharedEnqueueRemsetRefUpdater : public BaseEnqueueRemsetRefUpdater { +public: + SharedEnqueueRemsetRefUpdater(CardTable *card_table, GCG1BarrierSet *barriers, uint32_t region_size_bits) + : BaseEnqueueRemsetRefUpdater(card_table, region_size_bits), barriers_(barriers) + { + } + +protected: + void 
Enqueue(CardTable::CardPtr card) const override + { + barriers_->Enqueue(card); + } + +private: + GCG1BarrierSet *barriers_; }; } // namespace panda::mem diff --git a/runtime/mem/gc/g1/update_remset_thread.cpp b/runtime/mem/gc/g1/update_remset_thread.cpp index a07ac0b32..d55e8bd65 100644 --- a/runtime/mem/gc/g1/update_remset_thread.cpp +++ b/runtime/mem/gc/g1/update_remset_thread.cpp @@ -29,16 +29,13 @@ namespace panda::mem { static constexpr size_t PREALLOCATED_SET_SIZE = 256; template -UpdateRemsetThread::UpdateRemsetThread(GC *gc, PandaVM *vm, - GCG1BarrierSet::ThreadLocalCardQueues *queue, - os::memory::Mutex *queue_lock, size_t region_size, - bool update_concurrent, size_t min_concurrent_cards_to_process, - CardTable *card_table) +UpdateRemsetThread::UpdateRemsetThread(GC *gc, PandaVM *vm, GCG1BarrierSet *barriers, + size_t region_size, bool update_concurrent, + size_t min_concurrent_cards_to_process, CardTable *card_table) : gc_(gc), vm_(vm), + barriers_(barriers), card_table_(card_table), - queue_(queue), - queue_lock_(queue_lock), update_concurrent_(update_concurrent), region_size_bits_(panda::helpers::math::GetIntLog2(region_size)), min_concurrent_cards_to_process_(min_concurrent_cards_to_process) @@ -134,7 +131,6 @@ void UpdateRemsetThread::WaitUntilTasksEnd() thread_cond_var_.Wait(&loop_lock_); } thread_cond_var_.Signal(); - ASSERT(GetQueueSize() == 0); } else { os::memory::LockHolder holder(loop_lock_); // we will handle all remsets even when thread is stopped (we are trying to destroy Runtime, but it's the last @@ -144,7 +140,6 @@ void UpdateRemsetThread::WaitUntilTasksEnd() } stats_.PrintStats(); stats_.Reset(); - ASSERT(GetQueueSize() == 0); ASSERT(!pause_thread_); } @@ -201,7 +196,6 @@ void UpdateRemsetThread::ThreadLoop() template void UpdateRemsetThread::FillFromDefered(PandaUnorderedSet *cards) { - os::memory::LockHolder holder(*queue_lock_); std::copy(cards_.begin(), cards_.end(), std::inserter(*cards, cards->end())); cards_.clear(); } @@ -209,9 
+203,7 @@ void UpdateRemsetThread::FillFromDefered(PandaUnorderedSet void UpdateRemsetThread::FillFromQueue(PandaUnorderedSet *cards) { - os::memory::LockHolder holder(*queue_lock_); - std::copy(queue_->begin(), queue_->end(), std::inserter(*cards, cards->end())); - queue_->clear(); + barriers_->Dump(cards); } template @@ -262,7 +254,7 @@ void UpdateRemsetThread::FillFromPostBarrierBuffer( } template -void UpdateRemsetThread::FillFromPostBarrierBuffer(GCG1BarrierSet::ThreadLocalCardQueues *post_wrb, +void UpdateRemsetThread::FillFromPostBarrierBuffer(PandaVector *post_wrb, PandaUnorderedSet *cards) { while (!post_wrb->empty()) { diff --git a/runtime/mem/gc/g1/update_remset_thread.h b/runtime/mem/gc/g1/update_remset_thread.h index c98a505c6..493f8c4a7 100644 --- a/runtime/mem/gc/g1/update_remset_thread.h +++ b/runtime/mem/gc/g1/update_remset_thread.h @@ -87,9 +87,8 @@ private: template class UpdateRemsetThread { public: - explicit UpdateRemsetThread(GC *gc, PandaVM *vm, GCG1BarrierSet::ThreadLocalCardQueues *queue, - os::memory::Mutex *queue_lock, size_t region_size, bool update_concurrent, - size_t min_concurrent_cards_to_process, CardTable *card_table); + explicit UpdateRemsetThread(GC *gc, PandaVM *vm, GCG1BarrierSet *barriers, size_t region_size, + bool update_concurrent, size_t min_concurrent_cards_to_process, CardTable *card_table); ~UpdateRemsetThread(); NO_COPY_SEMANTIC(UpdateRemsetThread); NO_MOVE_SEMANTIC(UpdateRemsetThread); @@ -105,13 +104,6 @@ public: void ThreadLoop(); - // only debug purpose - size_t GetQueueSize() const - { - os::memory::LockHolder holder(*queue_lock_); - return queue_->size(); - } - void SetUpdateConcurrent(bool value) { os::memory::LockHolder holder(loop_lock_); @@ -164,7 +156,7 @@ private: void FillFromPostBarrierBuffers(PandaUnorderedSet *cards); void FillFromPostBarrierBuffer(GCG1BarrierSet::G1PostBarrierRingBufferType *post_wrb, PandaUnorderedSet *cards); - void FillFromPostBarrierBuffer(GCG1BarrierSet::ThreadLocalCardQueues 
*post_wrb, + void FillFromPostBarrierBuffer(PandaVector *post_wrb, PandaUnorderedSet *cards); size_t ProcessAllCards() REQUIRES(loop_lock_); @@ -177,12 +169,12 @@ private: GC *gc_ {nullptr}; PandaVM *vm_ {nullptr}; + GCG1BarrierSet *barriers_; CardTable *card_table_ {nullptr}; - GCG1BarrierSet::ThreadLocalCardQueues *queue_ GUARDED_BY(queue_lock_) {nullptr}; - os::memory::Mutex *queue_lock_ {nullptr}; PandaUnorderedSet cards_; + PandaUnorderedSet old_region_cards_; PandaVector *invalidate_regions_ GUARDED_BY(loop_lock_) {nullptr}; - PandaVector post_barrier_buffers_ GUARDED_BY(post_barrier_buffers_lock_); + PandaVector *> post_barrier_buffers_ GUARDED_BY(post_barrier_buffers_lock_); os::memory::Mutex post_barrier_buffers_lock_; /* diff --git a/runtime/mem/gc/gc_barrier_set.cpp b/runtime/mem/gc/gc_barrier_set.cpp index 2d7a927b6..b8c16d4ed 100644 --- a/runtime/mem/gc/gc_barrier_set.cpp +++ b/runtime/mem/gc/gc_barrier_set.cpp @@ -178,10 +178,7 @@ void GCG1BarrierSet::Invalidate(const void *begin, const void *last) void GCG1BarrierSet::Enqueue(CardTable::CardPtr card) { auto *thread = ManagedThread::GetCurrent(); - if (thread == nullptr) { // slow path via shared-queue for VM threads: gc/compiler/etc - os::memory::LockHolder lock(*queue_lock_); - updated_refs_queue_->push_back(card); - } else { + if (thread != nullptr) { // general fast-path for mutators ASSERT(thread->GetPreBuff() != nullptr); // write barrier cant be called after Terminate auto *buffer = thread->GetG1PostBarrierBuffer(); @@ -195,8 +192,22 @@ void GCG1BarrierSet::Enqueue(CardTable::CardPtr card) } // After 2 unsuccessfull pushing, we see that current buffer still full // so, reuse shared buffer - os::memory::LockHolder lock(*queue_lock_); - updated_refs_queue_->push_back(card); } + + // slow path via shared-queue for VM threads: gc/compiler/etc + os::memory::LockHolder lock(shared_cards_queue_lock_); + EnqueueToShared(card); +} + +void GCG1BarrierSet::EnqueueToShared(CardTable::CardPtr card) +{ + 
shared_cards_queue_.push_back(card); +} + +void GCG1BarrierSet::Dump(PandaUnorderedSet *cards) +{ + os::memory::LockHolder lock(shared_cards_queue_lock_); + std::copy(shared_cards_queue_.begin(), shared_cards_queue_.end(), std::inserter(*cards, cards->end())); + shared_cards_queue_.clear(); } } // namespace panda::mem diff --git a/runtime/mem/gc/gc_barrier_set.h b/runtime/mem/gc/gc_barrier_set.h index 50348c540..3eb48f4e6 100644 --- a/runtime/mem/gc/gc_barrier_set.h +++ b/runtime/mem/gc/gc_barrier_set.h @@ -199,7 +199,7 @@ private: class GCG1BarrierSet : public GCBarrierSet { public: - using ThreadLocalCardQueues = PandaVector; + using SharedCardsQueue = PandaVector; static constexpr size_t G1_POST_BARRIER_RING_BUFFER_SIZE = 1024 * 8; using G1PostBarrierRingBufferType = mem::LockFreeBuffer; @@ -207,16 +207,13 @@ public: // PRE ARGS: ObjRefProcessFunc pre_store_func, // POST ARGS: - ObjTwoRefProcessFunc post_func, uint8_t region_size_bits_count, CardTable *card_table, - ThreadLocalCardQueues *updated_refs_queue, os::memory::Mutex *queue_lock) + ObjTwoRefProcessFunc post_func, uint8_t region_size_bits_count, CardTable *card_table) : GCBarrierSet(allocator, BarrierType::PRE_SATB_BARRIER, BarrierType::POST_INTERREGION_BARRIER), pre_store_func_(pre_store_func), post_func_(post_func), region_size_bits_count_(region_size_bits_count), card_table_(card_table), - min_addr_(ToVoidPtr(card_table->GetMinAddress())), - updated_refs_queue_(updated_refs_queue), - queue_lock_(queue_lock) + min_addr_(ToVoidPtr(card_table->GetMinAddress())) { ASSERT(pre_store_func_ != nullptr); ASSERT(post_func_ != nullptr); @@ -250,6 +247,10 @@ public: void Enqueue(CardTable::CardPtr card); + void EnqueueToShared(CardTable::CardPtr card); + + void Dump(PandaUnorderedSet *cards); + ~GCG1BarrierSet() override = default; CardTable *GetCardTable() const @@ -275,8 +276,8 @@ private: CardTable *card_table_ {nullptr}; /// Minimal address used by VM. 
Used as a base for card index calculation void *min_addr_ {nullptr}; - ThreadLocalCardQueues *updated_refs_queue_; - os::memory::Mutex *queue_lock_; + os::memory::Mutex shared_cards_queue_lock_; + SharedCardsQueue shared_cards_queue_ GUARDED_BY(shared_cards_queue_lock_); }; } // namespace panda::mem -- Gitee From b25dab67763b2868b69101ed65b93fc1bd8d7485 Mon Sep 17 00:00:00 2001 From: rjgask Date: Thu, 28 Sep 2023 06:51:04 +0000 Subject: [PATCH 2/7] split queue --- runtime/mem/gc/g1/g1-gc.cpp | 10 +++++++--- runtime/mem/gc/g1/ref_updater.h | 2 +- runtime/mem/gc/g1/update_remset_thread.cpp | 11 +++++++---- runtime/mem/gc/g1/update_remset_thread.h | 1 + runtime/mem/gc/gc_barrier_set.cpp | 12 ++++++++++++ runtime/mem/gc/gc_barrier_set.h | 7 ++++++- 6 files changed, 34 insertions(+), 9 deletions(-) diff --git a/runtime/mem/gc/g1/g1-gc.cpp b/runtime/mem/gc/g1/g1-gc.cpp index f5e096e17..83819d1ae 100644 --- a/runtime/mem/gc/g1/g1-gc.cpp +++ b/runtime/mem/gc/g1/g1-gc.cpp @@ -649,7 +649,7 @@ void G1GC::WorkerTaskProcessing(GCWorkersTask *task, [[maybe_unu os::memory::LockHolder lock(gc_worker_queue_lock_); auto *barriers = GetG1BarrierSet(); for (auto card : *task_updated_refs_queue) { - barriers->EnqueueToShared(card); + barriers->EnqueueToOld(card); } } this->GetInternalAllocator()->Delete(moved_objects_range); @@ -1070,7 +1070,11 @@ void G1GC::RunGC(GCTask &task, const CollectionSet &collectible_ { // TODO(bwx983476) Measure only those that are on pause time::Timer timer(&young_pause_time, true); - HandlePendingDirtyCards(); + auto is_mixed = collectible_regions.Tenured().size() > 0; + ProcessDirtyCards(); + if (is_mixed) { + HandlePendingDirtyCards(); + } MemRange dirty_cards_range = MixedMarkAndCacheRefs(task, collectible_regions); ClearDirtyAndYoungCards(dirty_cards_range); CollectAndMove(collectible_regions); @@ -1982,7 +1986,7 @@ void G1GC::ReenqueueDirtyCards() auto *barriers = GetG1BarrierSet(); std::for_each(dirty_cards_.cbegin(), dirty_cards_.cend(), 
[barriers](auto *card) { card->Mark(); - barriers->EnqueueToShared(card); + barriers->EnqueueToOld(card); }); dirty_cards_.clear(); } diff --git a/runtime/mem/gc/g1/ref_updater.h b/runtime/mem/gc/g1/ref_updater.h index c22fc04dd..2e8e2f3dd 100644 --- a/runtime/mem/gc/g1/ref_updater.h +++ b/runtime/mem/gc/g1/ref_updater.h @@ -116,7 +116,7 @@ public: protected: void Enqueue(CardTable::CardPtr card) const override { - barriers_->Enqueue(card); + barriers_->EnqueueToOld(card); } private: diff --git a/runtime/mem/gc/g1/update_remset_thread.cpp b/runtime/mem/gc/g1/update_remset_thread.cpp index d55e8bd65..3162956ca 100644 --- a/runtime/mem/gc/g1/update_remset_thread.cpp +++ b/runtime/mem/gc/g1/update_remset_thread.cpp @@ -206,6 +206,12 @@ void UpdateRemsetThread::FillFromQueue(PandaUnorderedSetDump(cards); } +template +void UpdateRemsetThread::FillFromOldQueue(PandaUnorderedSet *cards) +{ + barriers_->DumpOld(cards); +} + template void UpdateRemsetThread::FillFromThreads(PandaUnorderedSet *cards) { @@ -325,10 +331,7 @@ void UpdateRemsetThread::DrainAllCards(PandaUnorderedSet *cards) REQUIRES(loop_lock_); void FillFromQueue(PandaUnorderedSet *cards) REQUIRES(loop_lock_); + void FillFromOldQueue(PandaUnorderedSet *cards) REQUIRES(loop_lock_); void FillFromThreads(PandaUnorderedSet *cards) REQUIRES(loop_lock_); void FillFromPostBarrierBuffers(PandaUnorderedSet *cards); void FillFromPostBarrierBuffer(GCG1BarrierSet::G1PostBarrierRingBufferType *post_wrb, diff --git a/runtime/mem/gc/gc_barrier_set.cpp b/runtime/mem/gc/gc_barrier_set.cpp index b8c16d4ed..0d6813bdc 100644 --- a/runtime/mem/gc/gc_barrier_set.cpp +++ b/runtime/mem/gc/gc_barrier_set.cpp @@ -204,10 +204,22 @@ void GCG1BarrierSet::EnqueueToShared(CardTable::CardPtr card) shared_cards_queue_.push_back(card); } +void GCG1BarrierSet::EnqueueToOld(CardTable::CardPtr card) +{ + old_region_cards_queue_.push_back(card); +} + void GCG1BarrierSet::Dump(PandaUnorderedSet *cards) { os::memory::LockHolder 
lock(shared_cards_queue_lock_); std::copy(shared_cards_queue_.begin(), shared_cards_queue_.end(), std::inserter(*cards, cards->end())); shared_cards_queue_.clear(); } + +void GCG1BarrierSet::DumpOld(PandaUnorderedSet *cards) +{ + os::memory::LockHolder lock(shared_cards_queue_lock_); + std::copy(old_region_cards_queue_.begin(), old_region_cards_queue_.end(), std::inserter(*cards, cards->end())); + old_region_cards_queue_.clear(); +} } // namespace panda::mem diff --git a/runtime/mem/gc/gc_barrier_set.h b/runtime/mem/gc/gc_barrier_set.h index 3eb48f4e6..2b9303fd2 100644 --- a/runtime/mem/gc/gc_barrier_set.h +++ b/runtime/mem/gc/gc_barrier_set.h @@ -249,8 +249,12 @@ public: void EnqueueToShared(CardTable::CardPtr card); + void EnqueueToOld(CardTable::CardPtr card); + void Dump(PandaUnorderedSet *cards); + void DumpOld(PandaUnorderedSet *cards); + ~GCG1BarrierSet() override = default; CardTable *GetCardTable() const @@ -277,7 +281,8 @@ private: /// Minimal address used by VM. Used as a base for card index calculation void *min_addr_ {nullptr}; os::memory::Mutex shared_cards_queue_lock_; - SharedCardsQueue shared_cards_queue_ GUARDED_BY(shared_cards_queue_lock_); + SharedCardsQueue shared_cards_queue_; + SharedCardsQueue old_region_cards_queue_; }; } // namespace panda::mem -- Gitee From 6c5282e34f5c8a6488de53b8f6c049d51681b95f Mon Sep 17 00:00:00 2001 From: rjgask Date: Thu, 28 Sep 2023 08:29:20 +0000 Subject: [PATCH 3/7] fix remark --- runtime/mem/gc/g1/g1-gc.cpp | 15 ++++++--------- runtime/mem/gc/g1/g1-gc.h | 2 +- runtime/mem/gc/g1/update_remset_thread.cpp | 17 ++++++++++------- runtime/mem/gc/g1/update_remset_thread.h | 4 ++-- 4 files changed, 19 insertions(+), 19 deletions(-) diff --git a/runtime/mem/gc/g1/g1-gc.cpp b/runtime/mem/gc/g1/g1-gc.cpp index 83819d1ae..ea0fd270e 100644 --- a/runtime/mem/gc/g1/g1-gc.cpp +++ b/runtime/mem/gc/g1/g1-gc.cpp @@ -918,10 +918,10 @@ bool G1GC::HaveGarbageRegions(const PandaPriorityQueue -void G1GC::ProcessDirtyCards() +void 
G1GC::ProcessDirtyCards(bool process_old_cards) { ScopedTiming t(__FUNCTION__, *this->GetTiming()); - update_remset_thread_->GCProcessCards(); + update_remset_thread_->GCProcessCards(process_old_cards); } template @@ -1071,10 +1071,7 @@ void G1GC::RunGC(GCTask &task, const CollectionSet &collectible_ // TODO(bwx983476) Measure only those that are on pause time::Timer timer(&young_pause_time, true); auto is_mixed = collectible_regions.Tenured().size() > 0; - ProcessDirtyCards(); - if (is_mixed) { - HandlePendingDirtyCards(); - } + ProcessDirtyCards(is_mixed); MemRange dirty_cards_range = MixedMarkAndCacheRefs(task, collectible_regions); ClearDirtyAndYoungCards(dirty_cards_range); CollectAndMove(collectible_regions); @@ -1162,7 +1159,7 @@ MemRange G1GC::MixedMarkAndCacheRefs(const GCTask &task, const C analytics_.ReportMarkingEnd(panda::time::GetCurrentTimeInNanos()); // HandleReferences could write a new barriers - so we need to handle them before moving - ProcessDirtyCards(); + ProcessDirtyCards(collectible_regions.Tenured().size() > 0); return dirty_cards_range; } @@ -1466,7 +1463,7 @@ void G1GC::FullMarking(panda::GCTask &task) } } // Force card updater here, after swapping bitmap, to skip dead objects - ProcessDirtyCards(); + ProcessDirtyCards(true); auto garbage_regions = GetG1ObjectAllocator()->template GetTopGarbageRegions(); auto empty_tenured_regions = GetEmptyTenuredRegularRegionsFromQueue(std::move(garbage_regions)); CollectEmptyRegions(task, &empty_tenured_regions); @@ -1642,7 +1639,7 @@ void G1GC::Remark(panda::GCTask const &task) } } // Force card updater here, after swapping bitmap, to skip dead objects - ProcessDirtyCards(); + ProcessDirtyCards(true); } template diff --git a/runtime/mem/gc/g1/g1-gc.h b/runtime/mem/gc/g1/g1-gc.h index c0a9686c9..90c01b3b8 100644 --- a/runtime/mem/gc/g1/g1-gc.h +++ b/runtime/mem/gc/g1/g1-gc.h @@ -159,7 +159,7 @@ protected: private: void WaitForUpdateRemsetThread(); - void ProcessDirtyCards(); + void 
ProcessDirtyCards(bool process_old_cads); bool HaveGarbageRegions(); bool HaveGarbageRegions(const PandaPriorityQueue> &regions); diff --git a/runtime/mem/gc/g1/update_remset_thread.cpp b/runtime/mem/gc/g1/update_remset_thread.cpp index 3162956ca..7af12eee8 100644 --- a/runtime/mem/gc/g1/update_remset_thread.cpp +++ b/runtime/mem/gc/g1/update_remset_thread.cpp @@ -123,7 +123,7 @@ void UpdateRemsetThread::WaitUntilTasksEnd() while (pause_thread_) { // runtime is destroying, handle all refs anyway for now if (stop_thread_ || update_thread_ == nullptr) { - ProcessAllCards(); // Process all cards inside gc + ProcessAllCards(false); // Process all cards inside gc pause_thread_ = false; break; } @@ -135,7 +135,7 @@ void UpdateRemsetThread::WaitUntilTasksEnd() os::memory::LockHolder holder(loop_lock_); // we will handle all remsets even when thread is stopped (we are trying to destroy Runtime, but it's the last // GC), try to eliminate it in the future for faster shutdown - ProcessAllCards(); // Process all cards inside gc + ProcessAllCards(false); // Process all cards inside gc pause_thread_ = false; } stats_.PrintStats(); @@ -162,7 +162,7 @@ void UpdateRemsetThread::ThreadLoop() // gc is waiting for us to handle all updates // possible improvements: let GC thread to help us to handle elements in queue in parallel, instead of // waiting - ProcessAllCards(); // Process all cards inside gc + ProcessAllCards(false); // Process all cards inside gc pause_thread_ = false; thread_cond_var_.Signal(); // notify GC thread that we processed all updates thread_cond_var_.Wait(&loop_lock_); // let WaitUntilTasksEnd to finish @@ -183,7 +183,7 @@ void UpdateRemsetThread::ThreadLoop() continue; } ASSERT(!paused_by_gc_thread_); - auto processed_cards = ProcessAllCards(); + auto processed_cards = ProcessAllCards(false); if (processed_cards < min_concurrent_cards_to_process_) { Sleep(); @@ -301,11 +301,14 @@ private: }; template -size_t UpdateRemsetThread::ProcessAllCards() +size_t 
UpdateRemsetThread::ProcessAllCards(bool process_all_cards) { FillFromQueue(&cards_); FillFromThreads(&cards_); FillFromPostBarrierBuffers(&cards_); + if (process_all_cards) { + barriers_->DumpOld(&cards_); + } if (!cards_.empty()) { LOG(DEBUG, GC) << "Remset thread started process: " << cards_.size() << " cards"; } @@ -357,11 +360,11 @@ void UpdateRemsetThread::SuspendThread() } template -void UpdateRemsetThread::GCProcessCards() +void UpdateRemsetThread::GCProcessCards(bool process_all_cards) { ASSERT(gc_pause_thread_); os::memory::LockHolder holder(loop_lock_); - ProcessAllCards(); + ProcessAllCards(process_all_cards); } template diff --git a/runtime/mem/gc/g1/update_remset_thread.h b/runtime/mem/gc/g1/update_remset_thread.h index 639227cd8..b4a49a00e 100644 --- a/runtime/mem/gc/g1/update_remset_thread.h +++ b/runtime/mem/gc/g1/update_remset_thread.h @@ -142,7 +142,7 @@ public: * Process all cards in the GC thread. * Can be called only if UpdateRemsetThread is suspended */ - void GCProcessCards(); + void GCProcessCards(bool process_all_cards); /** * Invalidate regions in the GC thread * Can be called only if UpdateRemsetThread is suspended @@ -160,7 +160,7 @@ private: void FillFromPostBarrierBuffer(PandaVector *post_wrb, PandaUnorderedSet *cards); - size_t ProcessAllCards() REQUIRES(loop_lock_); + size_t ProcessAllCards(bool process_old_cards) REQUIRES(loop_lock_); void Sleep() REQUIRES(loop_lock_) { -- Gitee From ebbf9535363552becd09dbd9d4d5cb380303ed4a Mon Sep 17 00:00:00 2001 From: rjgask Date: Thu, 28 Sep 2023 12:58:21 +0000 Subject: [PATCH 4/7] fix old regions cards --- runtime/mem/gc/g1/g1-gc.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/runtime/mem/gc/g1/g1-gc.cpp b/runtime/mem/gc/g1/g1-gc.cpp index ea0fd270e..abcb24682 100644 --- a/runtime/mem/gc/g1/g1-gc.cpp +++ b/runtime/mem/gc/g1/g1-gc.cpp @@ -1070,8 +1070,11 @@ void G1GC::RunGC(GCTask &task, const CollectionSet &collectible_ { // TODO(bwx983476) Measure only those that 
are on pause time::Timer timer(&young_pause_time, true); - auto is_mixed = collectible_regions.Tenured().size() > 0; + auto is_mixed = collectible_regions.size() > collectible_regions.Young().size(); ProcessDirtyCards(is_mixed); + if (!is_mixed) { + HandlePendingDirtyCards(); + } MemRange dirty_cards_range = MixedMarkAndCacheRefs(task, collectible_regions); ClearDirtyAndYoungCards(dirty_cards_range); CollectAndMove(collectible_regions); @@ -1159,7 +1162,7 @@ MemRange G1GC::MixedMarkAndCacheRefs(const GCTask &task, const C analytics_.ReportMarkingEnd(panda::time::GetCurrentTimeInNanos()); // HandleReferences could write a new barriers - so we need to handle them before moving - ProcessDirtyCards(collectible_regions.Tenured().size() > 0); + ProcessDirtyCards(true); return dirty_cards_range; } @@ -2092,7 +2095,6 @@ MemRange G1GC::CacheRefsFromRemsets(const MemRangeRefsChecker &r analytics_.ReportScanRemsetEnd(panda::time::GetCurrentTimeInNanos()); if (!this->IsFullGC()) { - CacheRefsFromDirtyCards(visitor); #ifndef NDEBUG unique_cards_initialized_ = true; #endif // NDEBUG -- Gitee From d094845e4d9396c5043f9a9aee07af3821fa812a Mon Sep 17 00:00:00 2001 From: rjgask Date: Mon, 2 Oct 2023 10:23:50 +0000 Subject: [PATCH 5/7] do not mark old cards --- runtime/mem/gc/g1/g1-gc.cpp | 4 +++- runtime/mem/gc/gc_barrier_set.cpp | 7 +++++++ runtime/mem/gc/gc_barrier_set.h | 2 ++ 3 files changed, 12 insertions(+), 1 deletion(-) diff --git a/runtime/mem/gc/g1/g1-gc.cpp b/runtime/mem/gc/g1/g1-gc.cpp index abcb24682..47bc63a51 100644 --- a/runtime/mem/gc/g1/g1-gc.cpp +++ b/runtime/mem/gc/g1/g1-gc.cpp @@ -649,6 +649,7 @@ void G1GC::WorkerTaskProcessing(GCWorkersTask *task, [[maybe_unu os::memory::LockHolder lock(gc_worker_queue_lock_); auto *barriers = GetG1BarrierSet(); for (auto card : *task_updated_refs_queue) { + // do not mark barriers->EnqueueToOld(card); } } @@ -1410,6 +1411,7 @@ void G1GC::UpdateRefsToMovedObjects(MovedObjectsContainer ex-cset references. END. 
==="; } + GetG1BarrierSet()->ClearOldCards(); this->CommonUpdateRefsToMovedObjects(); } @@ -1985,7 +1987,7 @@ void G1GC::ReenqueueDirtyCards() ScopedTiming t(__FUNCTION__, *this->GetTiming()); auto *barriers = GetG1BarrierSet(); std::for_each(dirty_cards_.cbegin(), dirty_cards_.cend(), [barriers](auto *card) { - card->Mark(); + // do not mark barriers->EnqueueToOld(card); }); dirty_cards_.clear(); diff --git a/runtime/mem/gc/gc_barrier_set.cpp b/runtime/mem/gc/gc_barrier_set.cpp index 0d6813bdc..4ac6e36de 100644 --- a/runtime/mem/gc/gc_barrier_set.cpp +++ b/runtime/mem/gc/gc_barrier_set.cpp @@ -222,4 +222,11 @@ void GCG1BarrierSet::DumpOld(PandaUnorderedSet *cards) std::copy(old_region_cards_queue_.begin(), old_region_cards_queue_.end(), std::inserter(*cards, cards->end())); old_region_cards_queue_.clear(); } + +void GCG1BarrierSet::ClearOldCards() +{ + for (auto *card : old_region_cards_queue_) { + card->Clear(); + } +} } // namespace panda::mem diff --git a/runtime/mem/gc/gc_barrier_set.h b/runtime/mem/gc/gc_barrier_set.h index 2b9303fd2..10c303aef 100644 --- a/runtime/mem/gc/gc_barrier_set.h +++ b/runtime/mem/gc/gc_barrier_set.h @@ -255,6 +255,8 @@ public: void DumpOld(PandaUnorderedSet *cards); + void ClearOldCards(); + ~GCG1BarrierSet() override = default; CardTable *GetCardTable() const -- Gitee From b66579484a22388e45325d57212285ac4654ab13 Mon Sep 17 00:00:00 2001 From: rjgask Date: Mon, 2 Oct 2023 10:56:07 +0000 Subject: [PATCH 6/7] revert cards defering --- runtime/mem/gc/g1/g1-gc.cpp | 43 ++++++++++++++++------ runtime/mem/gc/g1/g1-gc.h | 7 ++++ runtime/mem/gc/g1/update_remset_thread.cpp | 9 ++++- runtime/mem/gc/g1/update_remset_thread.h | 2 +- 4 files changed, 47 insertions(+), 14 deletions(-) diff --git a/runtime/mem/gc/g1/g1-gc.cpp b/runtime/mem/gc/g1/g1-gc.cpp index 47bc63a51..91c790926 100644 --- a/runtime/mem/gc/g1/g1-gc.cpp +++ b/runtime/mem/gc/g1/g1-gc.cpp @@ -1071,11 +1071,7 @@ void G1GC::RunGC(GCTask &task, const CollectionSet &collectible_ { // 
TODO(bwx983476) Measure only those that are on pause time::Timer timer(&young_pause_time, true); - auto is_mixed = collectible_regions.size() > collectible_regions.Young().size(); - ProcessDirtyCards(is_mixed); - if (!is_mixed) { - HandlePendingDirtyCards(); - } + HandlePendingDirtyCards(); MemRange dirty_cards_range = MixedMarkAndCacheRefs(task, collectible_regions); ClearDirtyAndYoungCards(dirty_cards_range); CollectAndMove(collectible_regions); @@ -1977,8 +1973,9 @@ template void G1GC::HandlePendingDirtyCards() { ScopedTiming t(__FUNCTION__, *this->GetTiming()); - update_remset_thread_->DrainAllCards(&dirty_cards_); - std::for_each(dirty_cards_.cbegin(), dirty_cards_.cend(), [](auto card) { card->Clear(); }); + update_remset_thread_->DrainAllCards(&dirty_cards_, &old_dirty_cards_); + std::for_each(dirty_cards_.cbegin(), dirty_cards_.cend(), [](auto *card) { card->Clear(); }); + std::for_each(old_dirty_cards_.cbegin(), old_dirty_cards_.cend(), [](auto *card) { card->Clear(); }); } template @@ -1987,10 +1984,15 @@ void G1GC::ReenqueueDirtyCards() ScopedTiming t(__FUNCTION__, *this->GetTiming()); auto *barriers = GetG1BarrierSet(); std::for_each(dirty_cards_.cbegin(), dirty_cards_.cend(), [barriers](auto *card) { - // do not mark + card->Mark(); + barriers->EnqueueToShared(card); + }); + std::for_each(old_dirty_cards_.cbegin(), old_dirty_cards_.cend(), [barriers](auto *card) { + // do not mark old cards barriers->EnqueueToOld(card); }); dirty_cards_.clear(); + old_dirty_cards_.clear(); } template @@ -2097,6 +2099,10 @@ MemRange G1GC::CacheRefsFromRemsets(const MemRangeRefsChecker &r analytics_.ReportScanRemsetEnd(panda::time::GetCurrentTimeInNanos()); if (!this->IsFullGC()) { + CacheRefsFromDirtyCards(visitor); + if (collection_set_.size() > collection_set_.Young().size()) { + CacheRefsFromOldDirtyCards(visitor); + } #ifndef NDEBUG unique_cards_initialized_ = true; #endif // NDEBUG @@ -2112,9 +2118,24 @@ template void G1GC::CacheRefsFromDirtyCards(Visitor visitor) 
{ ScopedTiming t(__FUNCTION__, *this->GetTiming()); + CacheRefsFromDirtyCards(visitor, &dirty_cards_); +} + +template +template +void G1GC::CacheRefsFromOldDirtyCards(Visitor visitor) +{ + ScopedTiming t(__FUNCTION__, *this->GetTiming()); + CacheRefsFromDirtyCards(visitor, &old_dirty_cards_); +} + +template +template +void G1GC::CacheRefsFromDirtyCards(Visitor visitor, PandaUnorderedSet *dirty_cards) +{ auto card_table = this->GetCardTable(); constexpr size_t MEM_SIZE = DEFAULT_REGION_SIZE / RemSet<>::Bitmap::GetNumBits(); - for (auto it = dirty_cards_.cbegin(); it != dirty_cards_.cend();) { + for (auto it = dirty_cards->cbegin(); it != dirty_cards->cend();) { auto range = card_table->GetMemoryRange(*it); auto addr = range.GetStartAddress(); ASSERT_DO(IsHeapSpace(PoolManager::GetMmapMemPool()->GetSpaceTypeForAddr(ToVoidPtr(addr))), @@ -2122,7 +2143,7 @@ void G1GC::CacheRefsFromDirtyCards(Visitor visitor) auto end_addr = range.GetEndAddress(); auto region = panda::mem::AddrToRegion(ToVoidPtr(addr)); if (!RemsetRegionPredicate(region)) { - it = dirty_cards_.erase(it); + it = dirty_cards->erase(it); continue; } @@ -2134,7 +2155,7 @@ void G1GC::CacheRefsFromDirtyCards(Visitor visitor) addr += MEM_SIZE; } if (all_cross_region_refs_processed) { - it = dirty_cards_.erase(it); + it = dirty_cards->erase(it); continue; } ++it; diff --git a/runtime/mem/gc/g1/g1-gc.h b/runtime/mem/gc/g1/g1-gc.h index 90c01b3b8..6a64e8237 100644 --- a/runtime/mem/gc/g1/g1-gc.h +++ b/runtime/mem/gc/g1/g1-gc.h @@ -192,6 +192,12 @@ private: template void CacheRefsFromDirtyCards(Visitor visitor); + template + void CacheRefsFromOldDirtyCards(Visitor visitor); + + template + void CacheRefsFromDirtyCards(Visitor visitor, PandaUnorderedSet *dirty_cards); + void InitializeImpl() override; bool NeedFullGC(const panda::GCTask &task); @@ -449,6 +455,7 @@ private: // Dirty cards which are not fully processed before collection. // These cards are processed later. 
PandaUnorderedSet dirty_cards_; + PandaUnorderedSet old_dirty_cards_; #ifndef NDEBUG bool unique_cards_initialized_ = false; #endif // NDEBUG diff --git a/runtime/mem/gc/g1/update_remset_thread.cpp b/runtime/mem/gc/g1/update_remset_thread.cpp index 7af12eee8..c4d007b6a 100644 --- a/runtime/mem/gc/g1/update_remset_thread.cpp +++ b/runtime/mem/gc/g1/update_remset_thread.cpp @@ -327,14 +327,19 @@ size_t UpdateRemsetThread::ProcessAllCards(bool process_all_card } template -void UpdateRemsetThread::DrainAllCards(PandaUnorderedSet *cards) +void UpdateRemsetThread::DrainAllCards(PandaUnorderedSet *cards, + PandaUnorderedSet *old_cards) { pause_thread_ = true; // Atomic with relaxed order reason: memory order is not required defer_cards_.store(true, std::memory_order_relaxed); os::memory::LockHolder holder(loop_lock_); - FillFromOldQueue(cards); + FillFromDefered(cards); + FillFromQueue(cards); + FillFromThreads(cards); + FillFromPostBarrierBuffers(cards); + FillFromOldQueue(old_cards); pause_thread_ = false; // Atomic with relaxed order reason: memory order is not required diff --git a/runtime/mem/gc/g1/update_remset_thread.h b/runtime/mem/gc/g1/update_remset_thread.h index b4a49a00e..4e44161fd 100644 --- a/runtime/mem/gc/g1/update_remset_thread.h +++ b/runtime/mem/gc/g1/update_remset_thread.h @@ -132,7 +132,7 @@ public: } // Interrupts card processing and returns all unprocessed cards - void DrainAllCards(PandaUnorderedSet *cards); + void DrainAllCards(PandaUnorderedSet *cards, PandaUnorderedSet *old_cards); /// Suspend UpdateRemsetThread to reduce CPU usage void SuspendThread(); -- Gitee From 7b21aa07531d42703eb3301262f1d7755c71af67 Mon Sep 17 00:00:00 2001 From: rjgask Date: Tue, 3 Oct 2023 10:39:22 +0000 Subject: [PATCH 7/7] handle old in remset thread --- runtime/mem/gc/g1/update_remset_thread.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/runtime/mem/gc/g1/update_remset_thread.cpp b/runtime/mem/gc/g1/update_remset_thread.cpp index 
c4d007b6a..f7f1c9d67 100644 --- a/runtime/mem/gc/g1/update_remset_thread.cpp +++ b/runtime/mem/gc/g1/update_remset_thread.cpp @@ -315,6 +315,16 @@ size_t UpdateRemsetThread::ProcessAllCards(bool process_all_card size_t cards_size = 0; RemsetCardHandler card_handler(card_table_, region_size_bits_, defer_cards_); + for (auto it = cards_.begin(); it != cards_.end();) { + if (!card_handler.Handle(*it)) { + return cards_size; + } + cards_size++; + + it = cards_.erase(it); + } + + barriers_->DumpOld(&cards_); for (auto it = cards_.begin(); it != cards_.end();) { if (!card_handler.Handle(*it)) { break; @@ -323,6 +333,13 @@ size_t UpdateRemsetThread::ProcessAllCards(bool process_all_card it = cards_.erase(it); } + + for (auto it = cards_.begin(); it != cards_.end(); ++it) { + barriers_->EnqueueToOld(*it); + } + + cards_.clear(); + return cards_size; } -- Gitee