diff --git a/common_components/heap/collector/trace_collector.cpp b/common_components/heap/collector/trace_collector.cpp
index 21f1e9b69b5e0f8601336788ac5199a35046ece3..f0a65ff8694ccaf78e26a5dac0ee25a3d4def8c6 100755
--- a/common_components/heap/collector/trace_collector.cpp
+++ b/common_components/heap/collector/trace_collector.cpp
@@ -27,6 +27,74 @@ namespace common {
 const size_t TraceCollector::MAX_MARKING_WORK_SIZE = 16; // fork task if bigger
 const size_t TraceCollector::MIN_MARKING_WORK_SIZE = 8;  // forbid forking task if smaller
 
+template <class Runable>
+class RunableTask : public common::Task {
+public:
+    explicit RunableTask(Runable &&runable) : Task(0), fn_(std::forward<Runable>(runable)) {}
+    bool Run(uint32_t) override
+    {
+        (void)(fn_());
+        return true;
+    }
+    Runable fn_;
+};
+
+template <class T>
+static inline std::unique_ptr<Task> CreateLambdaTask(T task)
+{
+    Task *res = new RunableTask<std::decay_t<T>>(std::move(task));
+    return std::unique_ptr<Task>(res);
+}
+
+template <class T, void (*callback)(T *, T *)>
+class ArrayTaskDispatcher {
+public:
+    explicit ArrayTaskDispatcher(CArrayList<T> &&task, size_t batch = 1)
+        : tasks_(std::move(task)), index_(0), running_(0), batch_(batch)
+    {
+    }
+    void Dispatch(Taskpool *pool)
+    {
+        auto threadRun = [this]() {
+            RunImpl();
+            std::unique_lock<std::mutex> lock(mtx_);
+            running_--;
+            cv_.notify_one();
+        };
+        const int threads = pool->GetTotalThreadNum();
+        running_ = threads;
+        for (int i = 0; i < threads; ++i) {
+            pool->PostTask(CreateLambdaTask(threadRun));
+        }
+    }
+    void Wait()
+    {
+        RunImpl();
+        std::unique_lock<std::mutex> lock(mtx_);
+        cv_.wait(lock, [this]() { return running_ == 0; });
+    }
+    auto &Tasks() { return tasks_; }
+    void SetTaskBatch(size_t batch) { batch_ = batch; }
+
+private:
+    void RunImpl()
+    {
+        const size_t size = tasks_.size();
+        size_t i = index_.fetch_add(batch_, std::memory_order_relaxed);
+        for (; i < size; i = index_.fetch_add(batch_, std::memory_order_relaxed)) {
+            auto begin = &tasks_[i];
+            auto end = begin + std::min(size - i, batch_);
+            callback(begin, end);
+        }
+    }
+    std::mutex mtx_;
+    std::condition_variable cv_;
+    CArrayList<T> tasks_;
+    std::atomic_size_t index_;
+    size_t running_;
+    size_t batch_;
+};
+
 void StaticRootTable::VisitRoots(const RefFieldVisitor& visitor)
 {
     std::lock_guard<std::mutex> lock(gcRootsLock_);
@@ -86,45 +154,6 @@ private:
     GlobalWorkStackQueue &globalQueue_;
 };
 
-class ClearWeakStackTask : public common::Task {
-public:
-    ClearWeakStackTask(uint32_t id, TraceCollector &tc, Taskpool *pool, TaskPackMonitor &monitor,
-                       GlobalWeakStackQueue &globalQueue)
-        : Task(id), collector_(tc), threadPool_(pool), monitor_(monitor), globalQueue_(globalQueue)
-    {}
-
-    // single work task without thread pool
-    ClearWeakStackTask(uint32_t id, TraceCollector& tc, TaskPackMonitor &monitor,
-                       GlobalWeakStackQueue &globalQueue)
-        : Task(id), collector_(tc), threadPool_(nullptr), monitor_(monitor), globalQueue_(globalQueue)
-    {}
-
-    ~ClearWeakStackTask() override
-    {
-        threadPool_ = nullptr;
-    }
-
-    // run concurrent marking task.
-    bool Run([[maybe_unused]] uint32_t threadIndex) override
-    {
-        while (true) {
-            WeakStack weakStack = globalQueue_.PopWorkStack();
-            if (weakStack.empty()) {
-                break;
-            }
-            collector_.ProcessWeakStack(weakStack);
-        }
-        monitor_.NotifyFinishOne();
-        return true;
-    }
-
-private:
-    TraceCollector &collector_;
-    Taskpool *threadPool_;
-    TaskPackMonitor &monitor_;
-    GlobalWeakStackQueue &globalQueue_;
-};
-
 void TraceCollector::TryForkTask(Taskpool *threadPool, WorkStack &workStack, GlobalWorkStackQueue &globalQueue)
 {
     size_t size = workStack.size();
@@ -147,18 +176,19 @@ void TraceCollector::TryForkTask(Taskpool *threadPool, WorkStack &workStack, Glo
     }
 }
 
-void TraceCollector::ProcessWeakStack(WeakStack &weakStack)
+static void ClearWeakRef(WeakStack::value_type *begin, WeakStack::value_type *end)
 {
-    while (!weakStack.empty()) {
-        auto [fieldPointer, offset] = *weakStack.back();
-        weakStack.pop_back();
+    for (auto iter = begin; iter != end; ++iter) {
+        RefField<> *fieldPointer = iter->first;
+        size_t offset = iter->second;
+        ASSERT(fieldPointer != nullptr && (*iter = {nullptr, 0}).first == nullptr); // debug only. clear memory
         ASSERT_LOGF(offset % sizeof(RefField<>) == 0, "offset is not aligned");
         RefField<> &field = reinterpret_cast<RefField<>&>(*fieldPointer);
         RefField<> oldField(field);
         BaseObject* targetObj = oldField.GetTargetObject();
-        if (!Heap::IsHeapAddress(targetObj) || IsMarkedObject(targetObj) ||
+        if (!Heap::IsHeapAddress(targetObj) || RegionSpace::IsMarkedObject(targetObj) ||
             RegionSpace::IsNewObjectSinceTrace(targetObj)) {
             continue;
         }
@@ -178,6 +208,13 @@ void TraceCollector::ProcessWeakStack(WeakStack &weakStack)
     }
 }
 
+void TraceCollector::ProcessWeakStack(WeakStack &weakStack)
+{
+    auto begin = &weakStack[0];
+    auto end = begin + weakStack.size();
+    ClearWeakRef(begin, end);
+}
+
 void TraceCollector::ProcessMarkStack([[maybe_unused]] uint32_t threadIndex, Taskpool *threadPool,
                                       WorkStack &workStack, GlobalWorkStackQueue &globalQueue)
 {
@@ -236,7 +273,7 @@ void TraceCollector::ProcessMarkStack([[maybe_unused]] uint32_t threadIndex, Tas
 void TraceCollector::MergeWeakStack(WeakStack& weakStack)
 {
     std::lock_guard<std::mutex> lock(weakStackLock_);
-    globalWeakStack_.insert(weakStack);
+    globalWeakStack_.insert(globalWeakStack_.end(), weakStack.begin(), weakStack.end());
 }
 
 void TraceCollector::EnumConcurrencyModelRoots(RootSet& rootSet) const
@@ -335,23 +372,6 @@ bool TraceCollector::AddConcurrentTracingWork(WorkStack& workStack, GlobalWorkSt
     return true;
 }
 
-bool TraceCollector::AddWeakStackClearWork(WeakStack &weakStack,
-                                           GlobalWeakStackQueue &globalQueue,
-                                           size_t threadCount)
-{
-    if (weakStack.size() <= threadCount * MIN_MARKING_WORK_SIZE) {
-        return false; // too less init tasks, which may lead to workload imbalance, add work rejected
-    }
-    DCHECK_CC(threadCount > 0);
-    const size_t chunkSize = std::min(weakStack.size() / threadCount + 1, MIN_MARKING_WORK_SIZE);
-    // Split the current work stack into work tasks.
-    while (!weakStack.empty()) {
-        WeakStackBuf *hSplit = weakStack.split(chunkSize);
-        globalQueue.AddWorkStack(WeakStack(hSplit));
-    }
-    return true;
-}
-
 bool TraceCollector::PushRootToWorkStack(RootSet *workStack, BaseObject *obj)
 {
     RegionDesc *regionInfo = RegionDesc::GetAliveRegionDescAt(reinterpret_cast<uintptr_t>(obj));
@@ -437,34 +457,18 @@ void TraceCollector::ClearWeakStack(bool parallel)
     }
     Taskpool *threadPool = GetThreadPool();
     ASSERT_LOGF(threadPool != nullptr, "thread pool is null");
-    if (parallel) {
-        uint32_t parallelCount = GetGCThreadCount(true);
-        uint32_t threadCount = parallelCount + 1;
-        TaskPackMonitor monitor(parallelCount, parallelCount);
-        GlobalWeakStackQueue globalQueue;
-        for (uint32_t i = 0; i < parallelCount; ++i) {
-            threadPool->PostTask(std::make_unique<ClearWeakStackTask>(0, *this, threadPool, monitor, globalQueue));
-        }
-        if (!AddWeakStackClearWork(globalWeakStack_, globalQueue, static_cast<size_t>(threadCount))) {
-            ProcessWeakStack(globalWeakStack_);
-        }
-        bool exitLoop = false;
-        while (!exitLoop) {
-            WeakStack stack = globalQueue.DrainAllWorkStack();
-            if (stack.empty()) {
-                exitLoop = true;
-            }
-            ProcessWeakStack(stack);
-        }
-        globalQueue.NotifyFinish();
-        monitor.WaitAllFinished();
+    constexpr size_t BATCH_N = 32;
+    if (parallel && globalWeakStack_.size() > BATCH_N) {
+        ArrayTaskDispatcher<WeakStack::value_type, ClearWeakRef> dispatcher(std::move(globalWeakStack_), BATCH_N);
+        dispatcher.Dispatch(threadPool);
+        dispatcher.Wait();
+        globalWeakStack_ = std::move(dispatcher.Tasks()); // do not destroy globalWeakStack_ here
     } else {
         ProcessWeakStack(globalWeakStack_);
     }
 }
 
-
 bool TraceCollector::MarkSatbBuffer(WorkStack& workStack)
 {
     OHOS_HITRACE(HITRACE_LEVEL_COMMERCIAL, "CMCGC::MarkSatbBuffer", "");
diff --git a/common_components/heap/collector/trace_collector.h b/common_components/heap/collector/trace_collector.h
index 3f41128961cc611cf55ae837e6a0dcf7feb0ee77..74835f2989bbb2f5a1b1b855e506148c72f79937 100755
--- a/common_components/heap/collector/trace_collector.h
+++ b/common_components/heap/collector/trace_collector.h
@@ -107,10 +107,9 @@ class ConcurrentMarkingWork;
 using RootSet = MarkStack<BaseObject *>;
 using WorkStack = MarkStack<BaseObject *>;
 using WorkStackBuf = MarkStackBuffer<BaseObject *>;
-using WeakStack = MarkStack<std::shared_ptr<std::pair<RefField<>*, size_t>>>;
-using WeakStackBuf = MarkStackBuffer<std::shared_ptr<std::pair<RefField<>*, size_t>>>;
 using GlobalWorkStackQueue = GlobalStackQueue<WorkStack>;
-using GlobalWeakStackQueue = GlobalStackQueue<WeakStack>;
+
+using WeakStack = CArrayList<std::pair<RefField<>*, size_t>>;
 
 class TraceCollector : public Collector {
     friend MarkingWork;
@@ -323,7 +322,6 @@ protected:
     void TracingImpl(WorkStack& workStack, bool parallel, bool Remark);
 
     bool AddConcurrentTracingWork(WorkStack& workStack, GlobalWorkStackQueue &globalQueue, size_t threadCount);
-    bool AddWeakStackClearWork(WeakStack& workStack, GlobalWeakStackQueue &globalQueue, size_t threadCount);
 private:
     void MarkRememberSetImpl(BaseObject* object, WorkStack& workStack);
     void ConcurrentRemark(WorkStack& remarkStack, bool parallel);
diff --git a/common_components/heap/w_collector/w_collector.cpp b/common_components/heap/w_collector/w_collector.cpp
index 5fbff3e5a212e90ec656d183960ab159eadfdb82..77422c92288262577c0d35369b51fa8401f027b5 100755
--- a/common_components/heap/w_collector/w_collector.cpp
+++ b/common_components/heap/w_collector/w_collector.cpp
@@ -159,8 +159,7 @@ static void TraceRefField(BaseObject *obj, RefField<> &field, WorkStack &workSta
         DLOG(TRACE, "trace: skip weak obj when full gc, object: %p@%p, targetObj: %p", obj, &field, targetObj);
         // weak ref is cleared after roots pre-forward, so there might be a to-version weak ref which also need to be
         // cleared, offset recorded here will help us find it
-        weakStack.push_back(std::make_shared<std::pair<RefField<>*, size_t>>(
-            &field, reinterpret_cast<size_t>(&field) - reinterpret_cast<size_t>(obj)));
+        weakStack.emplace_back(&field, reinterpret_cast<size_t>(&field) - reinterpret_cast<size_t>(obj));
         return;
     }
@@ -644,6 +643,13 @@ void WCollector::PreforwardFlip()
     if (LIKELY_CC(allocBuffer != nullptr)) {
         allocBuffer->ClearRegions();
     }
+    if (gcReason_ == GC_REASON_YOUNG || globalWeakStack_.empty()) {
+        return;
+    }
+    ASSERT(std::all_of(globalWeakStack_.begin(), globalWeakStack_.end(),
+                       [](const WeakStack::value_type &pair) { return pair.first == nullptr; }) &&
+           "weak ref must be cleared by `TraceCollector::ClearWeakStack` !!");
+    globalWeakStack_.clear();
 }
 
 void WCollector::Preforward()
diff --git a/ecmascript/ecma_vm.cpp b/ecmascript/ecma_vm.cpp
index 6365f8406c13f7c6cb217bd5625d146bcd046dbf..de6ba1a4e83609ead9a057614a0628976f497bcb 100644
--- a/ecmascript/ecma_vm.cpp
+++ b/ecmascript/ecma_vm.cpp
@@ -913,6 +913,9 @@ void EcmaVM::CollectGarbage(TriggerGCType gcType, panda::ecmascript::GCReason re
 void EcmaVM::IterateConcurrentRoots(RootVisitor &v)
 {
     moduleManagers_.Iterate(v);
+    if (pgoProfiler_ != nullptr) {
+        pgoProfiler_->Iterate(v);
+    }
 }
 
 void EcmaVM::Iterate(RootVisitor &v)
@@ -928,9 +931,6 @@ void EcmaVM::IterateSTWRoots(RootVisitor &v)
         v.VisitRangeRoot(Root::ROOT_VM, ObjectSlot(ToUintPtr(&internalNativeMethods_.front())),
             ObjectSlot(ToUintPtr(&internalNativeMethods_.back()) + JSTaggedValue::TaggedTypeSize()));
     }
-    if (pgoProfiler_ != nullptr) {
-        pgoProfiler_->Iterate(v);
-    }
     if (aotFileManager_) {
         aotFileManager_->Iterate(v);
     }
diff --git a/ecmascript/mem/cmc_gc/hooks.cpp b/ecmascript/mem/cmc_gc/hooks.cpp
index 8bceb8122c86c2c9ff25c3e6deddeb10a105404b..36e3b0094e1da2339fd272d462e097672718e850 100644
--- a/ecmascript/mem/cmc_gc/hooks.cpp
+++ b/ecmascript/mem/cmc_gc/hooks.cpp
@@ -132,17 +132,16 @@ void VisitDynamicLocalRoots(const RefFieldVisitor &visitorFunc)
     runtime->GCIterateThreadList([&](JSThread *thread) {
         auto vm = thread->GetEcmaVM();
         ObjectXRay::VisitSTWVMRoots(vm, visitor);
-
-        auto profiler = vm->GetPGOProfiler();
-        if (profiler != nullptr) {
-            profiler->IteratePGOPreFuncList(visitor);
-        }
     });
 
     if (!panda::ecmascript::g_isEnableCMCGCConcurrentRootMarking) {
         runtime->GCIterateThreadList([&](JSThread *thread) {
             auto vm = thread->GetEcmaVM();
             ObjectXRay::VisitConcurrentVMRoots(vm, visitor);
+            auto profiler = vm->GetPGOProfiler();
+            if (profiler != nullptr) {
+                profiler->IteratePGOPreFuncList(visitor);
+            }
         });
     }
 }
@@ -212,6 +211,10 @@ void VisitDynamicConcurrentRoots(const RefFieldVisitor &visitorFunc)
     runtime->GCIterateThreadList([&](JSThread *thread) {
         auto vm = thread->GetEcmaVM();
         ObjectXRay::VisitConcurrentVMRoots(vm, visitor);
+        auto profiler = vm->GetPGOProfiler();
+        if (profiler != nullptr) {
+            profiler->IteratePGOPreFuncList(visitor);
+        }
     });
 }
 
@@ -226,11 +229,10 @@ void VisitDynamicThreadRoot(const RefFieldVisitor &visitorFunc, void *vm)
     ObjectXRay::VisitSTWVMRoots(ecmaVm, visitor);
     if (!panda::ecmascript::g_isEnableCMCGCConcurrentRootMarking) {
         ObjectXRay::VisitConcurrentVMRoots(ecmaVm, visitor);
-    }
-
-    auto profiler = ecmaVm->GetPGOProfiler();
-    if (profiler != nullptr) {
-        profiler->IteratePGOPreFuncList(visitor);
+        auto profiler = ecmaVm->GetPGOProfiler();
+        if (profiler != nullptr) {
+            profiler->IteratePGOPreFuncList(visitor);
+        }
     }
 }
diff --git a/ecmascript/pgo_profiler/pgo_profiler.h b/ecmascript/pgo_profiler/pgo_profiler.h
index c2247f91ce85b39c6d6a38ce8c459be576aa278f..35306c3457f39ba46fd6940ac5344578cebf6d26 100644
--- a/ecmascript/pgo_profiler/pgo_profiler.h
+++ b/ecmascript/pgo_profiler/pgo_profiler.h
@@ -31,6 +31,7 @@
 #include "ecmascript/mem/native_area_allocator.h"
 #include "ecmascript/mem/region.h"
 #include "ecmascript/mem/visitor.h"
+#include "ecmascript/mem/gc_root.h"
 #include "ecmascript/pgo_profiler/pgo_extra_profiler.h"
 #include "ecmascript/pgo_profiler/pgo_state.h"
 #include "ecmascript/pgo_profiler/pgo_utils.h"
@@ -296,7 +297,7 @@ private:
         void SetValue(JSTaggedType value)
         {
-            value_ = value;
+            value_ = GCRoot(value);
         }
 
         void SetWorkList(WorkList* workList)
@@ -314,9 +315,9 @@ private:
             return next_;
         }
 
-        JSTaggedType GetValue() const
+        JSTaggedType GetValue()
        {
-            return value_;
+            return value_.Read().GetRawData();
        }
 
         uintptr_t GetValueAddr() const
@@ -333,7 +334,7 @@ private:
         WorkList* workList_ {nullptr};
         WorkNode* prev_ {nullptr};
         WorkNode* next_ {nullptr};
-        JSTaggedType value_ {JSTaggedValue::Undefined().GetRawData()};
+        GCRoot value_;
     };
 
     class WorkList {
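
Note on the new ArrayTaskDispatcher: instead of splitting the weak stack into chunk objects and pushing them through a GlobalWeakStackQueue, every pool worker (and the dispatching thread itself, inside Wait()) claims disjoint [i, i + batch) windows of one shared array by bumping a single atomic cursor. The sketch below shows the same scheme standalone, assuming std::thread in place of the runtime's Taskpool; ArrayTaskDispatcherSketch and FillRange are illustrative names, not part of the patch.

#include <algorithm>
#include <atomic>
#include <cstddef>
#include <thread>
#include <vector>

// Stand-in for the patch's ArrayTaskDispatcher: workers (and the caller,
// mirroring Wait()) claim disjoint [i, i + batch) windows of one shared
// array by bumping a single atomic cursor -- no queue, no chunk splitting.
template <class T, void (*callback)(T *, T *)>
class ArrayTaskDispatcherSketch {
public:
    ArrayTaskDispatcherSketch(std::vector<T> &&tasks, std::size_t batch)
        : tasks_(std::move(tasks)), batch_(batch) {}

    void Run(unsigned helperThreads)
    {
        std::vector<std::thread> pool;
        for (unsigned i = 0; i < helperThreads; ++i) {
            pool.emplace_back([this] { Drain(); });
        }
        Drain();  // the dispatching thread participates, like Wait() does
        for (auto &t : pool) {
            t.join();
        }
    }

    // Hand the array back afterwards, as dispatcher.Tasks() does in the patch.
    std::vector<T> &Tasks() { return tasks_; }

private:
    void Drain()  // counterpart of the patch's RunImpl()
    {
        const std::size_t size = tasks_.size();
        for (std::size_t i = index_.fetch_add(batch_, std::memory_order_relaxed); i < size;
             i = index_.fetch_add(batch_, std::memory_order_relaxed)) {
            T *begin = &tasks_[i];
            callback(begin, begin + std::min(size - i, batch_));
        }
    }

    std::vector<T> tasks_;
    std::atomic<std::size_t> index_ {0};
    std::size_t batch_;
};

// Example callback with the same shape as ClearWeakRef(begin, end).
void FillRange(int *begin, int *end)
{
    std::fill(begin, end, 1);
}

int main()
{
    ArrayTaskDispatcherSketch<int, FillRange> dispatcher(std::vector<int>(1000, 0), 32);
    dispatcher.Run(3);  // three helper threads plus the calling thread
    return dispatcher.Tasks().back() == 1 ? 0 : 1;
}

Relaxed ordering on the cursor is sufficient here because each window is claimed exactly once; thread start/join in the sketch (the mutex and condition variable in Dispatch()/Wait() in the patch) provide the ordering between the array contents and the workers.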
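
Note on the debug-only clearing in ClearWeakRef: the assignment (*iter = {nullptr, 0}) sits inside ASSERT on purpose, so entries are zeroed only in debug builds, after their values were already copied into locals; the std::all_of ASSERT added to WCollector::PreforwardFlip checks exactly that invariant, and with NDEBUG both sides compile out together. A minimal self-contained sketch of the idiom, with illustrative names (Entry, ClearEntries, CheckAllCleared):

#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

using Entry = std::pair<int *, std::size_t>;  // stand-in for std::pair<RefField<>*, size_t>

// Processing pass: values are copied out of the entry first, then the
// assertion both null-checks the entry and zeroes it. With NDEBUG the whole
// expression disappears, so release builds skip the (redundant) clearing.
void ClearEntries(Entry *begin, Entry *end)
{
    for (Entry *it = begin; it != end; ++it) {
        int *field = it->first;        // read before the assert clears the slot
        std::size_t offset = it->second;
        assert(it->first != nullptr && (*it = {nullptr, 0}).first == nullptr);
        *field = static_cast<int>(offset);  // placeholder for the real weak-ref work
    }
}

// Later validation, as in WCollector::PreforwardFlip: in debug builds every
// entry must have been zeroed by ClearEntries; in release builds this check
// is compiled out together with the clearing above, so the two stay consistent.
void CheckAllCleared(const std::vector<Entry> &entries)
{
    for (const Entry &e : entries) {
        assert(e.first == nullptr && "weak ref must be cleared first");
        (void)e;
    }
}

int main()
{
    int slots[4] = {0, 0, 0, 0};
    std::vector<Entry> entries = {{&slots[0], 0}, {&slots[1], 1}, {&slots[2], 2}, {&slots[3], 3}};
    ClearEntries(entries.data(), entries.data() + entries.size());
    CheckAllCleared(entries);
    return 0;
}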