diff --git a/compiler/optimizer/code_generator/codegen.cpp b/compiler/optimizer/code_generator/codegen.cpp index c20b7ceb3709a07732f9fd2bf30017255fb2613c..715e1cd9dc09d936ad1559922241f72e092674fa 100644 --- a/compiler/optimizer/code_generator/codegen.cpp +++ b/compiler/optimizer/code_generator/codegen.cpp @@ -1023,11 +1023,15 @@ void Codegen::CallIntrinsic(Inst *inst, RuntimeInterface::IntrinsicId id) } } -bool Codegen::EmitCallRuntimeCode(Inst *inst, EntrypointId id) +bool Codegen::EmitCallRuntimeCode(Inst *inst, EntrypointId id, Reg reg) { - MemRef entry(ThreadReg(), GetRuntime()->GetEntrypointTlsOffset(GetArch(), id)); auto encoder = GetEncoder(); - encoder->MakeCall(entry); + if (reg.IsValid()) { + encoder->MakeCall(reg); + } else { + MemRef entry(ThreadReg(), GetRuntime()->GetEntrypointTlsOffset(GetArch(), id)); + encoder->MakeCall(entry); + } SaveStateInst *save_state = (inst == nullptr || inst->IsSaveState()) ? static_cast(inst) : inst->GetSaveState(); @@ -1772,23 +1776,23 @@ void Codegen::CreatePreWRB(Inst *inst, MemRef mem, RegMask preserved, bool store } SCOPED_DISASM_STR(this, "Pre WRB"); ASSERT(barrier_type == panda::mem::BarrierType::PRE_SATB_BARRIER); - ScopedTmpReg tmp(enc); + ScopedTmpReg entrypoint_reg(enc, enc->IsLrAsTempRegEnabledAndReleased()); if (GetGraph()->IsOfflineCompilationMode()) { - GetEncoder()->EncodeLdr(tmp, false, MemRef(ThreadReg(), runtime->GetTlsConcurrentMarkingAddrOffset(GetArch()))); + GetEncoder()->EncodeLdr(entrypoint_reg, false, + MemRef(ThreadReg(), runtime->GetTlsConcurrentMarkingAddrOffset(GetArch()))); } else { - auto concurrent_marker = reinterpret_cast(GetBarrierOperandValue *>( - runtime, panda::mem::BarrierPosition::BARRIER_POSITION_PRE, "CONCURRENT_MARKING_ADDR")); - enc->EncodeMov(tmp, Imm(concurrent_marker)); - } - // Check marker - auto marker_mem = MemRef(tmp); - auto tmp_b = ConvertRegister(tmp.GetReg().GetId(), DataType::INT8); - enc->EncodeLdr(tmp_b, false, marker_mem); + auto pre_entrypoint_addr = reinterpret_cast(GetBarrierOperandValue *>( + runtime, panda::mem::BarrierPosition::BARRIER_POSITION_PRE, "PRE_WRITE_BARRIER_ADDR")); + enc->EncodeMov(entrypoint_reg, Imm(pre_entrypoint_addr)); + } + // Check entrypoint address + auto marker_mem = MemRef(entrypoint_reg); + enc->EncodeLdr(entrypoint_reg, false, marker_mem); auto label = GetEncoder()->CreateLabel(); - enc->EncodeJump(label, tmp_b, Condition::EQ); + enc->EncodeJump(label, entrypoint_reg, Condition::EQ); auto ref_type = inst->GetType() == DataType::REFERENCE ? DataType::GetIntTypeForReference(enc->GetArch()) : DataType::INT64; - auto tmp_ref = ConvertRegister(tmp.GetReg().GetId(), ref_type); + ScopedTmpReg tmp_ref(enc, ConvertDataType(ref_type, GetArch())); auto prev_offset = enc->GetCursorOffset(); // Load old value if (IsVolatileMemInst(inst)) { @@ -1800,7 +1804,7 @@ void Codegen::CreatePreWRB(Inst *inst, MemRef mem, RegMask preserved, bool store CheckObject(tmp_ref, label); auto [live_regs, live_vregs] = GetLiveRegisters(inst); live_regs |= preserved; - CallBarrier(live_regs, live_vregs, EntrypointId::PRE_WRB_FUNC_NO_BRIDGE, tmp_ref); + CallBarrier(live_regs, live_vregs, EntrypointId::PRE_WRB_FUNC_NO_BRIDGE, entrypoint_reg.GetReg(), tmp_ref); if (store_pair) { // store pair doesn't support index and scalar @@ -1817,7 +1821,7 @@ void Codegen::CreatePreWRB(Inst *inst, MemRef mem, RegMask preserved, bool store enc->EncodeLdr(tmp_ref, false, MemRef(mem.GetBase(), second_offset)); } CheckObject(tmp_ref, label); - CallBarrier(live_regs, live_vregs, EntrypointId::PRE_WRB_FUNC_NO_BRIDGE, tmp_ref); + CallBarrier(live_regs, live_vregs, EntrypointId::PRE_WRB_FUNC_NO_BRIDGE, entrypoint_reg.GetReg(), tmp_ref); } enc->BindLabel(label); } @@ -2027,7 +2031,7 @@ void Codegen::CreatePostInterRegionBarrier(Inst *inst, Reg base, Reg reg1, Reg r enc->EncodeJump(label, tmp, Condition::EQ); auto [live_regs, live_vregs] = GetLiveRegisters(inst); - CallBarrier(live_regs, live_vregs, EntrypointId::POST_WRB_UPDATE_CARD_FUNC_NO_BRIDGE, base, reg1); + CallBarrier(live_regs, live_vregs, EntrypointId::POST_WRB_UPDATE_CARD_FUNC_NO_BRIDGE, INVALID_REGISTER, base, reg1); enc->BindLabel(label); if (reg2.IsValid() && reg1 != reg2) { @@ -2040,7 +2044,8 @@ void Codegen::CreatePostInterRegionBarrier(Inst *inst, Reg base, Reg reg1, Reg r enc->EncodeXor(tmp, base, reg2); enc->EncodeShr(tmp, tmp, Imm(region_size_bit)); enc->EncodeJump(label1, tmp, Condition::EQ); - CallBarrier(live_regs, live_vregs, EntrypointId::POST_WRB_UPDATE_CARD_FUNC_NO_BRIDGE, base, reg2); + CallBarrier(live_regs, live_vregs, EntrypointId::POST_WRB_UPDATE_CARD_FUNC_NO_BRIDGE, INVALID_REGISTER, base, + reg2); enc->BindLabel(label1); } } diff --git a/compiler/optimizer/code_generator/codegen.h b/compiler/optimizer/code_generator/codegen.h index 476602a7221ca3cfd2828518ef3d189c87c0ff66..e767e4d51966fc39967c1e18d4caa6ba255d3944 100644 --- a/compiler/optimizer/code_generator/codegen.h +++ b/compiler/optimizer/code_generator/codegen.h @@ -385,11 +385,11 @@ public: // Creates call to IRtoC PostWrb Entrypoint. Online means JIT compilation -> we know GC type. void CreateOnlineIrtocPostWrb(Inst *inst, Reg base, Reg reg1, Reg reg2, bool check_object); template - void CallBarrier(RegMask live_regs, VRegMask live_vregs, EntrypointId id, Args &&...params) + void CallBarrier(RegMask live_regs, VRegMask live_vregs, EntrypointId id, Reg reg, Args &&...params) { SaveCallerRegisters(live_regs, live_vregs, true); FillCallParams(std::forward(params)...); - EmitCallRuntimeCode(nullptr, id); + EmitCallRuntimeCode(nullptr, id, reg); LoadCallerRegisters(live_regs, live_vregs, true); } @@ -519,7 +519,7 @@ protected: void EmitJump(const BasicBlock *bb); - bool EmitCallRuntimeCode(Inst *inst, EntrypointId id); + bool EmitCallRuntimeCode(Inst *inst, EntrypointId id, Reg reg = INVALID_REGISTER); void IntfInlineCachePass(ResolveVirtualInst *resolver, Reg method_reg, Reg tmp_reg, Reg obj_reg); diff --git a/compiler/optimizer/code_generator/encode.h b/compiler/optimizer/code_generator/encode.h index 70d5975234d843f38439bf5379c70f32c63bb5d4..a9063a1fbcec96e5dd8fb2fc6967e50be89887a6 100644 --- a/compiler/optimizer/code_generator/encode.h +++ b/compiler/optimizer/code_generator/encode.h @@ -1042,6 +1042,11 @@ public: return enable_lr_as_temp_reg_; } + bool IsLrAsTempRegEnabledAndReleased() + { + return IsLrAsTempRegEnabled() && IsScratchRegisterReleased(GetTarget().GetLinkReg()); + } + NO_COPY_SEMANTIC(Encoder); NO_MOVE_SEMANTIC(Encoder); @@ -1116,6 +1121,7 @@ public: ScopedTmpRegImpl(ScopedTmpRegImpl &&other) noexcept { + encoder_ = other.encoder_; reg_ = other.reg_; other.reg_ = Reg(); ASSERT(!other.reg_.IsValid()); diff --git a/compiler/optimizer/ir/runtime_interface.h b/compiler/optimizer/ir/runtime_interface.h index 2c2d205462ce07c2e1fc191e2408e2e333a0625c..69259b8077c17e0b84a75ef4a759c98f6ac3f33a 100644 --- a/compiler/optimizer/ir/runtime_interface.h +++ b/compiler/optimizer/ir/runtime_interface.h @@ -457,7 +457,7 @@ public: [[maybe_unused]] ::panda::mem::BarrierPosition barrier_position, [[maybe_unused]] std::string_view operand_name) const { - return ::panda::mem::BarrierOperand(::panda::mem::BarrierOperandType::BOOL_ADDRESS, false); + return ::panda::mem::BarrierOperand(::panda::mem::BarrierOperandType::PRE_WRITE_BARRIER_ADDRESS, false); } virtual uint32_t GetTlsGlobalObjectOffset([[maybe_unused]] Arch arch) const diff --git a/libpandabase/mem/gc_barrier.h b/libpandabase/mem/gc_barrier.h index 84ecc9101e1966a1c3cdfb6224e1be937ca3ccae..43ced046d8d5d150e6bda5fa892c475fbaa0f513 100644 --- a/libpandabase/mem/gc_barrier.h +++ b/libpandabase/mem/gc_barrier.h @@ -66,7 +66,7 @@ enum BarrierType : uint8_t { /** * Pre barrier for SATB. * Pseudocode: - * load CONCURRENT_MARKING_ADDR -> concurrent_marking + * load PRE_WRITE_BARRIER_ADDR -> concurrent_marking * if (UNLIKELY(concurrent_marking)) { * load obj.field -> pre_val // note: if store volatile - we need to have volatile load here * if (pre_val != nullptr) { @@ -76,7 +76,8 @@ enum BarrierType : uint8_t { * store obj.field <- new_val // STORE for which barrier generated * * Runtime should provide these parameters: - * CONCURRENT_MARKING_ADDR - address of bool flag which indicates that we have concurrent marking on + * PRE_WRITE_BARRIER_ADDR - address of pre WRB function (!= nullptr) ---> indicates that we have concurrent + * marking on * STORE_IN_BUFF_TO_MARK_FUNC - address of function to store replaced reference */ PRE_SATB_BARRIER = EncodeBarrierType(2U, BarrierPosition::BARRIER_POSITION_PRE, BarrierActionType::WRITE_BARRIER), @@ -167,15 +168,16 @@ using ObjTwoRefProcessFunc = void (*)(const void *, const void *); enum class BarrierOperandType { ADDRESS = 0, // just an address (void*) - BOOL_ADDRESS, // contains address of bool value (bool*) + PRE_WRITE_BARRIER_ADDRESS, // contains nullptr or address of function (during concurrent marking) with this + // sig: void foo(void* ); UINT8_ADDRESS, // contains address of uint8_t value FUNC_WITH_OBJ_REF_ADDRESS, // contains address of function with this sig: void foo(void* ); UINT8_LITERAL, // contains uint8_t value FUNC_WITH_TWO_OBJ_REF_ADDRESSES, // contains address of function with this sig: void foo(void* , void* ); }; -using BarrierOperandValue = - std::variant *, uint8_t *, ObjRefProcessFunc, uint8_t, ObjTwoRefProcessFunc>; +using BarrierOperandValue = std::variant *, uint8_t *, ObjRefProcessFunc, + uint8_t, ObjTwoRefProcessFunc>; class BarrierOperand { public: diff --git a/runtime/include/managed_thread.h b/runtime/include/managed_thread.h index 486a614f9cd426326e532792259d53b9d68a38f8..dbcd3811207fb555c56eb20e4dfe2f21989a267a 100644 --- a/runtime/include/managed_thread.h +++ b/runtime/include/managed_thread.h @@ -375,7 +375,7 @@ public: } static constexpr uint32_t GetTlsConcurrentMarkingAddrOffset() { - return MEMBER_OFFSET(ManagedThread, concurrent_marking_addr_); + return MEMBER_OFFSET(ManagedThread, pre_wrb_addr_); } static constexpr uint32_t GetTlsStringClassPointerOffset() { diff --git a/runtime/include/thread.h b/runtime/include/thread.h index 27f3720dc71fe15187fd2079bdabbb9df4d75ee9..3554f68c51d12e6f4095aa1973bdd32242387ea1 100644 --- a/runtime/include/thread.h +++ b/runtime/include/thread.h @@ -348,11 +348,11 @@ protected: mem::TLAB *tlab_ {nullptr}; void *card_table_addr_ {nullptr}; void *card_table_min_addr_ {nullptr}; + void *pre_wrb_addr_ {nullptr}; // keeps IRtoC GC PostWrb impl for storing one object void *post_wrb_one_object_ {nullptr}; // keeps IRtoC GC PostWrb impl for storing two objects void *post_wrb_two_objects_ {nullptr}; - void *concurrent_marking_addr_ {nullptr}; void *string_class_ptr_ {nullptr}; PandaVector *pre_buff_ {nullptr}; void *language_extension_data_ {nullptr}; diff --git a/runtime/mem/gc/g1/g1-gc.cpp b/runtime/mem/gc/g1/g1-gc.cpp index d5867d110e3ce19af5a90bca13871401b81724a2..f2fd017308dab0e59ae77d3a9dbfa2a869d1278d 100644 --- a/runtime/mem/gc/g1/g1-gc.cpp +++ b/runtime/mem/gc/g1/g1-gc.cpp @@ -195,7 +195,7 @@ void G1GC::InitGCBits(panda::ObjectHeader *obj_header) // In this case GC may don't mark it (for example only vregs may contain reference to the new object) // and collect. To avoid such situations add objects to a special buffer which // will be processed at remark stage. - if (this->GetCardTable()->GetCardPtr(ToUintPtr(obj_header))->IsYoung() || !concurrent_marking_flag_) { + if (this->GetCardTable()->GetCardPtr(ToUintPtr(obj_header))->IsYoung() || pre_wrb_entrypoint_ == nullptr) { return; } os::memory::LockHolder lock(satb_and_newobj_buf_lock_); @@ -552,7 +552,8 @@ void G1GC::RunPhasesImpl(panda::GCTask &task) this->GetPandaVm()->GetMemStats()->RecordGCPauseStart(); // Check there is no concurrent mark running by another thread. // Atomic with relaxed order reason: concurrent access with another thread which can running GC now - ASSERT(!concurrent_marking_flag_.load(std::memory_order_relaxed)); + ASSERT(pre_wrb_entrypoint_.load(std::memory_order_relaxed) == nullptr); + WaitForUpdateRemsetThread(); if (NeedFullGC(task)) { task.collection_type = GCCollectionType::FULL; @@ -677,16 +678,18 @@ void G1GC::ScheduleMixedGCAndConcurrentMark(panda::GCTask &task) } else if (!interrupt_concurrent_flag_ && this->ShouldRunTenuredGC(task)) { ASSERT(collection_set_.empty()); // Init concurrent marking - concurrent_marking_flag_ = true; + auto addr = this->GetBarrierSet()->GetBarrierOperand(panda::mem::BarrierPosition::BARRIER_POSITION_PRE, + "STORE_IN_BUFF_TO_MARK_FUNC"); + pre_wrb_entrypoint_ = std::get(addr.GetValue()); } } template void G1GC::RunConcurrentMarkIfNeeded(panda::GCTask &task) { - if (concurrent_marking_flag_ && !interrupt_concurrent_flag_) { + if (pre_wrb_entrypoint_ != nullptr && !interrupt_concurrent_flag_) { StartMarking(task); - concurrent_marking_flag_ = false; + pre_wrb_entrypoint_ = nullptr; // interrupt_concurrent_flag_ may be set during concurrent marking. if (!interrupt_concurrent_flag_) { Remark(task); @@ -729,7 +732,7 @@ void G1GC::InitializeImpl() // TODO(dtrubenkov): initialize barriers auto barrier_set = allocator->New( - allocator, &concurrent_marking_flag_, &PreWrbFuncEntrypoint, &PostWrbUpdateCardFuncEntrypoint, + allocator, &pre_wrb_entrypoint_, &PreWrbFuncEntrypoint, &PostWrbUpdateCardFuncEntrypoint, panda::helpers::math::GetIntLog2(this->GetG1ObjectAllocator()->GetRegionSize()), this->GetCardTable(), updated_refs_queue_, &queue_lock_); ASSERT(barrier_set != nullptr); diff --git a/runtime/mem/gc/g1/g1-gc.h b/runtime/mem/gc/g1/g1-gc.h index 83992487adbbf47ccd0e8fdc10808b1cc98e0a50..861b217396c894dfe90aae8c8fd1a355a39633b5 100644 --- a/runtime/mem/gc/g1/g1-gc.h +++ b/runtime/mem/gc/g1/g1-gc.h @@ -330,7 +330,9 @@ private: void BuildCrossYoungRemSets(const Container &young); Marker marker_; - std::atomic concurrent_marking_flag_ {false}; //! flag indicates if we currently in concurrent marking phase + std::atomic pre_wrb_entrypoint_ { + nullptr}; //! if NOT nullptr, stores pointer to PreWrbFunc and indicates we are currently in concurrent + //! marking phase std::atomic interrupt_concurrent_flag_ {false}; //! flag indicates if we need to interrupt concurrent marking std::function post_queue_func_ {nullptr}; //! function called in the post WRB /** diff --git a/runtime/mem/gc/gc_barrier_set.cpp b/runtime/mem/gc/gc_barrier_set.cpp index 6d1db68075258980ab8d2f6509cc80e764f0ece7..b665e7ef4a47d6213a78b397134d681c25b1f015 100644 --- a/runtime/mem/gc/gc_barrier_set.cpp +++ b/runtime/mem/gc/gc_barrier_set.cpp @@ -128,13 +128,13 @@ void GCGenBarrierSet::PostBarrierEveryObjectFieldWrite(const void *obj_addr, [[m bool GCG1BarrierSet::IsPreBarrierEnabled() { // Atomic with relaxed order reason: no data race because G1GC sets this flag on pause - return concurrent_marking_flag_->load(std::memory_order_relaxed); + return pre_wrb_entrypoint_->load(std::memory_order_relaxed) != nullptr; } void GCG1BarrierSet::PreBarrier(void *pre_val_addr) { LOG_IF(pre_val_addr != nullptr, DEBUG, GC) << "GC PreBarrier: with pre-value " << pre_val_addr; - ASSERT(*concurrent_marking_flag_); + ASSERT(*pre_wrb_entrypoint_ != nullptr); PreSATBBarrier(reinterpret_cast(pre_val_addr)); } diff --git a/runtime/mem/gc/gc_barrier_set.h b/runtime/mem/gc/gc_barrier_set.h index ab2c5f3438dab478b0b29cf049d426629bcaaf67..05b1e808682499e3ccd1938be6958a139a8f3024 100644 --- a/runtime/mem/gc/gc_barrier_set.h +++ b/runtime/mem/gc/gc_barrier_set.h @@ -201,12 +201,12 @@ public: GCG1BarrierSet(mem::InternalAllocatorPtr allocator, // PRE ARGS: - std::atomic *concurrent_marking_flag, ObjRefProcessFunc pre_store_func, + std::atomic *pre_wrb_entrypoint, ObjRefProcessFunc pre_store_func, // POST ARGS: ObjTwoRefProcessFunc post_func, uint8_t region_size_bits_count, CardTable *card_table, ThreadLocalCardQueues *updated_refs_queue, os::memory::Mutex *queue_lock) : GCBarrierSet(allocator, BarrierType::PRE_SATB_BARRIER, BarrierType::POST_INTERREGION_BARRIER), - concurrent_marking_flag_(concurrent_marking_flag), + pre_wrb_entrypoint_(pre_wrb_entrypoint), pre_store_func_(pre_store_func), post_func_(post_func), region_size_bits_count_(region_size_bits_count), @@ -219,8 +219,8 @@ public: ASSERT(post_func_ != nullptr); // PRE AddBarrierOperand( - BarrierPosition::BARRIER_POSITION_PRE, "CONCURRENT_MARKING_ADDR", - BarrierOperand(BarrierOperandType::BOOL_ADDRESS, BarrierOperandValue(concurrent_marking_flag))); + BarrierPosition::BARRIER_POSITION_PRE, "PRE_WRITE_BARRIER_ADDR", + BarrierOperand(BarrierOperandType::PRE_WRITE_BARRIER_ADDRESS, BarrierOperandValue(pre_wrb_entrypoint_))); AddBarrierOperand( BarrierPosition::BARRIER_POSITION_PRE, "STORE_IN_BUFF_TO_MARK_FUNC", BarrierOperand(BarrierOperandType::FUNC_WITH_OBJ_REF_ADDRESS, BarrierOperandValue(pre_store_func_))); @@ -272,7 +272,7 @@ private: using PostFuncT = std::function *; // Store operands explicitly for interpreter perf // PRE BARRIER - std::atomic *concurrent_marking_flag_ {nullptr}; + std::atomic *pre_wrb_entrypoint_ {nullptr}; ObjRefProcessFunc pre_store_func_ {nullptr}; // POST BARRIER ObjTwoRefProcessFunc post_func_; //! function which is called for the post barrier if all conditions diff --git a/runtime/thread.cpp b/runtime/thread.cpp index 256b28c68cbd24f85ac891c22961c14861abf9d2..f76ccccf95d52160224c1a64af5fa8d0c33523b9 100644 --- a/runtime/thread.cpp +++ b/runtime/thread.cpp @@ -83,8 +83,8 @@ Thread::Thread(PandaVM *vm, ThreadType thread_type) InitCardTableData(barrier_set_); if (barrier_set_->GetPreType() != panda::mem::BarrierType::PRE_WRB_NONE) { auto addr = barrier_set_->GetBarrierOperand(panda::mem::BarrierPosition::BARRIER_POSITION_PRE, - "CONCURRENT_MARKING_ADDR"); - concurrent_marking_addr_ = std::get *>(addr.GetValue()); + "PRE_WRITE_BARRIER_ADDR"); + pre_wrb_addr_ = std::get *>(addr.GetValue()); } } } @@ -203,8 +203,8 @@ ManagedThread::ManagedThread(ThreadId id, mem::InternalAllocatorPtr allocator, P auto barrier_set = gc->GetBarrierSet(); if (barrier_set->GetPreType() != panda::mem::BarrierType::PRE_WRB_NONE) { auto addr = barrier_set->GetBarrierOperand(panda::mem::BarrierPosition::BARRIER_POSITION_PRE, - "CONCURRENT_MARKING_ADDR"); - concurrent_marking_addr_ = std::get *>(addr.GetValue()); + "PRE_WRITE_BARRIER_ADDR"); + pre_wrb_addr_ = std::get *>(addr.GetValue()); pre_buff_ = allocator->New>(); // need to initialize in constructor because we have barriers between constructor and InitBuffers in // InitializedClasses