diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_proepilog.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_proepilog.h index 9fc9e8f102fa69e87d9e2b48661e9017cff11dff..a957c4a6dc0f345f035f15c891dcdcf6a35d977b 100644 --- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_proepilog.h +++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_proepilog.h @@ -30,6 +30,7 @@ class AArch64GenProEpilog : public GenProEpilog { explicit AArch64GenProEpilog(CGFunc &func) : GenProEpilog(func) { useFP = func.UseFP(); stackBaseReg = useFP ? R29 : RSP; + callSitesMap.clear(); } ~AArch64GenProEpilog() override = default; @@ -41,7 +42,7 @@ class AArch64GenProEpilog : public GenProEpilog { BB &GenStackGuardCheckInsn(BB&); bool HasLoop(); bool OptimizeTailBB(BB &bb, std::set &callInsns); - void TailCallBBOpt(const BB &exitBB, std::set &callInsns); + void TailCallBBOpt(BB &bb, std::set &callInsns); bool InsertOpndRegs(Operand &opnd, std::set &vecRegs); bool InsertInsnRegs(Insn &insn, bool insetSource, std::set &vecSourceRegs, bool insertTarget, std::set &vecTargetRegs); @@ -70,13 +71,24 @@ class AArch64GenProEpilog : public GenProEpilog { void AppendJump(const MIRSymbol &func); void GenerateEpilog(BB&); void GenerateEpilogForCleanup(BB&); + void ConvertToTailCalls(std::set &callInsnsMap); Insn &CreateAndAppendInstructionForAllocateCallFrame(int64 argsToStkPassSize, AArch64reg reg0, AArch64reg reg1, RegType rty); Insn &AppendInstructionForAllocateOrDeallocateCallFrame(int64 argsToStkPassSize, AArch64reg reg0, AArch64reg reg1, RegType rty, bool isAllocate); + std::set &GetCallSitesMap() { + return callSitesMap; + } + void SetTailcallExitBB(BB *bb) { + tailcallExitBB = bb; + } + BB *GetTailcallExitBB() { + return tailcallExitBB; + } static constexpr const int32 kOffset8MemPos = 8; static constexpr const int32 kOffset16MemPos = 16; - + std::set callSitesMap; + BB* tailcallExitBB = nullptr; bool useFP = true; /* frame pointer(x29) is available as a general-purpose register if useFP is set as false */ AArch64reg stackBaseReg = RFP; diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_fixshortbranch.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_fixshortbranch.cpp index bcee73373bce633272b9deb6454630590777777d..a8c534c9eaeab8c069e80ac30c5a0911bd7949b5 100644 --- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_fixshortbranch.cpp +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_fixshortbranch.cpp @@ -32,6 +32,9 @@ bool AArch64FixShortBranch::DistanceCheck(const BB &bb, LabelIdx targLabIdx, uin while (tInsn == nullptr || !tInsn->IsMachineInstruction()) { if (tInsn == nullptr) { tBB = tBB->GetNext(); + if (tBB == nullptr) { /* tailcallopt may make the target block empty */ + return true; + } tInsn = tBB->GetFirstInsn(); } else { tInsn = tInsn->GetNext(); diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_insn.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_insn.cpp index af84671c3c42a1503d9c50770492436943b8c0a8..fb4b2c9d002ea9b3796e7ef3633cdb81694f3a2c 100644 --- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_insn.cpp +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_insn.cpp @@ -1155,6 +1155,9 @@ void AArch64Insn::Emit(const CG &cg, Emitter &emitter) const { EmitStringIndexOf(emitter); return; } + case MOP_pseudo_none: { + return; + } default: break; } diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp index 5608700ef183897b1cee9d9bda5da88acdfa89ef..699763a7ab99ce2bafc09328aee179da242f1950 100644 --- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp @@ -88,9 +88,15 @@ bool AArch64GenProEpilog::HasLoop() { /* * Remove redundant mov and mark optimizable bl/blr insn in the BB. - * Return value: true if is empty bb, otherwise false. + * Return value: true to call this modified block again. */ bool AArch64GenProEpilog::OptimizeTailBB(BB &bb, std::set &callInsns) { + if (bb.NumInsn() == 1 && + (bb.GetLastInsn()->GetMachineOpcode() != MOP_xbr && + bb.GetLastInsn()->GetMachineOpcode() != MOP_xblr && + bb.GetLastInsn()->GetMachineOpcode() != MOP_xuncond)) { + return false; + } FOR_BB_INSNS_REV_SAFE(insn, &bb, prev_insn) { if (!insn->IsMachineInstruction()) { continue; @@ -111,9 +117,24 @@ bool AArch64GenProEpilog::OptimizeTailBB(BB &bb, std::set &callInsns) { bb.RemoveInsn(*insn); break; } - case MOP_xbl: case MOP_xblr: { - (void)callInsns.insert(insn); + if (insn->GetOperand(0).IsRegister()) { + RegOperand ® = static_cast(insn->GetOperand(0)); + if (AArch64Abi::IsCalleeSavedReg(static_cast(reg.GetRegisterNumber()))) { + return false; /* can't tailcall, register will be overwritten by restore */ + } + } + /* flow through */ + } + case MOP_xbl: { + callInsns.insert(insn); + return false; + } + case MOP_xuncond: { + LabelOperand &bLab = static_cast(insn->GetOperand(0)); + if (cgFunc.GetExitBB(0)->GetLabIdx() == bLab.GetLabelIndex()) { + break; + } return false; } default: @@ -124,10 +145,22 @@ bool AArch64GenProEpilog::OptimizeTailBB(BB &bb, std::set &callInsns) { return true; } -/* Recursively invoke this function until exitBB's precursor not exist. */ -void AArch64GenProEpilog::TailCallBBOpt(const BB &exitBB, std::set &callInsns) { - for (auto tmpBB : exitBB.GetPreds()) { - if (tmpBB->GetSuccs().size() != 1 || !tmpBB->GetEhSuccs().empty() || tmpBB->GetKind() != BB::kBBFallthru) { +/* Recursively invoke this function for all predecessors of exitBB */ +void AArch64GenProEpilog::TailCallBBOpt(BB &bb, std::set &callInsns) { + /* callsite also in the return block as in "if () return; else foo();" + call in the exit block */ + if (!bb.IsEmpty()) { + if (bb.GetLastInsn()->GetMachineOpcode() == MOP_xbl || bb.GetLastInsn()->GetMachineOpcode() == MOP_xblr) { + if (OptimizeTailBB(bb, callInsns)) { + TailCallBBOpt(bb, callInsns); + } + return; + } + } + + for (auto tmpBB : bb.GetPreds()) { + if (tmpBB->GetSuccs().size() != 1 || !tmpBB->GetEhSuccs().empty() || + (tmpBB->GetKind() != BB::kBBFallthru && tmpBB->GetKind() != BB::kBBGoto)) { continue; } @@ -170,6 +203,7 @@ bool AArch64GenProEpilog::TailCallOpt() { } size_t exitBBSize = cgFunc.GetExitBBsVec().size(); + /* For now to reduce complexity */ if (exitBBSize > 1) { return false; } @@ -185,64 +219,95 @@ bool AArch64GenProEpilog::TailCallOpt() { } else { exitBB = cgFunc.GetExitBBsVec().front(); } + SetTailcallExitBB(exitBB); + FOR_BB_INSNS(insn, exitBB) { if (insn->IsMachineInstruction() && !insn->IsPseudoInstruction()) { CHECK_FATAL(false, "exit bb should be empty."); } } - std::set callInsns; - TailCallBBOpt(*exitBB, callInsns); + std::set &callInsns = GetCallSitesMap(); + callInsns.clear(); + if (exitBBSize == 1) { + TailCallBBOpt(*exitBB, callInsns); + } else { + CHECK_FATAL(0, "No tailopt for multiple exit blocks"); + } + /* regular calls exist in function */ if (nCount != callInsns.size()) { return false; } - /* Replace all of the call insns. */ - for (auto callInsn : callInsns) { - MOperator insnMop = callInsn->GetMachineOpcode(); - switch (insnMop) { - case MOP_xbl: { - callInsn->SetMOP(MOP_tail_call_opt_xbl); - break; - } - case MOP_xblr: { - callInsn->SetMOP(MOP_tail_call_opt_xblr); - break; + return true; +} + +static +bool IsAddOrSubOp(MOperator mOp) { + switch (mOp) { + case MOP_xaddrrr: + case MOP_xaddrrrs: + case MOP_xxwaddrrre: + case MOP_xaddrri24: + case MOP_xaddrri12: + case MOP_xsubrrr: + case MOP_xsubrrrs: + case MOP_xxwsubrrre: + case MOP_xsubrri12: + return true; + default: + return false; + } +} + +/* tailcallopt cannot be used if stack address of this function is taken and passed, + not checking the passing for now, just taken */ +static +bool IsStackAddrTaken(CGFunc &cgFunc) { + FOR_ALL_BB(bb, &cgFunc) { + FOR_BB_INSNS_REV(insn, bb) { + if (IsAddOrSubOp(insn->GetMachineOpcode())) { + for (uint32 i = 0; i < insn->GetOperandSize(); i++) { + if (insn->GetOperand(i).IsRegister()) { + RegOperand ® = static_cast(insn->GetOperand(i)); + if (reg.GetRegisterNumber() == R29 || reg.GetRegisterNumber() == R31) { + return true; + } + } + } } - default: - ASSERT(false, "Internal error."); - break; } } - return true; + return false; } bool AArch64GenProEpilog::NeedProEpilog() { if (cgFunc.GetMirModule().GetSrcLang() != kSrcLangC) { return true; - } else if (static_cast(cgFunc.GetMemlayout())->GetSizeOfLocals() > 0 || - cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) || cgFunc.HasVLAOrAlloca()) { + } else if (cgFunc.GetFunction().GetAttr(FUNCATTR_varargs) || cgFunc.HasVLAOrAlloca()) { return true; } - auto &aarchCGFunc = static_cast(cgFunc); - const MapleVector ®sToRestore = aarchCGFunc.GetCalleeSavedRegs(); - size_t calleeSavedRegSize = kTwoRegister; - CHECK_FATAL(regsToRestore.size() >= calleeSavedRegSize, "Forgot FP and LR ?"); - if (regsToRestore.size() > calleeSavedRegSize || aarchCGFunc.HasStackLoadStore() || - cgFunc.GetFunction().GetAttr(FUNCATTR_callersensitive)) { - return true; - } - if (cgFunc.GetCG()->DoPrologueEpilogue()) { - return !TailCallOpt(); + bool funcHasCalls = false; + if (cgFunc.GetCG()->DoPrologueEpilogue() && !IsStackAddrTaken(cgFunc)) { + funcHasCalls = !TailCallOpt(); // return value == "no call instr/only or 1 tailcall" } else { FOR_ALL_BB(bb, &cgFunc) { FOR_BB_INSNS_REV(insn, bb) { if (insn->IsCall()) { - return true; + funcHasCalls = true; } } } } + auto &aarchCGFunc = static_cast(cgFunc); + const MapleVector ®sToRestore = aarchCGFunc.GetCalleeSavedRegs(); + size_t calleeSavedRegSize = kTwoRegister; + CHECK_FATAL(regsToRestore.size() >= calleeSavedRegSize, "Forgot FP and LR ?"); + if (funcHasCalls || regsToRestore.size() > calleeSavedRegSize || aarchCGFunc.HasStackLoadStore() || + static_cast(cgFunc.GetMemlayout())->GetSizeOfLocals() > 0 || + cgFunc.GetFunction().GetAttr(FUNCATTR_callersensitive)) { + return true; + } return false; } @@ -1756,6 +1821,89 @@ void AArch64GenProEpilog::GenerateEpilogForCleanup(BB &bb) { } } + +void AArch64GenProEpilog::ConvertToTailCalls(std::set &callInsnsMap) { + BB *exitBB = GetTailcallExitBB(); + + /* ExitBB is filled only by now. If exitBB has restore of SP indicating extra stack space has + been allocated, such as a function call with more than 8 args, argument with large aggr etc */ + FOR_BB_INSNS(insn, exitBB) { + if (insn->GetMachineOpcode() == MOP_xaddrri12 || insn->GetMachineOpcode() == MOP_xaddrri24) { + RegOperand ® = static_cast(insn->GetOperand(0)); + if (reg.GetRegisterNumber() == RSP) { + return; + } + } + } + + /* Replace all of the call insns. */ + for (Insn *callInsn : callInsnsMap) { + MOperator insnMop = callInsn->GetMachineOpcode(); + switch (insnMop) { + case MOP_xbl: { + callInsn->SetMOP(MOP_tail_call_opt_xbl); + break; + } + case MOP_xblr: { + callInsn->SetMOP(MOP_tail_call_opt_xblr); + break; + } + default: + CHECK_FATAL(false, "Internal error."); + break; + } + BB *bb = callInsn->GetBB(); + if (bb->GetKind() == BB::kBBGoto) { + bb->SetKind(BB::kBBFallthru); + if (bb->GetLastInsn()->GetMachineOpcode() == MOP_xuncond) { + bb->RemoveInsn(*bb->GetLastInsn()); + } + } + for (auto sBB: bb->GetSuccs()) { + bb->RemoveSuccs(*sBB); + sBB->RemovePreds(*bb); + break; + } + } + + /* copy instrs from exit block */ + for (Insn *callInsn: callInsnsMap) { + BB *toBB = callInsn->GetBB(); + BB *fromBB = exitBB; + if (toBB == fromBB) { + /* callsite also in the return exit block, just change the return to branch */ + Insn *lastInsn = toBB->GetLastInsn(); + if (lastInsn->GetMachineOpcode() == MOP_xret) { + Insn *newInsn = cgFunc.GetTheCFG()->CloneInsn(*callInsn); + toBB->ReplaceInsn(*lastInsn, *newInsn); + toBB->RemoveInsn(*callInsn); + return; + } + CHECK_FATAL(0, "Tailcall in incorrect block"); + } + FOR_BB_INSNS_SAFE(insn, fromBB, next) { + if (insn->IsMachineInstruction() && insn->GetMachineOpcode() != MOP_xret) { + Insn *newInsn = cgFunc.GetTheCFG()->CloneInsn(*insn); + newInsn->SetDoNotRemove(true); + toBB->InsertInsnBefore(*callInsn, *newInsn); + } + } + } + + /* remove instrs in exit block */ + BB *bb = exitBB; + if (bb->GetPreds().size() > 0) { + return; /* exit block still needed by other non-tailcall blocks */ + } + Insn &junk = cgFunc.GetCG()->BuildInstruction(MOP_pseudo_none); + bb->AppendInsn(junk); + FOR_BB_INSNS_SAFE(insn, bb, next) { + if (insn->GetMachineOpcode() != MOP_pseudo_none) { + bb->RemoveInsn(*insn); + } + } +} + void AArch64GenProEpilog::Run() { CHECK_FATAL(cgFunc.GetFunction().GetBody()->GetFirst()->GetOpCode() == OP_label, "The first statement should be a label"); @@ -1793,5 +1941,10 @@ void AArch64GenProEpilog::Run() { if (cgFunc.GetFunction().IsJava()) { GenerateEpilogForCleanup(*(cgFunc.GetCleanupBB())); } + + std::set &callInsnsMap = GetCallSitesMap(); + if (cgFunc.GetMirModule().IsCModule() && !callInsnsMap.empty()) { + ConvertToTailCalls(callInsnsMap); + } } } /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/cfgo.cpp b/src/mapleall/maple_be/src/cg/cfgo.cpp index 2dad1daf98f341feeaa03640f386fd845e921d36..b1e0e764e53db7193bbe7a38f6bc0c5fe61e4273 100644 --- a/src/mapleall/maple_be/src/cg/cfgo.cpp +++ b/src/mapleall/maple_be/src/cg/cfgo.cpp @@ -233,6 +233,11 @@ bool ChainingPattern::Optimize(BB &curBB) { } if (curBB.GetKind() == BB::kBBGoto && !curBB.IsEmpty()) { + Insn* last = curBB.GetLastInsn(); + if (last->GetMachineOpcode() == MOP_tail_call_opt_xbl || last->GetMachineOpcode() == MOP_tail_call_opt_xblr) { + return false; + } + BB *sucBB = cgFunc->GetTheCFG()->GetTargetSuc(curBB); /* * BB2 can be merged into BB1, if diff --git a/src/mapleall/maple_be/src/cg/cgbb.cpp b/src/mapleall/maple_be/src/cg/cgbb.cpp index fe542baadf04bfec86b823de972b87fe0ca50d89..c8c0d42c70de93e9532cd42b6983b8a326c59b2a 100644 --- a/src/mapleall/maple_be/src/cg/cgbb.cpp +++ b/src/mapleall/maple_be/src/cg/cgbb.cpp @@ -183,7 +183,7 @@ void BB::InsertAtBeginning(BB &bb) { int32 BB::NumInsn() const { int32 bbSize = 0; FOR_BB_INSNS_CONST(i, this) { - if (i->IsImmaterialInsn()) { + if (i->IsImmaterialInsn() || i->IsDbgInsn()) { continue; } ++bbSize;