diff --git a/src/mapleall/maple_be/BUILD.gn b/src/mapleall/maple_be/BUILD.gn index d26085e3921c56046e89a2137c24e464b8ea0636..cf3799fdfb08fc16299d2a2de90d734784e0d1e8 100644 --- a/src/mapleall/maple_be/BUILD.gn +++ b/src/mapleall/maple_be/BUILD.gn @@ -108,6 +108,7 @@ src_libcgaarch64 = [ "src/cg/aarch64/aarch64_lsra.cpp", "src/cg/aarch64/aarch64_ra_opt.cpp", "src/cg/aarch64/aarch64_alignment.cpp", + "src/cg/aarch64/aarch64_regsaves.cpp", ] src_libcgriscv64 = [ @@ -186,6 +187,7 @@ src_libcg = [ "src/cg/alignment.cpp", "src/cg/cg_ssu_pre.cpp", "src/cg/cg_ssa_pre.cpp", + "src/cg/regsaves.cpp", ] cflags_cc -= [ "-DRC_NO_MMAP" ] diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h index 5f151e98e53019c2a38db4233688c6b243aa0c93..30289b5199479dc12d8a361f3515e80284cf6e27 100644 --- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h +++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h @@ -30,6 +30,7 @@ class AArch64CGFunc : public CGFunc { MemPool &memPool, StackMemPool &stackMp, MapleAllocator &mallocator, uint32 funcId) : CGFunc(mod, c, f, b, memPool, stackMp, mallocator, funcId), calleeSavedRegs(mallocator.Adapter()), + proEpilogSavedRegs(mallocator.Adapter()), formalRegList(mallocator.Adapter()), phyRegOperandTable(mallocator.Adapter()), hashLabelOpndTable(mallocator.Adapter()), @@ -624,6 +625,10 @@ class AArch64CGFunc : public CGFunc { MIRPreg *GetPseudoRegFromVirtualRegNO(const regno_t vRegNO, bool afterSSA = false) const; + MapleVector &GetProEpilogSavedRegs() { + return proEpilogSavedRegs; + } + private: enum RelationOperator : uint8 { kAND, @@ -646,6 +651,7 @@ class AArch64CGFunc : public CGFunc { using MovkLslOperandArray = std::array; MapleVector calleeSavedRegs; + MapleVector proEpilogSavedRegs; MapleVector formalRegList; /* store the parameters register used by this function */ uint32 refCount = 0; /* Ref count number. 
0 if function don't have "bl MCC_InitializeLocalStackRef" */ int32 beginOffset = 0; /* Begin offset based x29. */ diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_proepilog.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_proepilog.h index 4fa72c3caf652df1179ecef3f3be9ce575c9693d..40bd31e0a994a6eb777ed906d28d38c363120396 100644 --- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_proepilog.h +++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_proepilog.h @@ -36,6 +36,12 @@ class AArch64GenProEpilog : public GenProEpilog { bool TailCallOpt() override; bool NeedProEpilog() override; + static AArch64MemOperand *SplitStpLdpOffsetForCalleeSavedWithAddInstruction( + CGFunc &cgFunc, const AArch64MemOperand &mo, uint32 bitLen, AArch64reg baseReg = AArch64reg::kRinvalid); + static void AppendInstructionPushPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty, int offset); + static void AppendInstructionPushSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int offset); + static void AppendInstructionPopSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int offset); + static void AppendInstructionPopPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty, int offset); void Run() override; private: void GenStackGuard(BB&); @@ -50,10 +56,6 @@ class AArch64GenProEpilog : public GenProEpilog { bool BackwardFindDependency(BB &ifbb, std::set &vecReturnSourceReg, std::list &existingInsns, std::list &moveInsns); BB *IsolateFastPath(BB&); - AArch64MemOperand *SplitStpLdpOffsetForCalleeSavedWithAddInstruction(const AArch64MemOperand &mo, uint32 bitLen, - AArch64reg baseReg = AArch64reg::kRinvalid); - void AppendInstructionPushPair(AArch64reg reg0, AArch64reg reg1, RegType rty, int offset); - void AppendInstructionPushSingle(AArch64reg reg, RegType rty, int offset); void AppendInstructionAllocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty); void AppendInstructionAllocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType 
rty); void GeneratePushRegs(); @@ -63,8 +65,6 @@ class AArch64GenProEpilog : public GenProEpilog { void GenerateRet(BB &bb); bool TestPredsOfRetBB(const BB &exitBB); - void AppendInstructionPopSingle(AArch64reg reg, RegType rty, int offset); - void AppendInstructionPopPair(AArch64reg reg0, AArch64reg reg1, RegType rty, int offset); void AppendInstructionDeallocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty); void AppendInstructionDeallocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty); void GeneratePopRegs(); diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_regsaves.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_regsaves.h new file mode 100644 index 0000000000000000000000000000000000000000..ef5271771a9aa7d7ee5d9bf9c43ae83b8c0d32f5 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_regsaves.h @@ -0,0 +1,239 @@ +/* + * Copyright (c) [2022] Futurewei Technologies Co., Ltd. All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan Permissive Software License v2. + * You can use this software according to the terms and conditions of the MulanPSL - 2.0. + * You may obtain a copy of MulanPSL - 2.0 at: + * + * https://opensource.org/licenses/MulanPSL-2.0 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the MulanPSL - 2.0 for more details. + */ + +#ifndef MAPLEBE_INCLUDE_CG_AARCH64REGSAVESOPT_H +#define MAPLEBE_INCLUDE_CG_AARCH64REGSAVESOPT_H + +#include "cg.h" +#include "regsaves.h" +#include "aarch64_cg.h" +#include "aarch64_insn.h" +#include "aarch64_operand.h" + +namespace maplebe { + +/* Saved reg info. 
This class is created to avoid the complexity of + nested Maple Containers */ +class SavedRegInfo { + public: + bool insertAtLastMinusOne = false; + explicit SavedRegInfo(MapleAllocator &alloc) + : saveSet(alloc.Adapter()), + restoreEntrySet(alloc.Adapter()), + restoreExitSet(alloc.Adapter()) {} + + bool ContainSaveReg(regno_t r) { + if (saveSet.find(r) != saveSet.end()) { + return true; + } + return false; + } + + bool ContainEntryReg(regno_t r) { + if (restoreEntrySet.find(r) != restoreEntrySet.end()) { + return true; + } + return false; + } + + bool ContainExitReg(regno_t r) { + if (restoreExitSet.find(r) != restoreExitSet.end()) { + return true; + } + return false; + } + + void InsertSaveReg(regno_t r) { + saveSet.insert(r); + } + + void RemoveSaveReg(regno_t r) { + saveSet.erase(r); + } + + void InsertEntryReg(regno_t r) { + restoreEntrySet.insert(r); + } + + void InsertExitReg(regno_t r) { + restoreExitSet.insert(r); + } + + MapleSet &GetSaveSet() { + return saveSet; + } + + MapleSet &GetEntrySet() { + return restoreEntrySet; + } + + MapleSet &GetExitSet() { + return restoreExitSet; + } + + private: + MapleSet saveSet; + MapleSet restoreEntrySet; + MapleSet restoreExitSet; +}; + +class SavedBBInfo { + public: + explicit SavedBBInfo(MapleAllocator &alloc) + : bbList (alloc.Adapter()) {} + + MapleSet &GetBBList() { + return bbList; + } + + void InsertBB(BB *bb) { + bbList.insert(bb); + } + + void RemoveBB(BB *bb) { + bbList.erase(bb); + } + + private: + MapleSet bbList; +}; + +class AArch64RegSavesOpt : public RegSavesOpt { + public: + AArch64RegSavesOpt(CGFunc &func, MemPool &pool, DomAnalysis &dom, PostDomAnalysis &pdom) : + RegSavesOpt(func, pool), + domInfo(&dom), + pDomInfo(&pdom), + bbSavedRegs(alloc.Adapter()), + regSavedBBs(alloc.Adapter()), + regOffset(alloc.Adapter()) { + bbSavedRegs.resize(func.NumBBs()); + regSavedBBs.resize(sizeof(CalleeBitsType)<<3); + for (int i = 0; i < bbSavedRegs.size(); i++) { + bbSavedRegs[i] = nullptr; + } + for (int i = 0; 
i < regSavedBBs.size(); i++) { + regSavedBBs[i] = nullptr; + } + } + ~AArch64RegSavesOpt() override = default; + + typedef uint64 CalleeBitsType; + + void InitData(); + void CollectLiveInfo(BB &bb, const Operand &opnd, bool isDef, bool isUse); + void GenerateReturnBBDefUse(BB &bb); + void ProcessCallInsnParam(BB &bb); + void ProcessAsmListOpnd(BB &bb, Operand &opnd, uint32 idx); + void ProcessListOpnd(BB &bb, Operand &opnd); + void ProcessMemOpnd(BB &bb, Operand &opnd); + void ProcessCondOpnd(BB &bb); + void GetLocalDefUse(); + void PrintBBs() const; + int CheckCriteria(BB *bb, regno_t reg) const; + bool AlreadySavedInDominatorList(BB *bb, regno_t reg) const; + void DetermineCalleeSaveLocationsDoms(); + void DetermineCalleeSaveLocationsPre(); + void DetermineCalleeRestoreLocations(); + int32 FindNextOffsetForCalleeSave(); + void InsertCalleeSaveCode(); + void InsertCalleeRestoreCode(); + void Verify(regno_t reg, BB* bb, std::set *visited, uint32 *s, uint32 *r); + void Run() override; + + DomAnalysis *GetDomInfo() const { + return domInfo; + } + + PostDomAnalysis *GetPostDomInfo() const { + return pDomInfo; + } + + Bfs *GetBfs() const { + return bfs; + } + + CalleeBitsType *GetCalleeBitsDef() { + return calleeBitsDef; + } + + CalleeBitsType *GetCalleeBitsUse() { + return calleeBitsUse; + } + + CalleeBitsType GetBBCalleeBits(CalleeBitsType *data, uint32 bid) const { + return data[bid]; + } + + void SetCalleeBit(CalleeBitsType *data, uint32 bid, regno_t reg) { + CalleeBitsType mask = 1ULL << RegBitMap(reg); + if ((GetBBCalleeBits(data, bid) & mask) == 0) { + data[bid] = GetBBCalleeBits(data, bid) | mask; + } + } + + void ResetCalleeBit(CalleeBitsType * data, uint32 bid, regno_t reg) { + CalleeBitsType mask = 1ULL << RegBitMap(reg); + data[bid] = GetBBCalleeBits(data, bid) & ~mask; + } + + bool IsCalleeBitSet(CalleeBitsType * data, uint32 bid, regno_t reg) { + CalleeBitsType mask = 1ULL << RegBitMap(reg); + return GetBBCalleeBits(data, bid) & mask; + } + + /* AArch64 
specific callee-save registers bit positions
+      0          9  10                33   -- position
+     R19  ..   R28  V8 .. V15 V16 .. V31   -- regs */
+  uint32 RegBitMap(regno_t reg) {
+    uint32 r;
+    if (reg <= R28) {
+      r = (reg - R19);
+    } else {
+      r = (R28 - R19 + 1) + (reg - V8);
+    }
+    return r;
+  }
+
+  /* Inverse of RegBitMap: translate a bit position back into a register number.
+     Positions below (R28 - R19 + 1) denote the integer registers starting at R19,
+     every other position denotes a vector register starting at V8.
+     The vector branch has to subtract the complete integer-register count,
+     i.e. reg - (R28 - R19 + 1), so that ReverseRegBitMap(RegBitMap(r)) == r. */
+  regno_t ReverseRegBitMap(uint32 reg) {
+    const uint32 intRegCount = R28 - R19 + 1;  /* bit positions taken by R19 .. R28 */
+    if (reg < intRegCount) {
+      return static_cast<regno_t>(R19 + reg);
+    } else {
+      return static_cast<regno_t>(V8 + (reg - intRegCount));
+    }
+  }
+
+  /* Return the per-BB save/restore record for BB id 'bid', creating it from memPool
+     on first request. */
+  SavedRegInfo *GetbbSavedRegsEntry(uint32 bid) {
+    if (bbSavedRegs[bid] == nullptr) {
+      bbSavedRegs[bid] = memPool->New<SavedRegInfo>(alloc);
+    }
+    return bbSavedRegs[bid];
+  }
+
+ private:
+  DomAnalysis *domInfo;
+  PostDomAnalysis *pDomInfo;
+  Bfs *bfs = nullptr;
+  CalleeBitsType *calleeBitsDef = nullptr;
+  CalleeBitsType *calleeBitsUse = nullptr;
+  MapleVector<SavedRegInfo*> bbSavedRegs;  /* set of regs to be saved in a BB */
+  MapleVector<SavedBBInfo*> regSavedBBs;   /* set of BBs to be saved for a reg */
+  /* NOTE(review): the template arguments of this MapleMap are missing in this copy of the
+     patch; restore them from the upstream header. */
+  MapleMap regOffset;  /* save offset of each register */
+  bool oneAtaTime = false;
+  regno_t oneAtaTimeReg = 0;
+};
+}  /* namespace maplebe */
+
+#endif  /* MAPLEBE_INCLUDE_CG_AARCH64REGSAVESOPT_H */
diff --git a/src/mapleall/maple_be/include/cg/cg_option.h b/src/mapleall/maple_be/include/cg/cg_option.h
index ddf04a69f732ba7c790f938ef5654dffc8c5f666..9b253d3245c1767ba0587340a66a1852cd1b8e59 100644
--- a/src/mapleall/maple_be/include/cg/cg_option.h
+++ b/src/mapleall/maple_be/include/cg/cg_option.h
@@ -641,6 +641,41 @@ class CGOptions : public MapleDriverOptionBase {
     return doCFGO;
   }
 
+  static void EnableRegSavesOpt() {
+    doRegSavesOpt = true;
+  }
+
+  static void DisableRegSavesOpt() {
+    doRegSavesOpt = false;
+  }
+
+  static bool DoRegSavesOpt() {
+    return doRegSavesOpt;
+  }
+
+  static void EnableSsaPreSave() {
+    useSsaPreSave = true;
+  }
+
+  static void DisableSsaPreSave() {
+    useSsaPreSave = false;
+  }
+
+  static bool UseSsaPreSave() {
+    return useSsaPreSave;
+  }
+  static void EnableSsuPreRestore() {
+    useSsuPreRestore = true;
+  }
+
+  static void DisableSsuPreRestore() {
+
useSsuPreRestore = false; + } + + static bool UseSsuPreRestore() { + return useSsuPreRestore; + } + static void EnableICO() { doICO = true; } @@ -1196,6 +1231,9 @@ class CGOptions : public MapleDriverOptionBase { static bool doSchedule; static bool doAlignAnalysis; static bool doWriteRefFieldOpt; + static bool doRegSavesOpt; + static bool useSsaPreSave; + static bool useSsuPreRestore; static bool dumpOptimizeCommonLog; static bool checkArrayStore; static bool exclusiveEH; diff --git a/src/mapleall/maple_be/include/cg/cgbb.h b/src/mapleall/maple_be/include/cg/cgbb.h index 576b3946d40ecc7d5932c41057ded896b57aaa8c..060f0aab59772d3a862df44c1347c2814499d536 100644 --- a/src/mapleall/maple_be/include/cg/cgbb.h +++ b/src/mapleall/maple_be/include/cg/cgbb.h @@ -199,6 +199,8 @@ class BB { /* append all insns from bb into this bb */ void InsertAtBeginning(BB &bb); + void InsertAtEnd(BB &bb); + void InsertAtEndMinus1(BB &bb); /* clear BB but don't remove insns of this */ void ClearInsns() { diff --git a/src/mapleall/maple_be/include/cg/regsaves.h b/src/mapleall/maple_be/include/cg/regsaves.h new file mode 100644 index 0000000000000000000000000000000000000000..29539c574d7420569f45746b25d5609095710389 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/regsaves.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) [2022] Futurewei Technologies Co., Ltd. All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan Permissive Software License v2. + * You can use this software according to the terms and conditions of the MulanPSL - 2.0. + * You may obtain a copy of MulanPSL - 2.0 at: + * + * https://opensource.org/licenses/MulanPSL-2.0 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the MulanPSL - 2.0 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_REGSAVES_OPT_H +#define MAPLEBE_INCLUDE_CG_REGSAVES_OPT_H + +#include "cgfunc.h" +#include "cg_phase.h" + +namespace maplebe { +class RegSavesOpt { + public: + RegSavesOpt(CGFunc &func, MemPool &pool) + : cgFunc(&func), + memPool(&pool), + alloc(&pool) {} + + virtual ~RegSavesOpt() = default; + + virtual void Run() {} + + std::string PhaseName() const { + return "regsavesopt"; + } + + CGFunc *GetCGFunc() const { + return cgFunc; + } + + MemPool *GetMemPool() const { + return memPool; + } + + bool GetEnabledDebug() const { + return enabledDebug; + } + + void SetEnabledDebug(bool d) { + enabledDebug = d; + } + + protected: + CGFunc *cgFunc; + MemPool *memPool; + MapleAllocator alloc; + bool enabledDebug = false; +}; + +MAPLE_FUNC_PHASE_DECLARE_BEGIN(CgRegSavesOpt, maplebe::CGFunc) +MAPLE_FUNC_PHASE_DECLARE_END +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_REGSAVES_OPT_H */ diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp index 5f1cc682670628a2e9579c371a2c5ff17739775c..f77d24ed5b4ac28ef77759ca53e60b5bed286ded 100644 --- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp @@ -293,7 +293,9 @@ bool AArch64GenProEpilog::NeedProEpilog() { } } auto &aarchCGFunc = static_cast(cgFunc); - const MapleVector ®sToRestore = aarchCGFunc.GetCalleeSavedRegs(); + const MapleVector ®sToRestore = + !CGOptions::DoRegSavesOpt() ? 
+ aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs(); size_t calleeSavedRegSize = kTwoRegister; CHECK_FATAL(regsToRestore.size() >= calleeSavedRegSize, "Forgot FP and LR ?"); if (funcHasCalls || regsToRestore.size() > calleeSavedRegSize || aarchCGFunc.HasStackLoadStore() || @@ -822,9 +824,10 @@ BB *AArch64GenProEpilog::IsolateFastPath(BB &bb) { return coldBB; } -AArch64MemOperand *AArch64GenProEpilog::SplitStpLdpOffsetForCalleeSavedWithAddInstruction(const AArch64MemOperand &mo, - uint32 bitLen, - AArch64reg baseRegNum) { +AArch64MemOperand *AArch64GenProEpilog::SplitStpLdpOffsetForCalleeSavedWithAddInstruction(CGFunc &cgFunc, + const AArch64MemOperand &mo, + uint32 bitLen, + AArch64reg baseRegNum) { auto &aarchCGFunc = static_cast(cgFunc); CHECK_FATAL(mo.GetAddrMode() == AArch64MemOperand::kAddrModeBOi, "mode should be kAddrModeBOi"); AArch64OfstOperand *ofstOp = mo.GetOffsetImmediate(); @@ -846,7 +849,8 @@ AArch64MemOperand *AArch64GenProEpilog::SplitStpLdpOffsetForCalleeSavedWithAddIn return &aarchCGFunc.CreateReplacementMemOperand(bitLen, br, offsetVal); } -void AArch64GenProEpilog::AppendInstructionPushPair(AArch64reg reg0, AArch64reg reg1, RegType rty, int32 offset) { +void AArch64GenProEpilog::AppendInstructionPushPair(CGFunc &cgFunc, + AArch64reg reg0, AArch64reg reg1, RegType rty, int32 offset) { auto &aarchCGFunc = static_cast(cgFunc); CG *currCG = cgFunc.GetCG(); MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair]; @@ -857,7 +861,7 @@ void AArch64GenProEpilog::AppendInstructionPushPair(AArch64reg reg0, AArch64reg uint32 dataSize = kSizeOfPtr * kBitsPerByte; CHECK_FATAL(offset >= 0, "offset must >= 0"); if (offset > kStpLdpImm64UpperBound) { - o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(*static_cast(o2), dataSize, R16); + o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, *static_cast(o2), dataSize, R16); } Insn &pushInsn = currCG->BuildInstruction(mOp, o0, o1, *o2); std::string comment = "SAVE CALLEE 
REGISTER PAIR"; @@ -876,7 +880,8 @@ void AArch64GenProEpilog::AppendInstructionPushPair(AArch64reg reg0, AArch64reg } } -void AArch64GenProEpilog::AppendInstructionPushSingle(AArch64reg reg, RegType rty, int32 offset) { +void AArch64GenProEpilog::AppendInstructionPushSingle(CGFunc &cgFunc, + AArch64reg reg, RegType rty, int32 offset) { auto &aarchCGFunc = static_cast(cgFunc); CG *currCG = cgFunc.GetCG(); MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopSingle]; @@ -1164,7 +1169,9 @@ void AArch64GenProEpilog::AppendInstructionAllocateCallFrameDebug(AArch64reg reg void AArch64GenProEpilog::GeneratePushRegs() { auto &aarchCGFunc = static_cast(cgFunc); CG *currCG = cgFunc.GetCG(); - const MapleVector ®sToSave = aarchCGFunc.GetCalleeSavedRegs(); + const MapleVector ®sToSave = + !CGOptions::DoRegSavesOpt() ? + aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs(); CHECK_FATAL(!regsToSave.empty(), "FP/LR not added to callee-saved list?"); @@ -1242,19 +1249,19 @@ void AArch64GenProEpilog::GeneratePushRegs() { /* remember it */ firstHalf = reg; } else { - AppendInstructionPushPair(firstHalf, reg, regType, offset); + AppendInstructionPushPair(cgFunc, firstHalf, reg, regType, offset); GetNextOffsetCalleeSaved(offset); firstHalf = kRinvalid; } } if (intRegFirstHalf != kRinvalid) { - AppendInstructionPushSingle(intRegFirstHalf, kRegTyInt, offset); + AppendInstructionPushSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset); GetNextOffsetCalleeSaved(offset); } if (fpRegFirstHalf != kRinvalid) { - AppendInstructionPushSingle(fpRegFirstHalf, kRegTyFloat, offset); + AppendInstructionPushSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset); GetNextOffsetCalleeSaved(offset); } @@ -1382,7 +1389,9 @@ void AArch64GenProEpilog::GenerateProlog(BB &bb) { } } - const MapleVector ®sToSave = aarchCGFunc.GetCalleeSavedRegs(); + const MapleVector ®sToSave = + !CGOptions::DoRegSavesOpt() ? 
+ aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs(); if (!regsToSave.empty()) { /* * Among other things, push the FP & LR pair. @@ -1477,7 +1486,7 @@ bool AArch64GenProEpilog::TestPredsOfRetBB(const BB &exitBB) { return true; } -void AArch64GenProEpilog::AppendInstructionPopSingle(AArch64reg reg, RegType rty, int32 offset) { +void AArch64GenProEpilog::AppendInstructionPopSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int32 offset) { auto &aarchCGFunc = static_cast(cgFunc); CG *currCG = cgFunc.GetCG(); MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopSingle]; @@ -1499,7 +1508,7 @@ void AArch64GenProEpilog::AppendInstructionPopSingle(AArch64reg reg, RegType rty } } -void AArch64GenProEpilog::AppendInstructionPopPair(AArch64reg reg0, AArch64reg reg1, RegType rty, int32 offset) { +void AArch64GenProEpilog::AppendInstructionPopPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty, int32 offset) { auto &aarchCGFunc = static_cast(cgFunc); CG *currCG = cgFunc.GetCG(); MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair]; @@ -1510,7 +1519,7 @@ void AArch64GenProEpilog::AppendInstructionPopPair(AArch64reg reg0, AArch64reg r uint32 dataSize = kSizeOfPtr * kBitsPerByte; CHECK_FATAL(offset >= 0, "offset must >= 0"); if (offset > kStpLdpImm64UpperBound) { - o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(*static_cast(o2), dataSize, R16); + o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, *static_cast(o2), dataSize, R16); } Insn &popInsn = currCG->BuildInstruction(mOp, o0, o1, *o2); popInsn.SetComment("RESTORE RESTORE"); @@ -1657,7 +1666,10 @@ void AArch64GenProEpilog::AppendInstructionDeallocateCallFrameDebug(AArch64reg r void AArch64GenProEpilog::GeneratePopRegs() { auto &aarchCGFunc = static_cast(cgFunc); CG *currCG = cgFunc.GetCG(); - const MapleVector ®sToRestore = aarchCGFunc.GetCalleeSavedRegs(); + + const MapleVector ®sToRestore = + !CGOptions::DoRegSavesOpt() ? 
+ aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs(); CHECK_FATAL(!regsToRestore.empty(), "FP/LR not added to callee-saved list?"); @@ -1709,19 +1721,19 @@ void AArch64GenProEpilog::GeneratePopRegs() { firstHalf = reg; } else { /* flush the pair */ - AppendInstructionPopPair(firstHalf, reg, regType, offset); + AppendInstructionPopPair(cgFunc, firstHalf, reg, regType, offset); GetNextOffsetCalleeSaved(offset); firstHalf = kRinvalid; } } if (intRegFirstHalf != kRinvalid) { - AppendInstructionPopSingle(intRegFirstHalf, kRegTyInt, offset); + AppendInstructionPopSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset); GetNextOffsetCalleeSaved(offset); } if (fpRegFirstHalf != kRinvalid) { - AppendInstructionPopSingle(fpRegFirstHalf, kRegTyFloat, offset); + AppendInstructionPopSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset); GetNextOffsetCalleeSaved(offset); } @@ -1792,7 +1804,9 @@ void AArch64GenProEpilog::GenerateEpilog(BB &bb) { } } - const MapleVector ®sToSave = aarchCGFunc.GetCalleeSavedRegs(); + const MapleVector ®sToSave = + !CGOptions::DoRegSavesOpt() ? + aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs(); if (!regsToSave.empty()) { GeneratePopRegs(); } else { diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_regsaves.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_regsaves.cpp new file mode 100644 index 0000000000000000000000000000000000000000..2f444be81e1754d29ce6fe31e665a34ff79947bf --- /dev/null +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_regsaves.cpp @@ -0,0 +1,833 @@ +/* + * Copyright (c) [2022] Futurewei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "aarch64_regsaves.h" +#include "aarch64_cg.h" +#include "aarch64_live.h" +#include "aarch64_cg.h" +#include "aarch64_proepilog.h" +#include "cg_dominance.h" +#include "cg_ssa_pre.h" +#include "cg_ssu_pre.h" + +namespace maplebe { + +#define RS_DUMP GetEnabledDebug() +#define RS_EXTRA (RS_DUMP && true) +#define mLog LogInfo::MapleLogger() +#define threshold 8 + +typedef uint32 BBId; + +void AArch64RegSavesOpt::InitData() { + calleeBitsDef = cgFunc->GetMemoryPool()->NewArray(cgFunc->NumBBs()); + errno_t retDef = memset_s(calleeBitsDef, cgFunc->NumBBs() * sizeof(CalleeBitsType), + 0, cgFunc->NumBBs() * sizeof(CalleeBitsType)); + calleeBitsUse = cgFunc->GetMemoryPool()->NewArray(cgFunc->NumBBs()); + errno_t retUse = memset_s(calleeBitsUse, cgFunc->NumBBs() * sizeof(CalleeBitsType), + 0, cgFunc->NumBBs() * sizeof(CalleeBitsType)); + CHECK_FATAL(retDef == EOK && retUse == EOK, "memset_s of calleesBits failed"); + + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + const MapleVector &sp = aarchCGFunc->GetCalleeSavedRegs(); + if (!sp.empty()) { + if (std::find(sp.begin(), sp.end(), RFP) != sp.end()) { + aarchCGFunc->GetProEpilogSavedRegs().push_back(RFP); + } + if (std::find(sp.begin(), sp.end(), RLR) != sp.end()) { + aarchCGFunc->GetProEpilogSavedRegs().push_back(RLR); + } + } +} + + +void AArch64RegSavesOpt::CollectLiveInfo(BB &bb, const Operand &opnd, bool isDef, bool isUse) { + if (!opnd.IsRegister()) { + return; + } + const RegOperand ®Opnd = static_cast(opnd); + regno_t regNO = regOpnd.GetRegisterNumber(); + if (!AArch64Abi::IsCalleeSavedReg(static_cast(regNO)) || + (regNO >= R29 && regNO <= R31)) { + 
return; /* check only callee-save registers */ + } + RegType regType = regOpnd.GetRegisterType(); + if (regType == kRegTyVary) { + return; + } + if (isDef) { + /* First def */ + if (!IsCalleeBitSet(GetCalleeBitsDef(), bb.GetId(), regNO)) { + SetCalleeBit(GetCalleeBitsDef(), bb.GetId(), regNO); + } + } + if (isUse) { + /* Last use */ + SetCalleeBit(GetCalleeBitsUse(), bb.GetId(), regNO); + } +} + +void AArch64RegSavesOpt::GenerateReturnBBDefUse(BB &bb) { + PrimType returnType = cgFunc->GetFunction().GetReturnType()->GetPrimType(); + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + if (IsPrimitiveFloat(returnType)) { + Operand &phyOpnd = + aarchCGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(V0), k64BitSize, kRegTyFloat); + CollectLiveInfo(bb, phyOpnd, false, true); + } else if (IsPrimitiveInteger(returnType)) { + Operand &phyOpnd = + aarchCGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(R0), k64BitSize, kRegTyInt); + CollectLiveInfo(bb, phyOpnd, false, true); + } +} + +void AArch64RegSavesOpt::ProcessAsmListOpnd(BB &bb, Operand &opnd, uint32 idx) { + bool isDef = false; + bool isUse = false; + switch (idx) { + case kAsmOutputListOpnd: + case kAsmClobberListOpnd: { + isDef = true; + break; + } + case kAsmInputListOpnd: { + isUse = true; + break; + } + default: + return; + } + ListOperand &listOpnd = static_cast(opnd); + for (auto op : listOpnd.GetOperands()) { + CollectLiveInfo(bb, *op, isDef, isUse); + } +} + +void AArch64RegSavesOpt::ProcessListOpnd(BB &bb, Operand &opnd) { + ListOperand &listOpnd = static_cast(opnd); + for (auto op : listOpnd.GetOperands()) { + CollectLiveInfo(bb, *op, false, true); + } +} + +void AArch64RegSavesOpt::ProcessMemOpnd(BB &bb, Operand &opnd) { + auto &memOpnd = static_cast(opnd); + Operand *base = memOpnd.GetBaseRegister(); + Operand *offset = memOpnd.GetIndexRegister(); + if (base != nullptr) { + CollectLiveInfo(bb, *base, !memOpnd.IsIntactIndexed(), true); + } + if (offset != nullptr) { + CollectLiveInfo(bb, *offset, 
false, true);
+  }
+}
+
+void AArch64RegSavesOpt::ProcessCondOpnd(BB &bb) {
+  Operand &rflag = cgFunc->GetOrCreateRflag();
+  CollectLiveInfo(bb, rflag, false, true);
+}
+
+/* Record in each local BB the 1st def and the last use of a callee-saved
+   register.
+   Walks every BB of bfs->sortedBBs; a return BB first gets the return register
+   recorded through GenerateReturnBBDefUse (even when the BB holds no insn).
+   Every operand of every machine instruction is then dispatched to the matching
+   Process* helper or to CollectLiveInfo, which update calleeBitsDef / calleeBitsUse.
+   With RS_DUMP enabled the raw def/use masks of all BBs are printed afterwards. */
+void AArch64RegSavesOpt::GetLocalDefUse() {
+  for (auto bbp : bfs->sortedBBs) {
+    BB &bb = *bbp;
+    if (bb.GetKind() == BB::kBBReturn) {
+      GenerateReturnBBDefUse(bb);
+    }
+    if (bb.IsEmpty()) {
+      continue;
+    }
+
+    FOR_BB_INSNS(insn, &bb) {
+      if (!insn->IsMachineInstruction()) {
+        continue;
+      }
+
+      /* one opcode query serves both the asm test and the machine-description lookup */
+      const auto mOp = insn->GetMachineOpcode();
+      const bool isAsm = (mOp == MOP_asm);
+      const AArch64MD *md = &AArch64CG::kMd[mOp];
+      const uint32 opndNum = insn->GetOperandSize();
+      for (uint32 i = 0; i < opndNum; ++i) {
+        Operand &opnd = insn->GetOperand(i);
+        AArch64OpndProp *regProp = static_cast<AArch64OpndProp*>(md->operand[i]);
+        bool isDef = regProp->IsRegDef();
+        bool isUse = regProp->IsRegUse();
+        if (opnd.IsList()) {
+          if (isAsm) {
+            ProcessAsmListOpnd(bb, opnd, i);
+          } else {
+            ProcessListOpnd(bb, opnd);
+          }
+        } else if (opnd.IsMemoryAccessOperand()) {
+          ProcessMemOpnd(bb, opnd);
+        } else if (opnd.IsConditionCode()) {
+          ProcessCondOpnd(bb);
+        } else {
+          CollectLiveInfo(bb, opnd, isDef, isUse);
+        }
+      } /* for all operands */
+    } /* for all insns */
+  } /* for all sortedBBs */
+
+  if (RS_DUMP) {
+    /* calleeBitsDef / calleeBitsUse hold cgFunc->NumBBs() entries (see InitData) */
+    for (uint32 i = 0; i < cgFunc->NumBBs(); ++i) {
+      mLog << i << " : " << calleeBitsDef[i] << " " << calleeBitsUse[i] << "\n";
+    }
+  }
+}
+
+void AArch64RegSavesOpt::PrintBBs() const {
+  mLog << "RegSaves LiveIn/Out of BFS nodes:\n";
+  for (auto *bb : bfs->sortedBBs) {
+    mLog << "< === > ";
+    mLog << bb->GetId();
+    mLog << " pred:[";
+    for (auto *predBB : bb->GetPreds()) {
+      mLog << " " << predBB->GetId();
+    }
+    mLog << "] succs:[";
+    for (auto *succBB : bb->GetSuccs()) {
+      mLog << " " << succBB->GetId();
+    }
+    mLog << "]\n LiveIn of [" << bb->GetId() << "]: ";
+    for (auto liveIn: bb->GetLiveInRegNO()) {
+      mLog << liveIn << " ";
+    }
+    mLog << "\n LiveOut
of [" << bb->GetId() << "]: "; + for (auto liveOut: bb->GetLiveOutRegNO()) { + mLog << liveOut << " "; + } + mLog << "\n"; + } +} + +/* 1st def MUST not have preceding save in dominator list. Each dominator + block must not have livein or liveout of the register */ +int32 AArch64RegSavesOpt::CheckCriteria(BB *bb, regno_t reg) const { + /* Already a site to save */ + SavedRegInfo *sp = bbSavedRegs[bb->GetId()]; + if (sp != nullptr && sp->ContainSaveReg(reg)) { + return 1; + } + + /* This preceding block has livein OR liveout of reg */ + MapleSet &liveIn = bb->GetLiveInRegNO(); + MapleSet &liveOut = bb->GetLiveOutRegNO(); + if (liveIn.find(reg) != liveIn.end() || + liveOut.find(reg) != liveOut.end()) { + return 2; + } + + return 0; +} + +/* Return true if reg is already to be saved in its dominator list */ +bool AArch64RegSavesOpt::AlreadySavedInDominatorList(BB *bb, regno_t reg) const { + BB *aBB = GetDomInfo()->GetDom(bb->GetId()); + + if (RS_DUMP) { + mLog << "Checking dom list starting " << bb->GetId() << " for saved R" << reg-1 << ":\n "; + } + while (!aBB->GetPreds().empty()) { /* can't go beyond prolog */ + if (RS_DUMP) { + mLog << aBB->GetId() << " "; + } + if (int t = CheckCriteria(aBB, reg)) { + if (RS_DUMP) { + if (t == 1) { + mLog << " --R" << reg-1 << " saved here, skip!\n"; + } else { + mLog << " --R" << reg-1 << " has livein/out, skip!\n"; + } + } + return true; /* previously saved, inspect next reg */ + } + aBB = GetDomInfo()->GetDom(aBB->GetId()); + } + return false; /* not previously saved, to save at bb */ +} + +/* Determine callee-save regs save locations and record them in bbSavedRegs. + Save is needed for a 1st def callee-save register at its dominator block + outside any loop. 
*/
+/* Implementation notes:
+   - Only registers whose def bit is set for the BB and which are not live-in to it are
+     considered.
+   - The candidate block bbDom is moved up the dominator chain while it sits inside a loop;
+     the register is skipped as soon as CheckCriteria() reports, for a block on the way,
+     an existing save site or a livein/liveout of the register.
+   - Blocks already recorded for the register that are dominated by (or equal to) bbDom are
+     dropped from regSavedBBs / bbSavedRegs before bbDom itself is recorded. */
+void AArch64RegSavesOpt::DetermineCalleeSaveLocationsDoms() {
+  if (RS_DUMP) {
+    mLog << "Determining regsave sites using dom list for " << cgFunc->GetName() << ":\n";
+  }
+  for (auto *bb : bfs->sortedBBs) {
+    if (RS_DUMP) {
+      mLog << "BB: " << bb->GetId() << "\n";
+    }
+    CalleeBitsType c = GetBBCalleeBits(GetCalleeBitsDef(), bb->GetId());
+    if (c == 0) {
+      continue;
+    }
+    const auto &liveIn = bb->GetLiveInRegNO();
+    CalleeBitsType mask = 1;
+    for (uint32 i = 0; i < (sizeof(CalleeBitsType) << 3); ++i, mask <<= 1) {
+      if ((c & ~(mask - 1)) == 0) {
+        break;  /* short cut: no def bit left at or above this position */
+      }
+      if ((c & mask) == 0) {
+        continue;
+      }
+      regno_t reg = ReverseRegBitMap(i);
+      if (oneAtaTime && oneAtaTimeReg != reg) {
+        continue;
+      }
+      if (liveIn.find(reg) != liveIn.end()) {
+        continue;  /* livein, nothing to place for this BB */
+      }
+
+      BB *bbDom = bb;  /* start from current BB */
+      bool done = false;
+      while (bbDom->GetLoop() != nullptr) {
+        bbDom = GetDomInfo()->GetDom(bbDom->GetId());
+        /* validate the dominator before CheckCriteria dereferences it */
+        ASSERT(bbDom, "Can't find dominator for save location");
+        if (CheckCriteria(bbDom, reg)) {
+          done = true;
+          break;
+        }
+      }
+      if (done) {
+        continue;
+      }
+
+      /* Check if a dominator of bbDom was already a location to save */
+      if (AlreadySavedInDominatorList(bbDom, reg)) {
+        continue;  /* no need to save again, next reg */
+      }
+
+      /* Check if the newly found block is a dominator of block(s) in the current
+         to be saved list. If so, remove these blocks from bbSavedRegs */
+      const uint32 creg = i;
+      SavedBBInfo *sp = regSavedBBs[creg];
+      if (sp == nullptr) {
+        regSavedBBs[creg] = memPool->New<SavedBBInfo>(alloc);
+      } else {
+        /* Explicit iterators: erase() hands back the next valid position, so dropping
+           the current element does not invalidate the traversal. */
+        auto &savedBBs = sp->GetBBList();
+        for (auto it = savedBBs.begin(); it != savedBBs.end();) {
+          BB *sbb = *it;
+          bool dominated = false;
+          for (BB *abb = sbb; !abb->GetPreds().empty(); abb = GetDomInfo()->GetDom(abb->GetId())) {
+            if (abb->GetId() == bbDom->GetId()) {
+              dominated = true;
+              break;
+            }
+          }
+          if (!dominated) {
+            ++it;
+            continue;
+          }
+          /* Found! Don't plan to save in sbb any more */
+          bbSavedRegs[sbb->GetId()]->RemoveSaveReg(reg);
+          if (RS_DUMP) {
+            mLog << " --R" << reg-1 << " save removed from BB" << sbb->GetId() << "\n";
+          }
+          it = savedBBs.erase(it);
+        }
+      }
+      regSavedBBs[creg]->InsertBB(bbDom);
+
+      uint32 bid = bbDom->GetId();
+      if (RS_DUMP) {
+        mLog << " --R" << reg - 1;
+        mLog << " to save in " << bid << "\n";
+      }
+      SavedRegInfo *ctx = GetbbSavedRegsEntry(bid);
+      if (!ctx->ContainSaveReg(reg)) {
+        ctx->InsertSaveReg(reg);
+      }
+    }
+  }
+}
+
+void AArch64RegSavesOpt::DetermineCalleeSaveLocationsPre() {
+  AArch64CGFunc *aarchCGFunc = static_cast<AArch64CGFunc*>(cgFunc);
+  MapleAllocator sprealloc(memPool);
+  if (RS_DUMP) {
+    mLog << "Determining regsave sites using ssa_pre for " << cgFunc->GetName() << ":\n";
+  }
+  const auto &callees = aarchCGFunc->GetCalleeSavedRegs();
+  for (auto reg : callees) {
+    if (reg >= R29) {
+      continue;  /* save/restore in prologue, epilogue */
+    }
+    if (oneAtaTime && oneAtaTimeReg != reg) {
+      continue;
+    }
+
+    SsaPreWorkCand wkCand(&sprealloc);
+    for (int bid = 1; bid < bbSavedRegs.size(); bid++) {
+      /* Set the BB occurrences of this callee-saved register */
+      if (IsCalleeBitSet(GetCalleeBitsDef(), bid, reg) ||
+          IsCalleeBitSet(GetCalleeBitsUse(), bid, reg)) {
+        wkCand.occBBs.insert(bid);
+      }
+    }
+    DoSavePlacementOpt(cgFunc, GetDomInfo(), &wkCand);
+    if (wkCand.saveAtEntryBBs.empty()) {
+      /* something gone wrong, skip this reg */
+      wkCand.saveAtProlog = true;
+    }
+    if (wkCand.saveAtProlog) {
+      /* Save cannot be applied, skip this reg and place save/restore
+         in prolog/epilog */
+      auto &pe = aarchCGFunc->GetProEpilogSavedRegs();
+      if (std::find(pe.begin(), pe.end(), reg) == pe.end()) {
+        pe.push_back(reg);
+      }
+      if (RS_DUMP) {
+        mLog << "Save R" << reg - 1 << " n/a, do in Pro/Epilog\n";
+      }
+      continue;
+    }
+    if
(!wkCand.saveAtEntryBBs.empty()) {
+      for (uint32 entBB : wkCand.saveAtEntryBBs) {
+        if (RS_DUMP) {
+          std::string r = reg <= R28 ? "r" : "v";
+          mLog << "BB " << entBB << " save: " << r << reg - 1 << "\n";
+        }
+        GetbbSavedRegsEntry(entBB)->InsertSaveReg(reg);
+      }
+    }
+  }
+}
+
+/* Determine calleesave regs restore locations by calling ssu-pre.
+   For every callee-saved register below R29 the recorded save blocks and the
+   def/use occurrences are handed to DoRestorePlacementOpt, and the resulting
+   restore sites are recorded in bbSavedRegs as entry or exit restores.
+   When no placement is possible the recorded saves of the register are
+   removed and the register is added to the pro/epilog list instead. */
+void AArch64RegSavesOpt::DetermineCalleeRestoreLocations() {
+  AArch64CGFunc *aarchCGFunc = static_cast(cgFunc);
+  MapleAllocator sprealloc(memPool);
+  if (RS_DUMP) {
+    mLog << "Determining Callee Restore Locations:\n";
+  }
+  const MapleVector &callees = aarchCGFunc->GetCalleeSavedRegs();
+  for (auto reg : callees) {
+    if (reg >= R29) {
+      continue; /* save/restore in prologue, epilogue */
+    }
+
+    /* Hand reg over to the prolog/epilog; the list keeps each reg only once */
+    auto placeInProEpilog = [&]() {
+      auto &pe = aarchCGFunc->GetProEpilogSavedRegs();
+      if (std::find(pe.begin(), pe.end(), reg) == pe.end()) {
+        pe.push_back(reg);
+      }
+    };
+
+    if (oneAtaTime && oneAtaTimeReg != reg) {
+      placeInProEpilog();
+      continue;
+    }
+
+    SPreWorkCand wkCand(&sprealloc);
+    for (uint32 bid = 1; bid < bbSavedRegs.size(); bid++) {
+      /* Set the saved BB locations of this callee-saved register */
+      SavedRegInfo *sp = bbSavedRegs[bid];
+      if (sp != nullptr && sp->ContainSaveReg(reg)) {
+        wkCand.saveBBs.insert(bid);
+      }
+      /* Set the BB occurrences of this callee-saved register */
+      if (IsCalleeBitSet(GetCalleeBitsDef(), bid, reg) ||
+          IsCalleeBitSet(GetCalleeBitsUse(), bid, reg)) {
+        wkCand.occBBs.insert(bid);
+      }
+    }
+    DoRestorePlacementOpt(cgFunc, GetPostDomInfo(), &wkCand);
+    if (wkCand.saveBBs.empty()) {
+      /* something gone wrong, skip this reg */
+      wkCand.restoreAtEpilog = true;
+    }
+    if (wkCand.restoreAtEpilog) {
+      /* Restore cannot be applied, skip this reg and place save/restore
+         in prolog/epilog */
+      for (uint32 bid = 1; bid < bbSavedRegs.size(); bid++) {
+        SavedRegInfo *sp = bbSavedRegs[bid];
+        if (sp != nullptr && !sp->GetSaveSet().empty() && sp->ContainSaveReg(reg)) {
+          sp->RemoveSaveReg(reg);
+        }
+      }
+      placeInProEpilog();
+      if (RS_DUMP) {
+        mLog << "Restore R" << reg - 1 << " n/a, do in Pro/Epilog\n";
+      }
+      continue;
+    }
+    if (!wkCand.restoreAtEntryBBs.empty() || !wkCand.restoreAtExitBBs.empty()) {
+      for (uint32 entBB : wkCand.restoreAtEntryBBs) {
+        if (RS_DUMP) {
+          std::string r = reg <= R28 ? "r" : "v";
+          mLog << "BB " << entBB << " restore: " << r << reg - 1 << "\n";
+        }
+        GetbbSavedRegsEntry(entBB)->InsertEntryReg(reg);
+      }
+      for (uint32 exitBB : wkCand.restoreAtExitBBs) {
+        for (BB *bb : bfs->sortedBBs) {
+          if (bb->GetId() != exitBB) {
+            continue;
+          }
+          if (bb->GetKind() == BB::kBBIgoto) {
+            CHECK_FATAL(false, "igoto detected");
+          }
+          Insn *lastInsn = bb->GetLastInsn();
+          if (lastInsn != nullptr && lastInsn->IsBranch() &&
+              (!lastInsn->GetOperand(0).IsRegister() || /* not a reg OR */
+               (!AArch64Abi::IsCalleeSavedReg( /* reg but not cs */
+                static_cast(static_cast(
+                  lastInsn->GetOperand(0)).GetRegisterNumber()))))) {
+            /* To insert in this block - 1 instr */
+            SavedRegInfo *sp = GetbbSavedRegsEntry(exitBB);
+            sp->InsertExitReg(reg);
+            sp->insertAtLastMinusOne = true;
+          } else if (bb->GetSuccs().size() > 1) {
+            /* The restore moves to the entry of every successor, which is only
+               possible when none of them has another predecessor */
+            for (BB *sbb : bb->GetSuccs()) {
+              if (sbb->GetPreds().size() > 1) {
+                CHECK_FATAL(false, "critical edge detected");
+              }
+            }
+            for (BB *sbb : bb->GetSuccs()) {
+              /* To insert at all succs */
+              GetbbSavedRegsEntry(sbb->GetId())->InsertEntryReg(reg);
+            }
+          } else {
+            /* otherwise, BB_FT etc */
+            GetbbSavedRegsEntry(exitBB)->InsertExitReg(reg);
+          }
+        }
+        if (RS_DUMP) {
+          std::string r = reg <= R28 ? "R" : "V";
+          mLog << "BB " << exitBB << " restore: " << r << reg - 1 << "\n";
+        }
+      }
+    }
+  }
+}
+
+/* Compute the stack offset used as the starting point for callee-save slots:
+   RealStackFrameSize, minus SizeOfCalleeSaved without the FP/LR pair, minus
+   SizeOfArgsToStackPass. For varargs functions the rounded-up GR and VR save
+   area sizes are subtracted as well. */
+int32 AArch64RegSavesOpt::FindNextOffsetForCalleeSave() {
+  int32 offset =
+      static_cast(cgFunc->GetMemlayout())->
+      RealStackFrameSize() -
+      (static_cast(cgFunc)->SizeOfCalleeSaved() -
+       (kDivide2 * kIntregBytelen) /* FP/LR */) -
+      cgFunc->GetMemlayout()->SizeOfArgsToStackPass();
+
+  if (cgFunc->GetFunction().GetAttr(FUNCATTR_varargs)) {
+    /* GR/VR save areas are above the callee save area */
+    AArch64MemLayout *ml = static_cast(cgFunc->GetMemlayout());
+    int saveareasize = RoundUp(ml->GetSizeOfGRSaveArea(), kSizeOfPtr * k2BitSize) +
+                       RoundUp(ml->GetSizeOfVRSaveArea(), kSizeOfPtr * k2BitSize);
+    offset -= saveareasize;
+  }
+  return offset;
+}
+
+/* Emit the save instructions at the sites recorded in bbSavedRegs.
+   Each register gets one slot, assigned the first time the register is met
+   and kept in regOffset. Within a block, two registers of the same class
+   whose slots are 8 bytes apart are stored with one pair instruction, all
+   others with single stores. The instructions are built in the dummy BB and
+   then placed at the beginning of the block. */
+void AArch64RegSavesOpt::InsertCalleeSaveCode() {
+  BB *saveBB = cgFunc->GetCurBB();
+  AArch64CGFunc *aarchCGFunc = static_cast(cgFunc);
+
+  if (RS_DUMP) {
+    mLog << "Inserting Save: \n";
+  }
+  int32 offset = FindNextOffsetForCalleeSave();
+  /* Skip the slots of the registers kept in the prolog/epilog list; the
+     original comment attributes the 2 to R29 and RLR and the factor to 8-byte
+     slots. Done in signed arithmetic so that a list shorter than 2 does not
+     depend on unsigned wrap-around. */
+  constexpr int32 kSlotBytes = 8;
+  offset += (static_cast<int32>(aarchCGFunc->GetProEpilogSavedRegs().size()) - 2) * kSlotBytes;
+  for (BB *bb : bfs->sortedBBs) {
+    int bid = bb->GetId();
+    aarchCGFunc->SetSplitBaseOffset(0);
+    SavedRegInfo *info = bbSavedRegs[bid];
+    if (info == nullptr || info->GetSaveSet().empty()) {
+      continue;
+    }
+    aarchCGFunc->GetDummyBB()->ClearInsns();
+    cgFunc->SetCurBB(*aarchCGFunc->GetDummyBB());
+    AArch64reg intRegFirstHalf = kRinvalid;
+    AArch64reg fpRegFirstHalf = kRinvalid;
+    for (auto areg : info->GetSaveSet()) {
+      AArch64reg reg = static_cast(areg);
+      const bool isInt = AArch64isa::IsGPRegister(reg);
+      RegType regType = isInt ? kRegTyInt : kRegTyFloat;
+      AArch64reg &firstHalf = isInt ? intRegFirstHalf : fpRegFirstHalf;
+      std::string r = reg <= R28 ? "R" : "V";
+      /* If reg not seen before, record offset and then update */
+      if (regOffset.find(areg) == regOffset.end()) {
+        regOffset[areg] = offset;
+        offset += kIntregBytelen;
+      }
+      if (firstHalf == kRinvalid) {
+        /* 1st half in reg pair, emitted once its partner is known */
+        firstHalf = reg;
+      } else {
+        if (regOffset[reg] == (regOffset[firstHalf] + k8ByteSize)) {
+          /* firstHalf & reg consecutive, make regpair */
+          AArch64GenProEpilog::AppendInstructionPushPair(*cgFunc, firstHalf, reg, regType, regOffset[firstHalf]);
+        } else if (regOffset[firstHalf] == (regOffset[reg] + k8ByteSize)) {
+          /* reg & firstHalf consecutive, make regpair */
+          AArch64GenProEpilog::AppendInstructionPushPair(*cgFunc, reg, firstHalf, regType, regOffset[reg]);
+        } else {
+          /* regs cannot be paired */
+          AArch64GenProEpilog::AppendInstructionPushSingle(*cgFunc, firstHalf, regType, regOffset[firstHalf]);
+          AArch64GenProEpilog::AppendInstructionPushSingle(*cgFunc, reg, regType, regOffset[reg]);
+        }
+        firstHalf = kRinvalid;
+      }
+      if (RS_DUMP) {
+        mLog << r << reg-1 << " save in BB" << bid << " Offset = " << regOffset[reg]<< "\n";
+      }
+    }
+
+    /* A register left without a partner is stored on its own */
+    if (intRegFirstHalf != kRinvalid) {
+      AArch64GenProEpilog::AppendInstructionPushSingle(*cgFunc, intRegFirstHalf, kRegTyInt, regOffset[intRegFirstHalf]);
+    }
+
+    if (fpRegFirstHalf != kRinvalid) {
+      AArch64GenProEpilog::AppendInstructionPushSingle(*cgFunc, fpRegFirstHalf, kRegTyFloat, regOffset[fpRegFirstHalf]);
+    }
+    bb->InsertAtBeginning(*aarchCGFunc->GetDummyBB());
+  }
+  cgFunc->SetCurBB(*saveBB);
+}
+
+/* DFS to verify the save/restore are in pair(s) within a path */
+void AArch64RegSavesOpt::Verify(regno_t reg, BB *bb, std::set *visited, BBId *s, BBId *r) {
+  visited->insert(bb);
+  BBId bid = bb->GetId();
+  if (RS_EXTRA) {
+    mLog << bid << ","; /* path trace can be long */
+  }
+
+  if (bbSavedRegs[bid]) {
+    bool entryRestoreMet = false;
+    if
(bbSavedRegs[bid]->ContainEntryReg(reg)) {
+      if (RS_EXTRA) {
+        mLog << "[^" << bid << "],"; // entry restore found
+      }
+      if (!*s) {
+        mLog << "Alert: nR@" << bid << " found w/o save\n";
+        return;
+      }
+      /* complete s/xR found, continue */
+      mLog << "(" << *s << "," << bid << ") ";
+      *r = bid;
+      entryRestoreMet = true;
+    }
+    if (bbSavedRegs[bid]->ContainSaveReg(reg)) {
+      if (RS_EXTRA) {
+        mLog << "[" << bid << "],"; // save found
+      }
+      if (*s && !entryRestoreMet) {
+        /* another save found before last save restored */
+        mLog << "Alert: save@" << bid << " found after save@" << *s << "\n";
+        return;
+      }
+      if (entryRestoreMet) {
+        *r = 0;
+      }
+      *s = bid;
+    }
+    if (bbSavedRegs[bid]->ContainExitReg(reg)) {
+      if (RS_EXTRA) {
+        mLog << "[" << bid << "$],"; // exit restore found
+      }
+      if (!*s) {
+        mLog << "Alert: xR@" << bid << " found w/o save\n";
+        return;
+      }
+      /* complete s/xR found, continue */
+      mLog << "(" << *s << "," << bid << ") ";
+      *r = bid;
+    }
+  }
+
+  if (bb->GetSuccs().size() == 0) {
+    if (*s && !*r) {
+      mLog << "Alert: save@" << *s << " w/o restore reaches end";
+    }
+    mLog << " " << bid << " ended>\n";
+    *r = 0;
+  }
+  for (BB *sBB : bb->GetSuccs()) {
+    if (visited->count(sBB) == 0) {
+      Verify(reg, sBB, visited, s, r);
+    }
+  }
+  if (*s == bid) {
+    /* clear only when returned from previous calls to the orig save site */
+    /* clear savebid since all of its succs already visited */
+    *s = 0;
+  }
+  if (*r == bid) {
+    /* clear restorebid if all of its preds already visited */
+    bool clear = true;
+    for (BB *pBB : bb->GetPreds()) {
+      if (visited->count(pBB) == 0) {
+        clear = false;
+        break;
+      }
+    }
+    if (clear) {
+      *r = 0;
+    }
+  }
+}
+
+/* Emit the restore instructions at the sites recorded in bbSavedRegs.
+   Entry restores are placed at the beginning of the block. Exit restores are
+   placed at the end, or one instruction before the end when the block is
+   flagged insertAtLastMinusOne. Every restore is a single load from the
+   offset recorded in regOffset when the register was saved, and is marked
+   DoNotRemove. */
+void AArch64RegSavesOpt::InsertCalleeRestoreCode() {
+  BB *saveBB = cgFunc->GetCurBB();
+  AArch64CGFunc *aarchCGFunc = static_cast(cgFunc);
+
+  if (RS_DUMP) {
+    mLog << "Inserting Restore: \n";
+  }
+
+  /* Offset recorded for a saved register. Indexing regOffset directly would
+     create a zero offset for a register that has no save slot and restore
+     from there without any diagnostic. */
+  auto savedOffsetOf = [&](auto areg) {
+    auto it = regOffset.find(areg);
+    CHECK_FATAL(it != regOffset.end(), "restore requested for a register without a save slot");
+    return it->second;
+  };
+
+  /* Print the ids of all blocks recorded as save site of reg */
+  auto dumpSaveSites = [&](AArch64reg reg) {
+    mLog << " for save @BB [ ";
+    for (uint32 b = 1; b < bbSavedRegs.size(); b++) {
+      if (bbSavedRegs[b] != nullptr &&
+          bbSavedRegs[b]->ContainSaveReg(reg)) {
+        mLog << b << " ";
+      }
+    }
+    mLog << "]\n";
+  };
+
+  for (BB *bb : bfs->sortedBBs) {
+    int bid = bb->GetId();
+    aarchCGFunc->SetSplitBaseOffset(0);
+    SavedRegInfo *sp = bbSavedRegs[bid];
+    if (sp == nullptr) {
+      continue;
+    }
+    if (sp->GetEntrySet().empty() && sp->GetExitSet().empty()) {
+      continue;
+    }
+
+    aarchCGFunc->GetDummyBB()->ClearInsns();
+    cgFunc->SetCurBB(*aarchCGFunc->GetDummyBB());
+    for (auto areg : sp->GetEntrySet()) {
+      AArch64reg reg = static_cast(areg);
+      int32 offset = savedOffsetOf(areg);
+      if (RS_DUMP) {
+        std::string r = reg <= R28 ? "R" : "V";
+        mLog << r << reg-1 << " entry restore in BB " << bid << " Saved Offset = " << offset << "\n";
+        if (RS_EXTRA) {
+          dumpSaveSites(reg);
+        }
+      }
+
+      /* restore is always the same from saved offset */
+      RegType regType = AArch64isa::IsGPRegister(reg)
+          ? kRegTyInt : kRegTyFloat;
+      AArch64GenProEpilog::AppendInstructionPopSingle(*cgFunc, reg, regType, offset);
+    }
+    FOR_BB_INSNS(insn, aarchCGFunc->GetDummyBB()) {
+      insn->SetDoNotRemove(true); /* do not let ebo remove these restores */
+    }
+    bb->InsertAtBeginning(*aarchCGFunc->GetDummyBB());
+
+    aarchCGFunc->GetDummyBB()->ClearInsns();
+    cgFunc->SetCurBB(*aarchCGFunc->GetDummyBB());
+    for (auto areg : sp->GetExitSet()) {
+      AArch64reg reg = static_cast(areg);
+      int32 offset = savedOffsetOf(areg);
+      if (RS_DUMP) {
+        std::string r = reg <= R28 ? "R" : "V";
+        mLog << r << reg-1 << " exit restore in BB " << bid << " Offset = " << offset << "\n";
+        dumpSaveSites(reg);
+      }
+
+      /* restore is always single from saved offset */
+      RegType regType = AArch64isa::IsGPRegister(reg)
+          ? kRegTyInt : kRegTyFloat;
+      AArch64GenProEpilog::AppendInstructionPopSingle(*cgFunc, reg, regType, offset);
+    }
+    FOR_BB_INSNS(insn, aarchCGFunc->GetDummyBB()) {
+      insn->SetDoNotRemove(true);
+    }
+    if (sp->insertAtLastMinusOne) {
+      bb->InsertAtEndMinus1(*aarchCGFunc->GetDummyBB());
+    } else {
+      bb->InsertAtEnd(*aarchCGFunc->GetDummyBB());
+    }
+  }
+  cgFunc->SetCurBB(*saveBB);
+}
+
+/* Callee-save registers save/restore placement optimization.
+   Does nothing at optimization level 1 or below. Otherwise: order the blocks,
+   collect per-block def/use data, determine save sites (ssa-pre or dominator
+   based, depending on CGOptions::UseSsaPreSave), determine restore sites,
+   optionally verify the pairing in the dump, and finally insert the save and
+   restore instructions. */
+void AArch64RegSavesOpt::Run() {
+  // DotGenerator::GenerateDot("SR", *cgFunc, cgFunc->GetMirModule(), true, cgFunc->GetName());
+  if (Globals::GetInstance()->GetOptimLevel() <= 1) {
+    return;
+  }
+
+#if ONE_REG_AT_A_TIME
+  /* only do reg placement on the following register, others in pro/epilog */
+  oneAtaTime = true;
+  oneAtaTimeReg = R25;
+#endif
+
+  /* bfs points at this local for the duration of Run only; it is reset on
+     every way out so that the member never refers to a dead object */
+  Bfs localBfs(*cgFunc, *memPool);
+  bfs = &localBfs;
+  bfs->ComputeBlockOrder();
+  if (RS_DUMP) {
+    mLog << "##Calleeregs Placement for: " << cgFunc->GetName() << "\n";
+    PrintBBs();
+  }
+
+#ifdef REDUCE_COMPLEXITY
+  CGOptions::EnableRegSavesOpt();
+  for (auto bb : bfs->sortedBBs) {
+    if (bb->GetSuccs().size() > threshold) {
+      CGOptions::DisableRegSavesOpt();
+      bfs = nullptr;
+      return;
+    }
+  }
+#endif
+
+  /* Determined 1st def and last use of all callee-saved registers used
+     for all BBs */
+  InitData();
+  GetLocalDefUse();
+
+  /* Determine save sites at dominators of 1st def with no live-in and
+     not within loop */
+  if (CGOptions::UseSsaPreSave()) {
+    DetermineCalleeSaveLocationsPre();
+  } else {
+    DetermineCalleeSaveLocationsDoms();
+  }
+
+  /* Determine restore sites */
+  DetermineCalleeRestoreLocations();
+
+  /* Verify saves/restores are in pair */
+  if (RS_DUMP) {
+    std::vector rlist = { R19, R20, R21, R22, R23, R24, R25, R26, R27, R28 };
+    for (auto reg: rlist) {
+      mLog << "Verify calleeregs_placement data for R" << reg-1 << ":\n";
+      std::set visited;
+      uint32 saveBid = 0;
+      uint32 restoreBid = 0;
+      Verify(reg, cgFunc->GetFirstBB(), &visited, &saveBid, &restoreBid);
+      mLog << "\nVerify Done\n";
+    }
+  }
+
+  /* Generate callee save instrs at found sites */
+  InsertCalleeSaveCode();
+
+  /* Generate callee restores at found sites */
+  InsertCalleeRestoreCode();
+
+  bfs = nullptr;
+}
+} /* namespace maplebe */
diff --git a/src/mapleall/maple_be/src/cg/cg_option.cpp b/src/mapleall/maple_be/src/cg/cg_option.cpp index 439499cb500a185f6ae5c77cb70588a70f5a766d..2570dc33c2b5ba1b4c037a70d2873e0e4eb0fd01 100644 --- a/src/mapleall/maple_be/src/cg/cg_option.cpp +++ b/src/mapleall/maple_be/src/cg/cg_option.cpp @@ -102,6 +102,9 @@ bool CGOptions::inRange = false; bool CGOptions::doPreLSRAOpt = false; bool CGOptions::doLocalRefSpill = false; bool CGOptions::doCalleeToSpill = false; +bool CGOptions::doRegSavesOpt = false; +bool CGOptions::useSsaPreSave = false; +bool CGOptions::useSsuPreRestore = false; bool CGOptions::replaceASM = false; bool CGOptions::generalRegOnly = false; bool CGOptions::fastMath = false; @@ -202,6 +205,9 @@ enum OptionIndex : uint64 { kFastMath, kTailCall, kAlignAnalysis, + kRegSaves, + kSsaPreSave, + kSsuPreRestore, kArm64ilp32, }; @@ -395,6 +401,36 @@ const Descriptor kUsage[] = { " --no-lsra-optcallee\n", "mplcg", {} }, + { kRegSaves, + kEnable, + "", + "calleeregs-placement", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --calleeregs-placement \tOptimize placement of callee-save registers\n" + " --no-calleeregs-placement\n", + "mplcg", + {} }, + { kSsaPreSave, + kEnable, + "", + "ssapre-save", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --ssapre-save \tUse ssapre algorithm to save callee-save registers\n" + " --no-ssapre-save\n", + "mplcg", + {} }, + { kSsuPreRestore, + kEnable, + "", + "ssupre-restore", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --ssupre-restore \tUse ssupre algorithm to restore callee-save registers\n" + " --no-ssupre-restore\n", + "mplcg", + {} }, { kPrepeep, kEnable, "", @@ -1199,7 +1235,7 @@ bool CGOptions::SolveOptions(const std::deque