diff --git a/build/tools/common/maplec b/build/tools/common/maplec index 673500285d33bc9876a82d75892952c0f00f6028..2dafb78e4a00f9693ee1441f08f4765ea3eae087 100755 --- a/build/tools/common/maplec +++ b/build/tools/common/maplec @@ -41,7 +41,7 @@ GCC=$LINARO/bin/aarch64-linux-gnu-gcc ISYSTEM_FLAGS="-isystem $LINARO/aarch64-linux-gnu/libc/usr/include -isystem $LINARO/lib/gcc/aarch64-linux-gnu/7.5.0/include" # maple options -O2="--O2 --quiet:--O2 --quiet:--O2 --quiet --verbose-asm --verbose-cg" +O2="--O2 --quiet:--O2 --quiet:--O2 --quiet --calleeregs-placement" O0="--O0 --quiet --verbose-asm --verbose-cg" # whole cmd of maplec.sh diff --git a/src/mapleall/maple_be/BUILD.gn b/src/mapleall/maple_be/BUILD.gn index 22594cdb66254c2c620a156b0504f3c483c150cc..cc900396ee7b18312ce4c3f6c13deeb1ddf9c763 100644 --- a/src/mapleall/maple_be/BUILD.gn +++ b/src/mapleall/maple_be/BUILD.gn @@ -108,6 +108,7 @@ src_libcgaarch64 = [ "src/cg/aarch64/aarch64_lsra.cpp", "src/cg/aarch64/aarch64_ra_opt.cpp", "src/cg/aarch64/aarch64_alignment.cpp", + "src/cg/aarch64/aarch64_regsaves.cpp", ] src_libcgriscv64 = [ @@ -185,6 +186,7 @@ src_libcg = [ "src/cg/reg_coalesce.cpp", "src/cg/alignment.cpp", "src/cg/cg_ssu_pre.cpp", + "src/cg/regsaves.cpp", ] cflags_cc -= [ "-DRC_NO_MMAP" ] diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h index 70d5d1614e3338f1d1883deeb25833ab1da8d8e0..a7acada3f57f732d39886c4485744618991da993 100644 --- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h +++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_cgfunc.h @@ -30,6 +30,7 @@ class AArch64CGFunc : public CGFunc { MemPool &memPool, StackMemPool &stackMp, MapleAllocator &mallocator, uint32 funcId) : CGFunc(mod, c, f, b, memPool, stackMp, mallocator, funcId), calleeSavedRegs(mallocator.Adapter()), + proEpilogSavedRegs(mallocator.Adapter()), formalRegList(mallocator.Adapter()), phyRegOperandTable(mallocator.Adapter()), 
hashLabelOpndTable(mallocator.Adapter()), @@ -622,6 +623,10 @@ class AArch64CGFunc : public CGFunc { MIRPreg *GetPseudoRegFromVirtualRegNO(const regno_t vRegNO, bool afterSSA = false) const; + MapleVector &GetProEpilogSavedRegs() { + return proEpilogSavedRegs; + } + private: enum RelationOperator : uint8 { kAND, @@ -644,6 +649,7 @@ class AArch64CGFunc : public CGFunc { using MovkLslOperandArray = std::array; MapleVector calleeSavedRegs; + MapleVector proEpilogSavedRegs; MapleVector formalRegList; /* store the parameters register used by this function */ uint32 refCount = 0; /* Ref count number. 0 if function don't have "bl MCC_InitializeLocalStackRef" */ int32 beginOffset = 0; /* Begin offset based x29. */ diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_proepilog.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_proepilog.h index ac9cb2b9bb00cee3e69e6f6f8ca04aa729920e43..da40333bddaafd56a8baed1604e9e6f6026bc1f4 100644 --- a/src/mapleall/maple_be/include/cg/aarch64/aarch64_proepilog.h +++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_proepilog.h @@ -36,6 +36,12 @@ class AArch64GenProEpilog : public GenProEpilog { bool TailCallOpt() override; bool NeedProEpilog() override; + static AArch64MemOperand *SplitStpLdpOffsetForCalleeSavedWithAddInstruction( + CGFunc &cgFunc, const AArch64MemOperand &mo, uint32 bitLen, AArch64reg baseReg = AArch64reg::kRinvalid); + static void AppendInstructionPushPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty, int offset); + static void AppendInstructionPushSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int offset); + static void AppendInstructionPopSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int offset); + static void AppendInstructionPopPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty, int offset); void Run() override; private: void GenStackGuard(BB&); @@ -50,10 +56,6 @@ class AArch64GenProEpilog : public GenProEpilog { bool BackwardFindDependency(BB &ifbb, std::set 
&vecReturnSourceReg, std::list &existingInsns, std::list &moveInsns); BB *IsolateFastPath(BB&); - AArch64MemOperand *SplitStpLdpOffsetForCalleeSavedWithAddInstruction(const AArch64MemOperand &mo, uint32 bitLen, - AArch64reg baseReg = AArch64reg::kRinvalid); - void AppendInstructionPushPair(AArch64reg reg0, AArch64reg reg1, RegType rty, int offset); - void AppendInstructionPushSingle(AArch64reg reg, RegType rty, int offset); void AppendInstructionAllocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty); void AppendInstructionAllocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty); void GeneratePushRegs(); @@ -63,8 +65,6 @@ class AArch64GenProEpilog : public GenProEpilog { void GenerateRet(BB &bb); bool TestPredsOfRetBB(const BB &exitBB); - void AppendInstructionPopSingle(AArch64reg reg, RegType rty, int offset); - void AppendInstructionPopPair(AArch64reg reg0, AArch64reg reg1, RegType rty, int offset); void AppendInstructionDeallocateCallFrame(AArch64reg reg0, AArch64reg reg1, RegType rty); void AppendInstructionDeallocateCallFrameDebug(AArch64reg reg0, AArch64reg reg1, RegType rty); void GeneratePopRegs(); diff --git a/src/mapleall/maple_be/include/cg/aarch64/aarch64_regsaves.h b/src/mapleall/maple_be/include/cg/aarch64/aarch64_regsaves.h new file mode 100644 index 0000000000000000000000000000000000000000..6ffc4b6327b0b476a36e0481e652866920fe2c2c --- /dev/null +++ b/src/mapleall/maple_be/include/cg/aarch64/aarch64_regsaves.h @@ -0,0 +1,200 @@ +/* + * Copyright (c) [2022] Futurewei Technologies Co., Ltd. All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan Permissive Software License v2. + * You can use this software according to the terms and conditions of the MulanPSL - 2.0. 
+ * You may obtain a copy of MulanPSL - 2.0 at: + * + * https://opensource.org/licenses/MulanPSL-2.0 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the MulanPSL - 2.0 for more details. + */ + +#ifndef MAPLEBE_INCLUDE_CG_AARCH64REGSAVESOPT_H +#define MAPLEBE_INCLUDE_CG_AARCH64REGSAVESOPT_H + +#include "cg.h" +#include "regsaves.h" +#include "aarch64_cg.h" +#include "aarch64_insn.h" +#include "aarch64_operand.h" + +namespace maplebe { + +/* Saved reg info. This class is created to avoid the complexity of + nested Maple Containers. It bundles three sets of register numbers (saveSet, restoreEntrySet, restoreExitSet) with membership-test and insert helpers; all three sets draw from the allocator passed to the constructor. */ +class SavedRegInfo { + public: + explicit SavedRegInfo(MapleAllocator &alloc) + : saveSet(alloc.Adapter()), + restoreEntrySet(alloc.Adapter()), + restoreExitSet(alloc.Adapter()) {} + + /* true if r is a member of saveSet */ bool ContainSaveReg(regno_t r) { + if (saveSet.find(r) != saveSet.end()) { + return true; + } + return false; + } + + /* true if r is a member of restoreEntrySet */ bool ContainEntryReg(regno_t r) { + if (restoreEntrySet.find(r) != restoreEntrySet.end()) { + return true; + } + return false; + } + + /* true if r is a member of restoreExitSet */ bool ContainExitReg(regno_t r) { + if (restoreExitSet.find(r) != restoreExitSet.end()) { + return true; + } + return false; + } + + void InsertSaveReg(regno_t r) { + saveSet.insert(r); + } + + /* erasing a register that is not in the set is a no-op */ void RemoveSaveReg(regno_t r) { + saveSet.erase(r); + } + + void InsertEntryReg(regno_t r) { + restoreEntrySet.insert(r); + } + + void InsertExitReg(regno_t r) { + restoreExitSet.insert(r); + } + + MapleSet &GetSaveSet() { + return saveSet; + } + + MapleSet &GetEntrySet() { + return restoreEntrySet; + } + + MapleSet &GetExitSet() { + return restoreExitSet; + } + + private: + /* registers recorded for saving */ MapleSet saveSet; + /* registers recorded via InsertEntryReg (presumably restored at BB entry - confirm in the insertion code) */ MapleSet restoreEntrySet; + /* registers recorded via InsertExitReg (presumably restored at BB exit - confirm in the insertion code) */ MapleSet restoreExitSet; +}; + +class AArch64RegSavesOpt : public RegSavesOpt { + public: + AArch64RegSavesOpt(CGFunc &func, MemPool &pool, DomAnalysis &dom, PostDomAnalysis &pdom) : + RegSavesOpt(func, pool), +
domInfo(&dom), + pDomInfo(&pdom), + bbSavedRegs(alloc.Adapter()), + regOffset(alloc.Adapter()) { + /* one slot per BB, each starting out as nullptr */ bbSavedRegs.resize(func.NumBBs()); + for (int i = 0; i < bbSavedRegs.size(); i++) { + bbSavedRegs[i] = nullptr; + } + } + ~AArch64RegSavesOpt() override = default; + + /* bit vector type: one bit per register, bit position given by RegBitMap() */ typedef uint64 CalleeBitsType; + + void InitData(); + void CollectLiveInfo(BB &bb, const Operand &opnd, bool isDef, bool isUse); + void GenerateReturnBBDefUse(BB &bb); + void ProcessCallInsnParam(BB &bb); + void ProcessAsmListOpnd(BB &bb, Operand &opnd, uint32 idx); + void ProcessListOpnd(BB &bb, Operand &opnd); + void ProcessMemOpnd(BB &bb, Operand &opnd); + void ProcessCondOpnd(BB &bb); + void GetLocalDefUse(); + void PrintBBs() const; + bool CheckCriteria(BB *bb, regno_t reg) const; + bool AlreadySavedInDominatorList(BB *bb, regno_t reg) const; + void DetermineCalleeSaveLocations(); + void DetermineCalleeRestoreLocations(); + int32 FindNextOffsetForCalleeSave(); + void InsertCalleeSaveCode(); + void InsertCalleeRestoreCode(); + void Run() override; + + DomAnalysis *GetDomInfo() const { + return domInfo; + } + + PostDomAnalysis *GetPostDomInfo() const { + return pDomInfo; + } + + /* bfs is nullptr until something assigns it; this header never does */ Bfs *GetBfs() const { + return bfs; + } + + /* array of def bit vectors, indexed by BB id; nullptr by default */ CalleeBitsType *GetCalleeBitsDef() { + return calleeBitsDef; + } + + /* array of use bit vectors, indexed by BB id; nullptr by default */ CalleeBitsType *GetCalleeBitsUse() { + return calleeBitsUse; + } + + /* no bounds check: bid must be a valid index into data */ CalleeBitsType GetBBCalleeBits(CalleeBitsType *data, uint32 bid) const { + return data[bid]; + } + + /* set reg's bit in data[bid]; the store is skipped when the bit is already set */ void SetCalleeBit(CalleeBitsType *data, uint32 bid, regno_t reg) { + CalleeBitsType mask = 1ULL << RegBitMap(reg); + if ((GetBBCalleeBits(data, bid) & mask) == 0) { + data[bid] = GetBBCalleeBits(data, bid) | mask; + } + } + + /* clear reg's bit in data[bid] */ void ResetCalleeBit(CalleeBitsType * data, uint32 bid, regno_t reg) { + CalleeBitsType mask = 1ULL << RegBitMap(reg); + data[bid] = GetBBCalleeBits(data, bid) & ~mask; + } + + /* true if reg's bit is set in data[bid] (non-zero masked value converts to true) */ bool IsCalleeBitSet(CalleeBitsType * data, uint32 bid, regno_t reg) { + CalleeBitsType mask = 1ULL << RegBitMap(reg); + return GetBBCalleeBits(data,
bid) & mask; + } + + /* AArch64 specific callee-save registers bit positions + 0 9 10 33 -- position + R19 .. R28 V8 .. V15 V16 .. V31 -- regs */ + /* Map a register number to its bit position: R19..R28 occupy positions 0..(R28 - R19), registers above R28 are placed after them counting from V8. No range check is made; callers must pass R19..R28 or V8 and up. */ uint32 RegBitMap(regno_t reg) { + uint32 r; + if (reg <= R28) { + r = (reg - R19); + } else { + r = (R28 - R19 + 1) + (reg - V8); + } + return r; + } + + /* Inverse of RegBitMap: turn a bit position back into a register number. The V-register branch must subtract the number of integer positions, (R28 - R19 + 1); the previous expression "reg - R28 - R19 - 1" subtracted the register numbers themselves and underflowed to a bogus register. */ regno_t ReverseRegBitMap(uint32 reg) { + if (reg < (R28 - R19 + 1)) { + return static_cast(R19 + reg); + } else { + return static_cast(V8 + (reg - (R28 - R19 + 1))); + } + } + + private: + DomAnalysis *domInfo; + PostDomAnalysis *pDomInfo; + Bfs *bfs = nullptr; + CalleeBitsType *calleeBitsDef = nullptr; + CalleeBitsType *calleeBitsUse = nullptr; + MapleVector bbSavedRegs; /* each bb may have a set of regs */ + MapleMap regOffset; /* save offset of each register */ +}; +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_AARCH64REGSAVESOPT_H */ diff --git a/src/mapleall/maple_be/include/cg/cg_option.h b/src/mapleall/maple_be/include/cg/cg_option.h index ddf04a69f732ba7c790f938ef5654dffc8c5f666..67736d35493d69b122408c5edf2f4aeb5b884c5a 100644 --- a/src/mapleall/maple_be/include/cg/cg_option.h +++ b/src/mapleall/maple_be/include/cg/cg_option.h @@ -641,6 +641,18 @@ class CGOptions : public MapleDriverOptionBase { return doCFGO; } + static void EnableRegSavesOpt() { + doRegSavesOpt = true; + } + + static void DisableRegSavesOpt() { + doRegSavesOpt = false; + } + + static bool DoRegSavesOpt() { + return doRegSavesOpt; + } + static void EnableICO() { doICO = true; } @@ -1196,6 +1208,7 @@ class CGOptions : public MapleDriverOptionBase { static bool doSchedule; static bool doAlignAnalysis; static bool doWriteRefFieldOpt; + static bool doRegSavesOpt; static bool dumpOptimizeCommonLog; static bool checkArrayStore; static bool exclusiveEH; diff --git a/src/mapleall/maple_be/include/cg/cgbb.h b/src/mapleall/maple_be/include/cg/cgbb.h index 2088907db34375da7defed3477ada5f58d8cf359..870c8cbddc2f6cd07e3d2f587c2f9392dd435d37 100644 --- a/src/mapleall/maple_be/include/cg/cgbb.h +++
b/src/mapleall/maple_be/include/cg/cgbb.h @@ -199,6 +199,8 @@ class BB { /* append all insns from bb into this bb */ void InsertAtBeginning(BB &bb); + void InsertAtEnd(BB &bb); + void InsertAtEndMinus1(BB &bb); /* clear BB but don't remove insns of this */ void ClearInsns() { diff --git a/src/mapleall/maple_be/include/cg/regsaves.h b/src/mapleall/maple_be/include/cg/regsaves.h new file mode 100644 index 0000000000000000000000000000000000000000..0722425ba58553a49c32639d6c2bd1518a404915 --- /dev/null +++ b/src/mapleall/maple_be/include/cg/regsaves.h @@ -0,0 +1,55 @@ +/* + * Copyright (c) [2022] Futurewei Technologies Co., Ltd. All rights reserved. + * + * OpenArkCompiler is licensed under the Mulan Permissive Software License v2. + * You can use this software according to the terms and conditions of the MulanPSL - 2.0. + * You may obtain a copy of MulanPSL - 2.0 at: + * + * https://opensource.org/licenses/MulanPSL-2.0 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the MulanPSL - 2.0 for more details. 
+ */ +#ifndef MAPLEBE_INCLUDE_CG_REGSAVES_OPT_H +#define MAPLEBE_INCLUDE_CG_REGSAVES_OPT_H + +#include "cgfunc.h" +#include "cg_phase.h" + +namespace maplebe { +/* Base class of the "regsavesopt" phase. It stores the CGFunc and MemPool it is given and builds a MapleAllocator on that pool; Run() does nothing here and is meant to be overridden. */ class RegSavesOpt { + public: + RegSavesOpt(CGFunc &func, MemPool &pool) + : cgFunc(&func), + memPool(&pool), + alloc(&pool) {} + + virtual ~RegSavesOpt() = default; + + /* default implementation is a no-op */ virtual void Run() {} + + std::string PhaseName() const { + return "regsavesopt"; + } + + CGFunc *GetCGFunc() const { + return cgFunc; + } + + MemPool *GetMemPool() const { + return memPool; + } + + protected: + /* not owned: address of the CGFunc passed to the constructor */ CGFunc *cgFunc; + /* not owned: address of the MemPool passed to the constructor */ MemPool *memPool; + /* allocator constructed from memPool */ MapleAllocator alloc; +}; + +MAPLE_FUNC_PHASE_DECLARE_BEGIN(CgRegSavesOpt, maplebe::CGFunc) +MAPLE_FUNC_PHASE_DECLARE_END +} /* namespace maplebe */ + +#endif /* MAPLEBE_INCLUDE_CG_REGSAVES_OPT_H */ diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp index 53aa88cd4ed79b6b2b1de55ee3d9ae6aec345774..6dd8d1474f4d98b1c1eee142e469d62f770f2fe4 100644 --- a/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_proepilog.cpp @@ -293,7 +293,9 @@ bool AArch64GenProEpilog::NeedProEpilog() { } } auto &aarchCGFunc = static_cast(cgFunc); - const MapleVector ®sToRestore = aarchCGFunc.GetCalleeSavedRegs(); + const MapleVector ®sToRestore = + !CGOptions::DoRegSavesOpt() ?
+ aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs(); size_t calleeSavedRegSize = kTwoRegister; CHECK_FATAL(regsToRestore.size() >= calleeSavedRegSize, "Forgot FP and LR ?"); if (funcHasCalls || regsToRestore.size() > calleeSavedRegSize || aarchCGFunc.HasStackLoadStore() || @@ -822,9 +824,10 @@ BB *AArch64GenProEpilog::IsolateFastPath(BB &bb) { return coldBB; } -AArch64MemOperand *AArch64GenProEpilog::SplitStpLdpOffsetForCalleeSavedWithAddInstruction(const AArch64MemOperand &mo, - uint32 bitLen, - AArch64reg baseRegNum) { +AArch64MemOperand *AArch64GenProEpilog::SplitStpLdpOffsetForCalleeSavedWithAddInstruction(CGFunc &cgFunc, + const AArch64MemOperand &mo, + uint32 bitLen, + AArch64reg baseRegNum) { auto &aarchCGFunc = static_cast(cgFunc); CHECK_FATAL(mo.GetAddrMode() == AArch64MemOperand::kAddrModeBOi, "mode should be kAddrModeBOi"); AArch64OfstOperand *ofstOp = mo.GetOffsetImmediate(); @@ -846,7 +849,8 @@ AArch64MemOperand *AArch64GenProEpilog::SplitStpLdpOffsetForCalleeSavedWithAddIn return &aarchCGFunc.CreateReplacementMemOperand(bitLen, br, offsetVal); } -void AArch64GenProEpilog::AppendInstructionPushPair(AArch64reg reg0, AArch64reg reg1, RegType rty, int32 offset) { +void AArch64GenProEpilog::AppendInstructionPushPair(CGFunc &cgFunc, + AArch64reg reg0, AArch64reg reg1, RegType rty, int32 offset) { auto &aarchCGFunc = static_cast(cgFunc); CG *currCG = cgFunc.GetCG(); MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopPair]; @@ -857,7 +861,7 @@ void AArch64GenProEpilog::AppendInstructionPushPair(AArch64reg reg0, AArch64reg uint32 dataSize = kSizeOfPtr * kBitsPerByte; CHECK_FATAL(offset >= 0, "offset must >= 0"); if (offset > kStpLdpImm64UpperBound) { - o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(*static_cast(o2), dataSize, R16); + o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, *static_cast(o2), dataSize, R16); } Insn &pushInsn = currCG->BuildInstruction(mOp, o0, o1, *o2); std::string comment = "SAVE CALLEE 
REGISTER PAIR"; @@ -875,7 +879,8 @@ void AArch64GenProEpilog::AppendInstructionPushPair(AArch64reg reg0, AArch64reg } } -void AArch64GenProEpilog::AppendInstructionPushSingle(AArch64reg reg, RegType rty, int32 offset) { +void AArch64GenProEpilog::AppendInstructionPushSingle(CGFunc &cgFunc, + AArch64reg reg, RegType rty, int32 offset) { auto &aarchCGFunc = static_cast(cgFunc); CG *currCG = cgFunc.GetCG(); MOperator mOp = pushPopOps[kRegsPushOp][rty][kPushPopSingle]; @@ -1154,7 +1159,9 @@ void AArch64GenProEpilog::AppendInstructionAllocateCallFrameDebug(AArch64reg reg void AArch64GenProEpilog::GeneratePushRegs() { auto &aarchCGFunc = static_cast(cgFunc); CG *currCG = cgFunc.GetCG(); - const MapleVector ®sToSave = aarchCGFunc.GetCalleeSavedRegs(); + const MapleVector ®sToSave = + !CGOptions::DoRegSavesOpt() ? + aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs(); CHECK_FATAL(!regsToSave.empty(), "FP/LR not added to callee-saved list?"); @@ -1232,19 +1239,19 @@ void AArch64GenProEpilog::GeneratePushRegs() { /* remember it */ firstHalf = reg; } else { - AppendInstructionPushPair(firstHalf, reg, regType, offset); + AppendInstructionPushPair(cgFunc, firstHalf, reg, regType, offset); GetNextOffsetCalleeSaved(offset); firstHalf = kRinvalid; } } if (intRegFirstHalf != kRinvalid) { - AppendInstructionPushSingle(intRegFirstHalf, kRegTyInt, offset); + AppendInstructionPushSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset); GetNextOffsetCalleeSaved(offset); } if (fpRegFirstHalf != kRinvalid) { - AppendInstructionPushSingle(fpRegFirstHalf, kRegTyFloat, offset); + AppendInstructionPushSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset); GetNextOffsetCalleeSaved(offset); } @@ -1354,7 +1361,9 @@ void AArch64GenProEpilog::GenerateProlog(BB &bb) { } } - const MapleVector ®sToSave = aarchCGFunc.GetCalleeSavedRegs(); + const MapleVector ®sToSave = + !CGOptions::DoRegSavesOpt() ? 
+ aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs(); if (!regsToSave.empty()) { /* * Among other things, push the FP & LR pair. @@ -1448,7 +1457,7 @@ bool AArch64GenProEpilog::TestPredsOfRetBB(const BB &exitBB) { return true; } -void AArch64GenProEpilog::AppendInstructionPopSingle(AArch64reg reg, RegType rty, int32 offset) { +void AArch64GenProEpilog::AppendInstructionPopSingle(CGFunc &cgFunc, AArch64reg reg, RegType rty, int32 offset) { auto &aarchCGFunc = static_cast(cgFunc); CG *currCG = cgFunc.GetCG(); MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopSingle]; @@ -1470,7 +1479,7 @@ void AArch64GenProEpilog::AppendInstructionPopSingle(AArch64reg reg, RegType rty } } -void AArch64GenProEpilog::AppendInstructionPopPair(AArch64reg reg0, AArch64reg reg1, RegType rty, int32 offset) { +void AArch64GenProEpilog::AppendInstructionPopPair(CGFunc &cgFunc, AArch64reg reg0, AArch64reg reg1, RegType rty, int32 offset) { auto &aarchCGFunc = static_cast(cgFunc); CG *currCG = cgFunc.GetCG(); MOperator mOp = pushPopOps[kRegsPopOp][rty][kPushPopPair]; @@ -1481,7 +1490,7 @@ void AArch64GenProEpilog::AppendInstructionPopPair(AArch64reg reg0, AArch64reg r uint32 dataSize = kSizeOfPtr * kBitsPerByte; CHECK_FATAL(offset >= 0, "offset must >= 0"); if (offset > kStpLdpImm64UpperBound) { - o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(*static_cast(o2), dataSize, R16); + o2 = SplitStpLdpOffsetForCalleeSavedWithAddInstruction(cgFunc, *static_cast(o2), dataSize, R16); } Insn &popInsn = currCG->BuildInstruction(mOp, o0, o1, *o2); popInsn.SetComment("RESTORE RESTORE"); @@ -1626,7 +1635,10 @@ void AArch64GenProEpilog::AppendInstructionDeallocateCallFrameDebug(AArch64reg r void AArch64GenProEpilog::GeneratePopRegs() { auto &aarchCGFunc = static_cast(cgFunc); CG *currCG = cgFunc.GetCG(); - const MapleVector ®sToRestore = aarchCGFunc.GetCalleeSavedRegs(); + + const MapleVector ®sToRestore = + !CGOptions::DoRegSavesOpt() ? 
+ aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs(); CHECK_FATAL(!regsToRestore.empty(), "FP/LR not added to callee-saved list?"); @@ -1678,19 +1690,19 @@ void AArch64GenProEpilog::GeneratePopRegs() { firstHalf = reg; } else { /* flush the pair */ - AppendInstructionPopPair(firstHalf, reg, regType, offset); + AppendInstructionPopPair(cgFunc, firstHalf, reg, regType, offset); GetNextOffsetCalleeSaved(offset); firstHalf = kRinvalid; } } if (intRegFirstHalf != kRinvalid) { - AppendInstructionPopSingle(intRegFirstHalf, kRegTyInt, offset); + AppendInstructionPopSingle(cgFunc, intRegFirstHalf, kRegTyInt, offset); GetNextOffsetCalleeSaved(offset); } if (fpRegFirstHalf != kRinvalid) { - AppendInstructionPopSingle(fpRegFirstHalf, kRegTyFloat, offset); + AppendInstructionPopSingle(cgFunc, fpRegFirstHalf, kRegTyFloat, offset); GetNextOffsetCalleeSaved(offset); } @@ -1761,7 +1773,9 @@ void AArch64GenProEpilog::GenerateEpilog(BB &bb) { } } - const MapleVector ®sToSave = aarchCGFunc.GetCalleeSavedRegs(); + const MapleVector ®sToSave = + !CGOptions::DoRegSavesOpt() ? + aarchCGFunc.GetCalleeSavedRegs() : aarchCGFunc.GetProEpilogSavedRegs(); if (!regsToSave.empty()) { GeneratePopRegs(); } else { diff --git a/src/mapleall/maple_be/src/cg/aarch64/aarch64_regsaves.cpp b/src/mapleall/maple_be/src/cg/aarch64/aarch64_regsaves.cpp new file mode 100644 index 0000000000000000000000000000000000000000..bbc3f299063e3ca577b1346d69198542c3b6895e --- /dev/null +++ b/src/mapleall/maple_be/src/cg/aarch64/aarch64_regsaves.cpp @@ -0,0 +1,569 @@ +/* + * Copyright (c) [2022] Futurewei Technologies Co.,Ltd.All rights reserved. + * + * OpenArkCompiler is licensed under Mulan PSL v2. + * You can use this software according to the terms and conditions of the Mulan PSL v2. 
+ * You may obtain a copy of Mulan PSL v2 at: + * + * http://license.coscl.org.cn/MulanPSL2 + * + * THIS SOFTWARE IS PROVIDED ON AN "AS IS" BASIS, WITHOUT WARRANTIES OF ANY KIND, EITHER + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO NON-INFRINGEMENT, MERCHANTABILITY OR + * FIT FOR A PARTICULAR PURPOSE. + * See the Mulan PSL v2 for more details. + */ +#include "aarch64_regsaves.h" +#include "aarch64_cg.h" +#include "aarch64_live.h" +#include "aarch64_cg.h" +#include "aarch64_proepilog.h" +#include "cg_dominance.h" +#include "cg_ssu_pre.h" + +namespace maplebe { + +#define RS_DUMP CG_DEBUG_FUNC(*cgFunc) + +/* Allocate and zero the per-BB def and use bit vectors (one CalleeBitsType per BB), then seed the prolog/epilog save list with RFP and RLR when they appear in the function's callee-saved list. */ void AArch64RegSavesOpt::InitData() { + calleeBitsDef = cgFunc->GetMemoryPool()->NewArray(cgFunc->NumBBs()); + errno_t retDef = memset_s(calleeBitsDef, cgFunc->NumBBs() * sizeof(CalleeBitsType), + 0, cgFunc->NumBBs() * sizeof(CalleeBitsType)); + calleeBitsUse = cgFunc->GetMemoryPool()->NewArray(cgFunc->NumBBs()); + errno_t retUse = memset_s(calleeBitsUse, cgFunc->NumBBs() * sizeof(CalleeBitsType), + 0, cgFunc->NumBBs() * sizeof(CalleeBitsType)); + CHECK_FATAL(retDef == EOK && retUse == EOK, "memset_s of calleesBits failed"); + + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + const MapleVector &sp = aarchCGFunc->GetCalleeSavedRegs(); + if (!sp.empty()) { + if (std::find(sp.begin(), sp.end(), RFP) != sp.end()) { + aarchCGFunc->GetProEpilogSavedRegs().push_back(RFP); + } + if (std::find(sp.begin(), sp.end(), RLR) != sp.end()) { + aarchCGFunc->GetProEpilogSavedRegs().push_back(RLR); + } + } +} + + +/* Record a def and/or use of opnd in bb's bit vectors. Operands that are not registers, not callee-saved, in the R29..R31 range, or of type kRegTyVary are ignored. */ +void AArch64RegSavesOpt::CollectLiveInfo(BB &bb, const Operand &opnd, bool isDef, bool isUse) { + if (!opnd.IsRegister()) { + return; + } + const RegOperand &regOpnd = static_cast(opnd); + regno_t regNO = regOpnd.GetRegisterNumber(); + if (!AArch64Abi::IsCalleeSavedReg(static_cast(regNO)) || + (regNO >= R29 && regNO <= R31)) { + return; /* check only callee-save registers */ + } + RegType regType = regOpnd.GetRegisterType(); + if (regType == kRegTyVary) { + return; +
} + if (isDef) { + /* First def */ + /* the IsCalleeBitSet test duplicates the check SetCalleeBit already performs */ if (!IsCalleeBitSet(GetCalleeBitsDef(), bb.GetId(), regNO)) { + SetCalleeBit(GetCalleeBitsDef(), bb.GetId(), regNO); + } + } + if (isUse) { + /* Last use */ + SetCalleeBit(GetCalleeBitsUse(), bb.GetId(), regNO); + } +} + +/* Report a use of the return-value register in bb: V0 for a float return type, R0 for an integer one; other return types report nothing. NOTE(review): CollectLiveInfo drops registers that are not callee-saved, so this looks like a no-op unless V0/R0 pass IsCalleeSavedReg - confirm. */ void AArch64RegSavesOpt::GenerateReturnBBDefUse(BB &bb) { + PrimType returnType = cgFunc->GetFunction().GetReturnType()->GetPrimType(); + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + if (IsPrimitiveFloat(returnType)) { + Operand &phyOpnd = + aarchCGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(V0), k64BitSize, kRegTyFloat); + CollectLiveInfo(bb, phyOpnd, false, true); + } else if (IsPrimitiveInteger(returnType)) { + Operand &phyOpnd = + aarchCGFunc->GetOrCreatePhysicalRegisterOperand(static_cast(R0), k64BitSize, kRegTyInt); + CollectLiveInfo(bb, phyOpnd, false, true); + } +} + +/* Handle one list operand of an asm insn: output and clobber lists count as defs, the input list as uses; any other operand index is ignored. */ void AArch64RegSavesOpt::ProcessAsmListOpnd(BB &bb, Operand &opnd, uint32 idx) { + bool isDef = false; + bool isUse = false; + switch (idx) { + case kAsmOutputListOpnd: + case kAsmClobberListOpnd: { + isDef = true; + break; + } + case kAsmInputListOpnd: { + isUse = true; + break; + } + default: + return; + } + ListOperand &listOpnd = static_cast(opnd); + for (auto op : listOpnd.GetOperands()) { + CollectLiveInfo(bb, *op, isDef, isUse); + } +} + +/* Every element of a non-asm list operand is recorded as a use. */ void AArch64RegSavesOpt::ProcessListOpnd(BB &bb, Operand &opnd) { + ListOperand &listOpnd = static_cast(opnd); + for (auto op : listOpnd.GetOperands()) { + CollectLiveInfo(bb, *op, false, true); + } +} + +/* Record the registers of a memory operand: the base is always a use and also a def when the operand is not "intact indexed"; the index register is a use only. */ void AArch64RegSavesOpt::ProcessMemOpnd(BB &bb, Operand &opnd) { + auto &memOpnd = static_cast(opnd); + Operand *base = memOpnd.GetBaseRegister(); + Operand *offset = memOpnd.GetIndexRegister(); + if (base != nullptr) { + CollectLiveInfo(bb, *base, !memOpnd.IsIntactIndexed(), true); + } + if (offset != nullptr) { + CollectLiveInfo(bb, *offset, false, true); + } +} + +/* A condition-code operand is recorded as a use of the flags register operand. */ void AArch64RegSavesOpt::ProcessCondOpnd(BB &bb) { + Operand &rflag = cgFunc->GetOrCreateRflag(); +
CollectLiveInfo(bb, rflag, false, true); +} + +/* Record in each local BB the 1st def and the last use of a callee-saved + register. Walks bfs->sortedBBs, visits every operand of every machine instruction and dispatches on the operand kind (list, memory, condition code, plain). With RS_DUMP enabled the resulting def/use bit vectors are logged per BB id. */ +void AArch64RegSavesOpt::GetLocalDefUse() { + for (auto bbp : bfs->sortedBBs) { + BB &bb = *bbp; + if (bb.GetKind() == BB::kBBReturn) { + GenerateReturnBBDefUse(bb); + } + if (bb.IsEmpty()) { + continue; + } + + FOR_BB_INSNS(insn, &bb) { + if (!insn->IsMachineInstruction()) { + continue; + } + + bool isAsm = (insn->GetMachineOpcode() == MOP_asm); + const AArch64MD *md = &AArch64CG::kMd[static_cast(insn)->GetMachineOpcode()]; + uint32 opndNum = insn->GetOperandSize(); + for (uint32 i = 0; i < opndNum; ++i) { + Operand &opnd = insn->GetOperand(i); + AArch64OpndProp *regProp = static_cast(md->operand[i]); + bool isDef = regProp->IsRegDef(); + bool isUse = regProp->IsRegUse(); + if (opnd.IsList()) { + if (isAsm) { + ProcessAsmListOpnd(bb, opnd, i); + } else { + ProcessListOpnd(bb, opnd); + } + } else if (opnd.IsMemoryAccessOperand()) { + ProcessMemOpnd(bb, opnd); + } else if (opnd.IsConditionCode()) { + ProcessCondOpnd(bb); + } else { + CollectLiveInfo(bb, opnd, isDef, isUse); + } + } /* for all operands */ + } /* for all insns */ + } /* for all sortedBBs */ + + if (RS_DUMP) { + /* both arrays hold cgFunc->NumBBs() entries, so that is the dump bound */ for (uint32 i = 0; i < cgFunc->NumBBs(); i++) { + LogInfo::MapleLogger() << i << " : " << calleeBitsDef[i] << " " << calleeBitsUse[i] << "\n"; + } + } +} + +/* Debug dump: for every BB in bfs->sortedBBs print its id, successor ids, live-in and live-out register numbers. */ void AArch64RegSavesOpt::PrintBBs() const { + LogInfo::MapleLogger() << "RegSaves LiveIn/Out of BFS nodes:\n"; + for (auto *bb : bfs->sortedBBs) { + LogInfo::MapleLogger() << "\n< === > "; + LogInfo::MapleLogger() << bb->GetId(); + LogInfo::MapleLogger() << " succs:"; + for (auto *succBB : bb->GetSuccs()) { + LogInfo::MapleLogger() << " " << succBB->GetId(); + } + LogInfo::MapleLogger() << "\n LiveIn of [" << bb->GetId() << "]: "; + for (auto liveIn: bb->GetLiveInRegNO()) { + LogInfo::MapleLogger() << liveIn << " "; + } + LogInfo::MapleLogger() << "\n LiveOut of [" << bb->GetId() << "] "; + for (auto liveOut:
bb->GetLiveOutRegNO()) { + LogInfo::MapleLogger() << liveOut << " "; + } + } + LogInfo::MapleLogger() << "\n"; +} + +/* 1st def MUST not have preceding save in dominator list. Each dominator + block must not have livein or liveout of the register. Returns true when bb already disqualifies a new save of reg: either bb is recorded as a save site for reg, or reg is in bb's live-in or live-out set. */ +bool AArch64RegSavesOpt::CheckCriteria(BB *bb, regno_t reg) const { + /* Already a site to save */ + SavedRegInfo *sp = bbSavedRegs[bb->GetId()]; + if (sp != nullptr && sp->ContainSaveReg(reg)) { + return true; + } + + /* This preceding block has livein OR liveout of reg */ + MapleSet &liveIn = bb->GetLiveInRegNO(); + MapleSet &liveOut = bb->GetLiveOutRegNO(); + if (liveIn.find(reg) != liveIn.end() || + liveOut.find(reg) != liveOut.end()) { + return true; + } + + return false; +} + +/* Return true if reg is already to be saved in its dominator list. Walks up from bb's immediate dominator, applying CheckCriteria to each block, and stops at the first block that has no predecessors (that block itself is not checked). NOTE(review): the result of GetDom() is dereferenced without a null check - confirm it cannot be null for the blocks passed in. */ +bool AArch64RegSavesOpt::AlreadySavedInDominatorList(BB *bb, regno_t reg) const { + BB *aBB = GetDomInfo()->GetDom(bb->GetId()); + + while (!aBB->GetPreds().empty()) { /* can't go beyond prolog */ + if (CheckCriteria(aBB, reg)) { + return true; /* previously saved, inspect next reg */ + } + aBB = GetDomInfo()->GetDom(aBB->GetId()); + } + return false; /* not previously saved, to save at bb */ +} + +/* Determine callee-save regs save locations and record them in bbSavedRegs. + Save is needed for a 1st def callee-save register at its dominator block + outside any loop.
+ */ +void AArch64RegSavesOpt::DetermineCalleeSaveLocations() { + for (auto *bb : bfs->sortedBBs) { + if (RS_DUMP) { + LogInfo::MapleLogger() << "BB: " << bb->GetId() << "\n"; + } + CalleeBitsType c = GetBBCalleeBits(GetCalleeBitsDef(), bb->GetId()); + CalleeBitsType mask = 1; + if (c == 0) { + continue; + } + /* scan the def bit vector from bit 0 upwards; mask always equals 1 << i */ for (int i=0; i<(sizeof(CalleeBitsType)<<3); i++) { + if (c & mask) { + MapleSet &liveIn = bb->GetLiveInRegNO(); + regno_t reg = ReverseRegBitMap(i); + if (liveIn.find(reg) == liveIn.end()) { /* not livein */ + BB* bbDom = bb; /* start from current BB */ + bool done = false; + /* climb the dominator chain until a block outside any loop is reached */ while (bbDom->GetLoop() != nullptr) { + bbDom = GetDomInfo()->GetDom(bbDom->GetId()); + ASSERT(bbDom, "Can't find dominator for save location"); /* must precede the first use of bbDom below */ + if (CheckCriteria(bbDom, reg)) { + done = true; + break; + } + } + if (done) { + mask <<= 1; + continue; + } + /* Check if a dominator of ddDom was already a location to save */ + if (AlreadySavedInDominatorList(bbDom, reg)) { + mask <<= 1; + continue; /* no need to save again, next reg */ + } + uint32 bid = bbDom->GetId(); + if (RS_DUMP) { + LogInfo::MapleLogger() << "R" << reg - 1; + LogInfo::MapleLogger() << " dominated by BB" << bid << "\n"; + } + SavedRegInfo *ctx = bbSavedRegs[bid]; + if (ctx == nullptr) { + ctx = memPool->New(alloc); + bbSavedRegs[bid] = ctx; + } + if (!bbSavedRegs[bid]->ContainSaveReg(reg)) { + bbSavedRegs[bid]->InsertSaveReg(reg); + } + } + } + mask <<= 1; + /* mask - 1 covers the positions already scanned; a mask that wrapped to 0 yields an all-ones value, which also ends the scan */ + if ((c & ~(mask - 1)) == 0) { + break; /* short cut */ + } + } + } +} + +/* Determine calleesave regs restore locations by calling ssu-pre, + previous bbSavedRegs memory is cleared and restore locs recorded in it */ +void AArch64RegSavesOpt::DetermineCalleeRestoreLocations() { + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + MapleAllocator sprealloc(memPool); + const MapleVector &callees = static_cast(cgFunc)->GetCalleeSavedRegs(); + for (auto reg : callees) { + /* only the integer registers from R29 up are left to the prolog/epilog; V registers number above them and must still be placed here */ if (reg >= R29 && reg < V8) { + continue; /* save/restore in prologue, epilogue */ + } +
SPreWorkCand wkCand(&sprealloc); + for (int bid = 1; bid < bbSavedRegs.size(); bid++) { + /* Set the saved BB locations of this callee-saved register */ + SavedRegInfo *sp = bbSavedRegs[bid]; + if (sp != nullptr) { + if (sp->ContainSaveReg(reg)) { + wkCand.saveBBs.insert(bid); + } + } + /* Set the BB occurrences of this callee-saved register */ + if (IsCalleeBitSet(GetCalleeBitsDef(), bid, reg) || + IsCalleeBitSet(GetCalleeBitsUse(), bid, reg)) { + wkCand.occBBs.insert(bid); + } + } + DoRestorePlacementOpt(cgFunc, GetPostDomInfo(), &wkCand); + if (wkCand.restoreAtEpilog) { + /* Restore cannot be applied, skip this reg and place save/restore + in prolog/epilog */ + for (int bid = 1; bid < bbSavedRegs.size(); bid++) { + SavedRegInfo *sp = bbSavedRegs[bid]; + if (sp != nullptr && !sp->GetSaveSet().empty()) { + if (sp->ContainSaveReg(reg)) { + sp->RemoveSaveReg(reg); + } + } + } + aarchCGFunc->GetProEpilogSavedRegs().push_back(reg); + if (RS_DUMP) { + LogInfo::MapleLogger() << "Restore R" << reg - 1 << " n/a, do in Pro/Epilog\n"; + } + continue; + } + if (!wkCand.restoreAtEntryBBs.empty() || !wkCand.restoreAtExitBBs.empty()) { + for (uint32 entBB : wkCand.restoreAtEntryBBs) { + if (RS_DUMP) { + std::string r = reg <= R28 ? 
"r" : "v"; + LogInfo::MapleLogger() << "BB " << entBB << " restore: " << r << reg - 1 << "\n"; + } + if (bbSavedRegs[entBB] == nullptr) { + bbSavedRegs[entBB] = memPool->New(alloc); + } + bbSavedRegs[entBB]->InsertEntryReg(reg); + } + for (uint32 exitBB : wkCand.restoreAtExitBBs) { + bool done = false; /* reset for every exit BB; a stale true would drop the exit restore of later BBs */ + for (BB *bb : bfs->sortedBBs) { + if (bb->GetId() == exitBB) { + if (bb->GetKind() == BB::kBBIgoto) { + CHECK_FATAL(false, "igoto detected"); + } else if (bb->GetSuccs().size() > 1) { + for (BB *sbb : bb->GetSuccs()) { + if (sbb->GetPreds().size() > 1) { + CHECK_FATAL(false, "critical edge detected"); + } + } + for (BB *sbb : bb->GetSuccs()) { + if (bbSavedRegs[sbb->GetId()] == nullptr) { + bbSavedRegs[sbb->GetId()] = memPool->New(alloc); + } + bbSavedRegs[sbb->GetId()]->InsertEntryReg(reg); /* insert at both succs */ + } + done = true; + break; /* break out of the sortedBBs loop */ + } + } + } + /* exit BB without multiple successors: record the restore on the BB itself */ if (!done) { + if (bbSavedRegs[exitBB] == nullptr) { + bbSavedRegs[exitBB] = memPool->New(alloc); + } + bbSavedRegs[exitBB]->InsertExitReg(reg); + } + } + } + } +} + +/* Compute a stack offset from the real frame size, the callee-saved area size less the FP/LR pair, and the size of the outgoing stack-argument area; for varargs functions the rounded GR and VR save-area sizes are subtracted as well. */ int32 AArch64RegSavesOpt::FindNextOffsetForCalleeSave() { + int32 offset = + static_cast(cgFunc->GetMemlayout())-> + RealStackFrameSize() - + (static_cast(cgFunc)->SizeOfCalleeSaved() - + (kDivide2 * kIntregBytelen) /* FP/LR */) - + cgFunc->GetMemlayout()->SizeOfArgsToStackPass(); + + if (cgFunc->GetFunction().GetAttr(FUNCATTR_varargs)) { + /* GR/VR save areas are above the callee save area */ + AArch64MemLayout *ml = static_cast(cgFunc->GetMemlayout()); + int saveareasize = RoundUp(ml->GetSizeOfGRSaveArea(), kSizeOfPtr * k2BitSize) + + RoundUp(ml->GetSizeOfVRSaveArea(), kSizeOfPtr * k2BitSize); + offset -= saveareasize; + } + return offset; +} + +void AArch64RegSavesOpt::InsertCalleeSaveCode() { + int bid = 0; + BB *saveBB = cgFunc->GetCurBB(); + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + + int32 offset = FindNextOffsetForCalleeSave(); + offset += (aarchCGFunc->GetProEpilogSavedRegs().size() - 2) <<
3; // 2 for R29,RLR 3 for 8 bytes + for (BB *bb : bfs->sortedBBs) { + bid = bb->GetId(); + aarchCGFunc->SetSplitBaseOffset(0); + if (bbSavedRegs[bid] != nullptr && !bbSavedRegs[bid]->GetSaveSet().empty()) { + aarchCGFunc->GetDummyBB()->ClearInsns(); + cgFunc->SetCurBB(*aarchCGFunc->GetDummyBB()); + AArch64reg intRegFirstHalf = kRinvalid; + AArch64reg fpRegFirstHalf = kRinvalid; + for (auto areg : bbSavedRegs[bid]->GetSaveSet()) { + AArch64reg reg = static_cast(areg); + RegType regType = AArch64isa::IsGPRegister(reg) + ? kRegTyInt : kRegTyFloat; + AArch64reg &firstHalf = AArch64isa::IsGPRegister(reg) + ? intRegFirstHalf : fpRegFirstHalf; + std::string r = reg <= R28 ? "R" : "V"; + /* If reg not seen before, record offset and then update */ + if (regOffset.find(areg) == regOffset.end()) { + regOffset[areg] = offset; + offset += kIntregBytelen; + } + if (firstHalf == kRinvalid) { + /* 1st half in reg pair */ + firstHalf = reg; + if (RS_DUMP) { + LogInfo::MapleLogger() << r << reg-1 << " save in BB" << bid << "Offset = " << regOffset[reg]<< "\n"; + } + } else { + if (regOffset[reg] == (regOffset[firstHalf] + k8ByteSize)) { + /* firstHalf & reg consecutive, make regpair */ + AArch64GenProEpilog::AppendInstructionPushPair(*cgFunc, firstHalf, reg, regType, regOffset[firstHalf]); + } else if (regOffset[firstHalf] == (regOffset[reg] + k8ByteSize)) { + /* reg & firstHalf consecutive, make regpair */ + AArch64GenProEpilog::AppendInstructionPushPair(*cgFunc, reg, firstHalf, regType, regOffset[reg]); + } else { + /* regs cannot be paired */ + AArch64GenProEpilog::AppendInstructionPushSingle(*cgFunc, firstHalf, regType, regOffset[firstHalf]); + AArch64GenProEpilog::AppendInstructionPushSingle(*cgFunc, reg, regType, regOffset[reg]); + } + firstHalf = kRinvalid; + if (RS_DUMP) { + LogInfo::MapleLogger() << r << reg-1 << " save in BB" << bid << "Offset = " << regOffset[reg]<< "\n"; + } + } + } + + if (intRegFirstHalf != kRinvalid) { + 
AArch64GenProEpilog::AppendInstructionPushSingle(*cgFunc, intRegFirstHalf, kRegTyInt, regOffset[intRegFirstHalf]); + } + + if(fpRegFirstHalf != kRinvalid) { + AArch64GenProEpilog::AppendInstructionPushSingle(*cgFunc, fpRegFirstHalf, kRegTyFloat, regOffset[fpRegFirstHalf]); + } + bb->InsertAtBeginning(*aarchCGFunc->GetDummyBB()); + } + } + cgFunc->SetCurBB(*saveBB); +} + +void AArch64RegSavesOpt::InsertCalleeRestoreCode() { + int bid = 0; + BB *saveBB = cgFunc->GetCurBB(); + AArch64CGFunc *aarchCGFunc = static_cast(cgFunc); + + int32 offset = FindNextOffsetForCalleeSave(); + for (BB *bb : bfs->sortedBBs) { + bid = bb->GetId(); + aarchCGFunc->SetSplitBaseOffset(0); + SavedRegInfo *sp = bbSavedRegs[bid]; + if (sp != nullptr) { + if (sp->GetEntrySet().empty() && sp->GetExitSet().empty()) { + continue; + } + + aarchCGFunc->GetDummyBB()->ClearInsns(); + cgFunc->SetCurBB(*aarchCGFunc->GetDummyBB()); + for (auto areg : sp->GetEntrySet()) { + AArch64reg reg = static_cast(areg); + offset = regOffset[areg]; + if (RS_DUMP) { + std::string r = reg <= R28 ? "R" : "V"; + LogInfo::MapleLogger() << r << reg-1 << " entry restore in BB " << bid << "\n"; + LogInfo::MapleLogger() << "Saved Offset = " << offset << "\n"; + } + + /* restore is always single from saved offset */ + RegType regType = AArch64isa::IsGPRegister(reg) + ? kRegTyInt : kRegTyFloat; + AArch64GenProEpilog::AppendInstructionPopSingle(*cgFunc, reg, regType, offset); + } + FOR_BB_INSNS(insn, aarchCGFunc->GetDummyBB()) { + insn->SetDoNotRemove(true); /* do not let ebo remove these restores */ + } + bb->InsertAtBeginning(*aarchCGFunc->GetDummyBB()); + + aarchCGFunc->GetDummyBB()->ClearInsns(); + cgFunc->SetCurBB(*aarchCGFunc->GetDummyBB()); + for (auto areg : sp->GetExitSet()) { + AArch64reg reg = static_cast(areg); + offset = regOffset[areg]; + if (RS_DUMP) { + std::string r = reg <= R28 ? 
"R" : "V"; + LogInfo::MapleLogger() << r << reg-1 << " exit restore in BB " << bid << "\n"; + LogInfo::MapleLogger() << "Saved Offset = " << offset << "\n"; + } + + /* restore is always single from saved offset */ + RegType regType = AArch64isa::IsGPRegister(reg) + ? kRegTyInt : kRegTyFloat; + AArch64GenProEpilog::AppendInstructionPopSingle(*cgFunc, reg, regType, offset); + } + FOR_BB_INSNS(insn, aarchCGFunc->GetDummyBB()) { + insn->SetDoNotRemove(true); + } + if ((bb->GetKind() == BB::kBBIf && bb->GetSuccs().size() <= 1) || + bb->GetKind() == BB::kBBGoto) { + bb->InsertAtEndMinus1(*aarchCGFunc->GetDummyBB()); + } else { + bb->InsertAtEnd(*aarchCGFunc->GetDummyBB()); + } + } + } + cgFunc->SetCurBB(*saveBB); +} + +/* Callee-save registers save/restore placement optimization */ +void AArch64RegSavesOpt::Run() { + // DotGenerator::GenerateDot("SR", *cgFunc, cgFunc->GetMirModule(), true, cgFunc->GetName()); + if (Globals::GetInstance()->GetOptimLevel() <= 1) { + return; + } + + Bfs localBfs(*cgFunc, *memPool); + bfs = &localBfs; + bfs->ComputeBlockOrder(); + if (RS_DUMP) { + LogInfo::MapleLogger() << "##Calleeregs Placement for: " << cgFunc->GetName() << "\n"; + PrintBBs(); + } + + /* Determined 1st def and last use of all callee-saved registers used + for all BBs */ + InitData(); + GetLocalDefUse(); + + /* Determine save sites at dominators of 1st def with no live-in and + not within loop */ + DetermineCalleeSaveLocations(); + + /* Determine restore sites */ + DetermineCalleeRestoreLocations(); + + /* Generate callee save instrs at found sites */ + InsertCalleeSaveCode(); + + /* Generate callee restores at found sites */ + InsertCalleeRestoreCode(); +} +} /* namespace maplebe */ diff --git a/src/mapleall/maple_be/src/cg/cg_option.cpp b/src/mapleall/maple_be/src/cg/cg_option.cpp index 52f7b21d7aafd9462b546dd8454586a890d7e741..824511185a2c12b25783b21848cd47d2c3ab83c4 100644 --- a/src/mapleall/maple_be/src/cg/cg_option.cpp +++ b/src/mapleall/maple_be/src/cg/cg_option.cpp 
@@ -102,6 +102,7 @@ bool CGOptions::inRange = false; bool CGOptions::doPreLSRAOpt = false; bool CGOptions::doLocalRefSpill = false; bool CGOptions::doCalleeToSpill = false; +bool CGOptions::doRegSavesOpt = false; bool CGOptions::replaceASM = false; bool CGOptions::generalRegOnly = false; bool CGOptions::fastMath = false; @@ -202,6 +203,7 @@ enum OptionIndex : uint64 { kFastMath, kTailCall, kAlignAnalysis, + kRegSaves, kArm64ilp32, }; @@ -395,6 +397,16 @@ const Descriptor kUsage[] = { " --no-lsra-optcallee\n", "mplcg", {} }, + { kRegSaves, + kEnable, + "", + "calleeregs-placement", + kBuildTypeExperimental, + kArgCheckPolicyBool, + " --calleeregs-placement \tOptimize placement of callee-save registers\n" + " --no-calleeregs-placement\n", + "mplcg", + {} }, { kPrepeep, kEnable, "", @@ -1423,6 +1435,9 @@ bool CGOptions::SolveOptions(const std::deque